我正在尝试使用 RegExp 和 JavaScript 来解析圣经经文引用。具体的输出并不重要,问题在于我的一些正则表达式给出了假阴性结果(明明应该匹配,却返回不匹配)——除非是我对 RegExp 的某些地方理解有误,这也很有可能。
考虑以下函数:
/**
 * Demonstration function: splits a hard-coded verse-reference string with
 * exp1, drops separator/empty pieces, then logs which of exp2, exp4, exp3,
 * exp6 (tried in that order) first matches each remaining piece.
 *
 * Every pattern here is created with the 'g' flag. A global RegExp keeps its
 * lastIndex after a successful test(), so the next test() on the same object
 * resumes from that offset instead of the start of the new string. The
 * outcome therefore depends on the exact order of the test() calls below.
 *
 * Takes no arguments and returns nothing; all results go to console.log.
 */
function wtf() {
// Single comma-chained declaration: input string, splitter, split result,
// classifier patterns and the output array.
var s = "1:1-8, 3:5, 4:-8-10, 25-36, 5:1-6:1-26, 32-40, 55, 7:8",
// Splitter; it has capture groups, so split() also places the captured
// text (or undefined for a group that did not participate) into the result.
exp1 = new RegExp('(\\d+:)?([\\d-]+(?=\\d:)|[\\d-]+)','g'),
result = s.split(exp1),
exp2 = new RegExp('\\d+[ :]+','g'), //tests for \d:
exp3 = new RegExp('\\d+[-]\\B','g'), //tests for \d-
exp4 = new RegExp('\\b\\d{1,3}[ -]+\\d{1,3}\\b','g'), //tests for \d-\d
exp5 = new RegExp('\\d:.*\\b','g'), //tests for d: followed by anything
exp6 = new RegExp('^\\d{1,3}$','g'), //tests for 1,12,123,etc.
output = [];
// NOTE: `i` is not declared in this function, so it resolves to an outer
// (or implicit global) binding.
for(i=0;i<result.length;i++) {
// NOTE: `t` is assigned but never read afterwards.
var t = String(result[i]);
// Skip empty pieces, comma separators and undefined entries. `|` is the
// bitwise OR operator, so every comparison on this line is evaluated.
if(result[i] == "" | result[i] == ","| result[i] == " ," | result[i] == ", " | result[i] == undefined) {}
// Skip pieces matched by exp5 (also a stateful 'g' pattern).
else if(exp5.test(result[i]) == true) {}
// Keep the piece at its original index; skipped indexes stay as holes.
else {output[i] = result[i];}
}
// Compact the array: filter() skips holes and the callback drops falsy values.
output = output.filter(function(val){return val});
console.log(JSON.stringify(output));
// Classify each kept piece. Each branch calls test() on a shared 'g' RegExp,
// so a successful match changes what that same pattern reports next time.
for(i=0;i<output.length;i++) {
if(exp2.test(output[i]) == true) { //tests for '3:','10:','100:', etc
console.log("Current Index: "+output[i]);
console.log("IF Branch: "+exp2);
}
else if(exp4.test(output[i]) == true){//tests for '1-1','12-34','123-456', etc.
console.log("Current Index: "+output[i]);
console.log("IF Branch: "+exp4);
}
else if(exp3.test(output[i]) == true) { //tests for '/\\d[-]\\B/g'
console.log("Current Index: "+output[i]);
console.log("IF Branch: "+exp3);
}
else if(exp6.test(output[i]) == true) { //tests for '1','12','123',etc.
console.log("Current Index: "+output[i]);
console.log("IF Branch: "+exp6);
}
// No pattern reported a match for this piece.
else {
console.log("Current Index: "+output[i]);
console.log("IF Branch: else");
}
// Blank separator line between pieces.
console.log("");
}
}上面的代码将字符串解析为数组,删除无关的索引,重命名其余的索引,然后遍历每个索引,运行测试以确定要在每个索引上运行的代码块。
在大多数情况下,这运行得很好。对于符合相应模式的字符串,该函数基本都能正常工作。但是有一个问题,正如您在函数的输出中看到的:
["1:","1-8","3:","5","4:","-8-10","25-36","5:","1-","6:","1-26","32-40","55","7:","8"]
Current Index: 1:
IF Branch: /\d+[ :]+/g
Current Index: 1-8
IF Branch: /\b\d{1,3}[ -]+\d{1,3}\b/g
Current Index: 3:
IF Branch: /\d+[ :]+/g
Current Index: 5
IF Branch: /^\d{1,3}$/g
Current Index: 4:
IF Branch: /\d+[ :]+/g
Current Index: -8-10
IF Branch: /\b\d{1,3}[ -]+\d{1,3}\b/g
Current Index: 25-36
IF Branch: else
Current Index: 5:
IF Branch: /\d+[ :]+/g
Current Index: 1-
IF Branch: /\d+[-]\B/g
Current Index: 6:
IF Branch: /\d+[ :]+/g
Current Index: 1-26
IF Branch: /\b\d{1,3}[ -]+\d{1,3}\b/g
Current Index: 32-40
IF Branch: else
Current Index: 55
IF Branch: /^\d{1,3}$/g
Current Index: 7:
IF Branch: /\d+[ :]+/g
Current Index: 8
IF Branch: else
如您所见,首先输出的是经过 JSON.stringify 的数组,然后是对数组各索引的循环。对于每个索引,会输出当前值以及它所匹配的模式。您会注意到,当索引值为 "25-36"、"32-40" 和 "8" 时,触发的是 else 分支,尽管它们都明显匹配所使用的模式。此外,在每种情况下,前面都有格式完全相同的索引值触发了 if 语句的正确分支。
到底是怎么回事?是我对这里的某些机制理解有误吗?我已经在 regex101 上检查过这些模式,确认它们是正确的,所以我确信它们是有效的。这是怎么回事?
发布于 2017-04-02 10:25:04
您创建的正则表达式带有全局标志(g)。带有该标志的正则表达式对象会在多次匹配调用之间保留状态(lastIndex),下一次匹配会从上一次匹配结束的位置继续:
// A RegExp with the `g` flag remembers where its last match ended
// (lastIndex), so each exec() call continues after the previous match.
var re = /\d/g;
re.exec('123') // ['1']
re.exec('123') // ['2']
re.exec('123') // ['3']
re.exec('123') // null
因此,对于 test 也是一样:
// test() on a `g` RegExp advances the same lastIndex state as exec(),
// so repeated calls walk through the string one match at a time.
var re = /\d/g;
re.test('123') // true
re.test('123') // true
re.test('123') // true
re.test('123') // false
解决方案:不需要时不要添加全局标志。
一般简化如下:
const output = [
"1:1-8", "3:5", "4:-8-10", "25-36", "5:1-6:1-26", "32-40", "55", "7:8"];
const patterns = [
/\d+[ :]+/,
/\b\d{1,3}[ -]+\d{1,3}\b/,
/\d+[-]\B/,
/^\d{1,3}$/,
];
output.forEach(t => {
const matched = patterns.find(p => p.test(t));
console.log("Item: " + t);
console.log("Matched: " + matched);
console.log();
});https://stackoverflow.com/questions/43167129
复制相似问题