我正在修改一个从这里得到的正则表达式。我需要修改它,以便它还能匹配以下附加条件:
这就是我到目前为止所拥有的。我用RegexBuddy来帮助解析逻辑,但是它太复杂了,我不确定我有最有效的解决方案。
\b(?:((Jan(uary)?|Feb(ruary)?|Ma(r(ch)?|y)|Apr(il)?|Ju((ly?)|(ne?))|Aug(ust)?|Oct(ober)?|(Sept|Nov|Dec)(ember)?)|((((Jan(uary)?|Ma(r(ch)?|y)|Jul(y)?|Aug(ust)?|Oct(ober)?|Dec(ember)?) 31)|((Jan(uary)?|Ma(r(ch)?|y)|Apr(il)?|Ju((ly?)|(ne?))|Aug(ust)?|Oct(ober)?|(Sept|Nov|Dec)(ember)?) (0?[1-9]|([12]\d)|30))|(Feb(ruary)? (0?[1-9]|1\d|2[0-8]|(29(?=, ((1[6-9]|[2-9]\d)(0[48]|[2468][048]|[13579][26])|((16|[2468][048]|[3579][26])00))))))),)) ((1[6-9]|[2-9]\d)\d{2}))|((1[6-9]|[2-9]\d)\d{2})
有什么可以做的,以保持原来的正则表达式和我的附加标准的功能吗?
这是我实现这个功能的代码,如果它能帮助您看到我想做的事情的话。parseDate函数的输出应该是"yyyy mmm dd"形式的字符串日期(即示例4应该输出"2008 Mar"):
// generalized RegEx function
/**
 * Runs a regular expression against a string and yields the matched text.
 *
 * @param {RegExp} ex - pattern to execute
 * @param {string} haystack - text to search
 * @returns {string|undefined} the full match (capture groups are ignored),
 *     or undefined when the pattern does not match
 */
function returnRegExMatch(ex, haystack) {
  var found = ex.exec(haystack);
  // exec() yields null on failure; only element 0 (the whole match) is used.
  return found ? found[0] : undefined;
}
// date extraction (uses returnRegExMatch)
function parseDate(date) {
//strip anything other than a valid date
var dateRe = /\b(?:((Jan(uary)?|Feb(ruary)?|Ma(r(ch)?|y)|Apr(il)?|Ju((ly?)|(ne?))|Aug(ust)?|Oct(ober)?|(Sept|Nov|Dec)(ember)?)|((((Jan(uary)?|Ma(r(ch)?|y)|Jul(y)?|Aug(ust)?|Oct(ober)?|Dec(ember)?) 31)|((Jan(uary)?|Ma(r(ch)?|y)|Apr(il)?|Ju((ly?)|(ne?))|Aug(ust)?|Oct(ober)?|(Sept|Nov|Dec)(ember)?) (0?[1-9]|([12]\d)|30))|(Feb(ruary)? (0?[1-9]|1\d|2[0-8]|(29(?=, ((1[6-9]|[2-9]\d)(0[48]|[2468][048]|[13579][26])|((16|[2468][048]|[3579][26])00))))))),)) ((1[6-9]|[2-9]\d)\d{2}))|((1[6-9]|[2-9]\d)\d{2})/;
date = returnRegExMatch(dateRe,date);
var yearRe = /[0-9][0-9][0-9][0-9]/;
var monthRe = /Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec/;
var dayRe = /[0-9]?[0-9],/;
var year = returnRegExMatch(yearRe,date);
var month = returnRegExMatch(monthRe,date);
var day = parseInt(returnRegExMatch(dayRe,date),10);
var dateReturned = "";
if (year) { dateReturned = year; }
if (month) { dateReturned = dateReturned + " " + month; }
if (month && day) { dateReturned = dateReturned + " " + day; }
return dateReturned;
}谢谢!
编辑,感谢所有花时间回复的人。你们做了我想做的,指出了我执行过程中最荒谬的事情。我决定把主正则表达式简化得相当简单。结果如下:
\b(?:(?:Jan(?:uary)?|Feb(?:ruary)?|Ma(?:r(?:ch)?|y)|Apr(?:il)?|Ju(?:(?:ly?)|(?:ne?))|Aug(?:ust)?|Oct(?:ober)?|(?:Sept|Nov|Dec)(?:ember)?) (?:\d{1,2}, )?)?\d{4}
它不再根据闰年或其他规则来检测无效日期。@Bart让我相信,与正则表达式相比,使用原生JS可能是最好的方法。也感谢@Tim指出了使用非捕获括号的必要性。
如果有人对我应该如何完善这段代码有进一步的建议,请尽管提出。
发布于 2009-11-11 16:01:47
我必须说,我很难看懂这个怪物 :)
有两件事立即显现出来:
(?:...)将比使用常规括号更有效。我将向RegexMagic查询是否有更好的方法来获得您所需要的东西。但是,既然没有人强迫您在单个正则表达式中做您想做的所有事情,那么为什么不将问题分解为组件,对每个组件使用一个简单的正则表达式呢?
发布于 2009-11-11 17:21:17
像这样的事怎么样:
#!/usr/bin/js
/**
 * Maps an English month name or abbreviation to its 1-based month number.
 *
 * The lookup is case-insensitive and accepts three-letter abbreviations,
 * "sept", and full names.
 *
 * @param {string} monthStr - month name such as "Jan", "Sept" or "december"
 * @returns {number|undefined} 1-12, or undefined for anything unrecognized
 *     (including non-string input)
 */
function getMonth(monthStr) {
  if (typeof monthStr !== "string") { return undefined; }
  var monthMap = {
    jan: 1, january: 1,
    feb: 2, february: 2,
    mar: 3, march: 3,
    apr: 4, april: 4,
    may: 5,
    jun: 6, june: 6,
    jul: 7, july: 7,
    aug: 8, august: 8,
    sep: 9, sept: 9, september: 9,
    oct: 10, october: 10,
    nov: 11, november: 11,
    dec: 12, december: 12
  };
  var key = monthStr.toLowerCase();
  // Own-property check so inherited names ("constructor", "toString", ...)
  // are never mistaken for months.
  return Object.prototype.hasOwnProperty.call(monthMap, key) ? monthMap[key] : undefined;
}
/**
 * Gregorian leap-year test.
 *
 * @param {number|string} year - year to test (coerced by the % operator)
 * @returns {boolean} true when the year is divisible by 4, except century
 *     years, which must be divisible by 400
 */
function isLeapYear(year) {
  if (year % 4 !== 0) { return false; }
  if (year % 100 !== 0) { return true; }
  return year % 400 === 0;
}
/**
 * Tests whether a value is written as a positive whole number: digits only,
 * with at least one non-zero digit (leading zeros are allowed).
 *
 * @param {string|number|null|undefined} str - value to test
 * @returns {Array|null} the match array (truthy) for a positive integer,
 *     otherwise null; null/undefined input yields null instead of throwing
 */
function isPositiveNumber(str) {
  if (str === undefined || str === null) { return null; }
  // "0", "00", ... are rejected: zero is not positive.
  return String(str).match(/^0*[1-9]\d*$/);
}
/**
 * Parses a date written as "Month D, YYYY" (comma optional) and validates it
 * against the calendar, including leap years.
 *
 * Relies on the sibling helpers getMonth, isPositiveNumber and isLeapYear.
 * Tokens after the year are ignored.
 *
 * @param {string} date - text such as "January 31, 2009" or "Feb 29 2000"
 * @returns {Date|false} a Date set to the given year/month/day, or false when
 *     the text is incomplete, not numeric where required, or names a day that
 *     does not exist in that month. The time-of-day of the returned Date is
 *     whatever the clock reads at call time, because it starts from new Date().
 */
function parseDate(date) {
  if (typeof date !== "string") { return false; }

  // Drop leading whitespace so the first token is the month, then split on
  // whitespace with an optional preceding comma.
  var tokens = date.replace(/^\s+/, "").split(/,?\s+/);
  if (tokens.length < 3) { return false; }

  var m = getMonth(tokens[0]);
  var d = tokens[1];
  var y = tokens[2];
  if (!m || !isPositiveNumber(d) || !isPositiveNumber(y)) { return false; }

  var day = parseInt(d, 10);
  var year = parseInt(y, 10);

  // Index m-1 gives the last valid day of month m for this year.
  var daysInMonth = [31, isLeapYear(year) ? 29 : 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
  var lastDay = daysInMonth[m - 1];
  // Day 0 must be rejected explicitly: setFullYear would silently roll it
  // back to the last day of the previous month.
  if (!(day >= 1 && day <= lastDay)) { return false; }

  var dateObj = new Date();
  // setFullYear (rather than the Date constructor) keeps years below 100
  // from being remapped; months are 0-based.
  dateObj.setFullYear(year, m - 1, day);
  return dateObj;
}
var tests = new Array('January 31, 2009', 'Nov 31, 2009', 'Feb 29, 2001', 'Feb 29, 2000', 'Feb 29, 1900');
for(var i in tests) {
var date = parseDate(tests[i]);
print(date ? tests[i]+" is a valid date, parsed as: "+date : tests[i]+" invalid");
}输出:
January 31, 2009 is a valid date, parsed as: Sat Jan 31 2009 20:31:33 GMT+0100 (CET)
Nov 31, 2009 invalid
Feb 29, 2001 invalid
Feb 29, 2000 is a valid date, parsed as: Tue Feb 29 2000 20:31:33 GMT+0100 (CET)
Feb 29, 1900 invalid
https://stackoverflow.com/questions/1715932
复制相似问题