我正在使用Amazon Mechanical Turk API,它只允许我使用正则表达式来过滤数据字段。
我想向函数输入一个整数范围,例如256-311或45-1233,然后返回一个只与该范围匹配的正则表达式。
匹配256-321的正则表达式为:
\b((25[6-9])|(2[6-9][0-9])|(3[0-1][0-9])|(32[0-1]))\b
这部分相当简单,但我在编写生成这种正则表达式的循环时遇到了问题。
我正在尝试构建一个定义如下的函数:
function getRangeRegex( int fromInt, int toInt)
{
return regexString;
}
我搜遍了整个网络,很惊讶似乎以前没有人解决过这个问题。这是一个很难解决的问题。
耽误您时间,实在对不起。
发布于 2011-07-16 06:58:56
这里有一个简单的技巧:
<?php
/**
 * Builds a PCRE pattern that matches exactly the decimal integers in the
 * inclusive range [$from, $to] (no sign, no leading zeros).
 *
 * The range is cut into sub-ranges whose two bounds have the same number of
 * digits and differ only by one "free" digit followed by digits that run
 * 0..9. Such a sub-range can be written digit by digit, e.g. 260..299 becomes
 * 2[6-9][0-9]. The cut points are:
 *   - $from with its last 1, 2, ... digits replaced by 9 (e.g. 259, 299), and
 *   - one less than ($to + 1) with its last 1, 2, ... digits zeroed
 *     (e.g. 319, 299 for $to = 321).
 *
 * The power-of-ten step is not overflow-checked, so bounds are expected to be
 * well below PHP_INT_MAX.
 *
 * @param int $from lower bound, inclusive, non-negative
 * @param int $to   upper bound, inclusive, non-negative
 * @return string pattern of the form /^(?:alt1|alt2|...)$/
 * @throws Exception if a bound is negative or $from > $to
 */
function regex_range($from, $to) {
    if($from < 0 || $to < 0) {
        throw new Exception("Negative values not supported");
    }
    if($from > $to) {
        throw new Exception("Invalid range $from..$to, from > to");
    }

    // Inclusive upper bounds of the sub-ranges; $to always closes the last one.
    $stops = array($to);

    // Ascending cuts: $from with 1, 2, ... trailing digits turned into 9.
    $pow = 10;
    $stop = $from - ($from % $pow) + $pow - 1;
    while($stop < $to) {
        $stops[] = $stop;
        $pow *= 10;
        $stop = $from - ($from % $pow) + $pow - 1;
    }

    // Descending cuts: ($to + 1) with 1, 2, ... trailing digits zeroed, minus 1.
    $next = $to + 1;
    $pow = 10;
    $stop = $next - ($next % $pow) - 1;
    while($stop > $from) {
        $stops[] = $stop;
        $pow *= 10;
        $stop = $next - ($next % $pow) - 1;
    }

    // Both passes can produce the same cut (e.g. 299 for 256..321).
    $stops = array_unique($stops);
    sort($stops);

    $alternatives = array();
    $low = $from;
    foreach($stops as $high) {
        $str_from = (string)$low;
        $str_to = (string)$high;
        $pattern = '';
        // Both bounds have the same length here, so compare digit by digit.
        for($j = 0; $j < strlen($str_from); $j++) {
            if($str_from[$j] == $str_to[$j]) {
                $pattern .= $str_from[$j];
            }
            else {
                $pattern .= "[" . $str_from[$j] . "-" . $str_to[$j] . "]";
            }
        }
        $alternatives[] = $pattern;
        $low = $high + 1;
    }
    return '/^(?:' . implode('|', $alternatives) . ')$/';
}
/**
 * Prints one demo line for the range $from..$to: the range label padded to
 * ten columns followed by the generated pattern. If regex_range() throws,
 * only the exception message is printed instead.
 */
function test($from, $to) {
    $label = $from . '-' . $to;
    try {
        $pattern = regex_range($from, $to);
    } catch (Exception $error) {
        echo $error->getMessage(), "\n";
        return;
    }
    printf("%-10s %s\n", $label, $pattern);
}
// Demo driver: run test() on each sample range, in this order.
$sample_ranges = array(
    array(2, 8),
    array(5, 35),
    array(5, 100),
    array(12, 1234),
    array(123, 123),
    array(256, 321),
    array(256, 257),
    array(180, 195),
    array(2, 1),    // from > to: regex_range() throws, test() prints the message
    array(-2, 4),   // negative bound: regex_range() throws, test() prints the message
);
foreach ($sample_ranges as $bounds) {
    test($bounds[0], $bounds[1]);
}
?>
运行后的输出如下:
2-8 /^(?:[2-7]|8)$/
5-35 /^(?:[5-9]|[1-2][0-9]|3[0-5])$/
5-100 /^(?:[5-9]|[1-9][0-9]|100)$/
12-1234 /^(?:1[2-9]|[2-9][0-9]|[1-9][0-9][0-9]|1[0-2][0-3][0-4])$/
123-123 /^(?:123)$/
256-321 /^(?:25[6-9]|2[6-9][0-9]|3[0-2][0-1])$/
256-257 /^(?:256|257)$/
180-195 /^(?:18[0-9]|19[0-5])$/
Invalid range 2..1, from > to
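Negative values not supported
未经过充分测试,使用风险自负!
注意:上面 12-1234 和 256-321 两行的输出并不正确。例如 3[0-2][0-1] 匹配不到 302–309 和 312–319,1[0-2][0-3][0-4] 也匹配不到 1005 或 1199;256-321 的正确结果应为 25[6-9]|2[6-9][0-9]|3[0-1][0-9]|32[0-1]。
是的,在许多情况下,生成的正则表达式可以写得更紧凑,但我把这留给读者作为练习 :)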
发布于 2012-08-09 06:57:56
对于像我一样正在寻找上面 @Bart Kiers 出色答案的 JavaScript 版本的人,代码如下:
//Credit: Bart Kiers 2011
function regex_range(from, to){
if(from < 0 || to < 0) {
//throw new Exception("Negative values not supported");
return null;
}
if(from > to) {
//throw new Exception("Invalid range from..to, from > to");
return null;
}
var ranges = [];
ranges.push(from);
var increment = 1;
var next = from;
var higher = true;
while(true){
next += increment;
if(next + increment > to) {
if(next <= to) {
ranges.push(next);
}
increment /= 10;
higher = false;
}else{
if(next % (increment*10) == 0) {
ranges.push(next);
increment = higher ? increment*10 : increment/10;
}
}
if(!higher && increment < 10) {
break;
}
}
ranges.push(to + 1);
var regex = '/^(?:';
for(var i = 0; i < ranges.length - 1; i++) {
var str_from = ranges[i];
str_from = str_from.toString();
var str_to = ranges[i + 1] - 1;
str_to = str_to.toString();
for(var j = 0; j < str_from.length; j++) {
if(str_from[j] == str_to[j]) {
regex += str_from[j];
}
else {
regex += "[" + str_from[j] + "-" + str_to[j] + "]";
}
}
regex += "|";
}
return regex.substr(0, regex.length - 1 ) + ')$/';
}发布于 2019-08-11 09:07:53
下面是 RegexNumericRangeGenerator 的 PHP 移植版:
class RegexRangeNumberGenerator {
/**
 * Builds a regex source string (no delimiters) matching the integers in the
 * inclusive range [$min, $max].
 *
 * Returns FALSE unless both bounds are ints, both are non-negative and
 * $min <= $max. The three flags are passed through to parseIntoPattern(),
 * which decides how the alternatives are anchored.
 *
 * @param int  $min              lower bound, inclusive
 * @param int  $max              upper bound, inclusive
 * @param bool $MatchWholeWord   forwarded to parseIntoPattern()
 * @param bool $MatchWholeLine   forwarded to parseIntoPattern()
 * @param bool $MatchLeadingZero forwarded to parseIntoPattern()
 * @return string|false
 */
static function parse($min, $max, $MatchWholeWord = FALSE, $MatchWholeLine = FALSE, $MatchLeadingZero = FALSE) {
    $bothInts = is_int($min) && is_int($max);
    if (!$bothInts || $min > $max || $min < 0 || $max < 0) {
        return FALSE;
    }

    // A single value needs no splitting; it is wrapped as-is.
    if ($min == $max) {
        return self::parseIntoPattern($min, $MatchWholeWord, $MatchWholeLine, $MatchLeadingZero);
    }

    // First split by digit count, then split each equal-length piece further.
    $boundLists = [];
    foreach (self::parseStartRange($min, $max) as $sameLength) {
        $boundLists[] = self::parseEndRange($sameLength[0], $sameLength[1]);
    }

    $pairs = self::reformatArray($boundLists);
    $alternatives = self::parseIntoRegex($pairs);

    return self::parseIntoPattern($alternatives, $MatchWholeWord, $MatchWholeLine, $MatchLeadingZero);
}
/**
 * Wraps the alternatives in a group and applies the requested anchoring.
 *
 * Precedence of the flags, highest first:
 *   - $MatchLeadingZero: prefix the group with 0*; add ^...$ only if
 *     $MatchWholeLine is also set ($MatchWholeWord is ignored in this case)
 *   - $MatchWholeLine:   ^(...)$
 *   - $MatchWholeWord:   \b(...)\b
 *   - none:              (...)
 *
 * @param array|string|int $t alternatives; an array is joined with "|"
 * @return string
 */
static private function parseIntoPattern($t, $MatchWholeWord = FALSE, $MatchWholeLine = FALSE, $MatchLeadingZero = FALSE) {
    $body = $t;
    if (is_array($t)) {
        $body = implode("|", $t);
    }
    $group = "(" . $body . ")";

    if ($MatchLeadingZero) {
        $group = "0*" . $group;
        if ($MatchWholeLine) {
            return "^" . $group . '$';
        }
        return $group;
    }
    if ($MatchWholeLine) {
        return "^" . $group . '$';
    }
    if ($MatchWholeWord) {
        return "\\b" . $group . "\\b";
    }
    return $group;
}
/**
 * Turns each [from, to] pair into a regex fragment by comparing the two
 * bounds digit by digit.
 *
 * Per position: equal digits are emitted literally; digits that differ by
 * exactly one become a two-character class such as [12]; any other pair
 * becomes a range class such as [0-8]. When the same range class occurs
 * again at a later position it is not repeated; instead a {count}
 * quantifier is emitted once the last digit is reached.
 *
 * @param array $t list of 2-element [from, to] pairs
 * @return array list of regex fragments, one per pair
 * @throws Exception if $t is not an array
 */
static private function parseIntoRegex($t) {
    if (!is_array($t)) {
        throw new Exception("Argument needs to be an array!");
    }

    $fragments = [];
    $pairCount = count($t);
    for ($idx = 0; $idx < $pairCount; $idx++) {
        $lowDigits = str_split($t[$idx][0]);
        $highDigits = str_split($t[$idx][1]);
        $lastPos = count($lowDigits) - 1;

        $previousSpan = "";   // last "lo hi" digit pair that needed a range class
        $repeats = 0;         // how many times that pair has occurred again
        $fragment = "";

        for ($pos = 0; $pos <= $lastPos; $pos++) {
            $lo = $lowDigits[$pos];
            $hi = $highDigits[$pos];

            if ($lo === $hi) {
                $fragment .= $lo;
                continue;
            }
            if ((intval($lo) + 1) === intval($hi)) {
                $fragment .= "[" . $lo . $hi . "]";
                continue;
            }

            if ($previousSpan === ($lo . $hi)) {
                $repeats++;
            }
            $previousSpan = $lo . $hi;

            if ($pos == $lastPos) {
                // Close a run of identical classes with a quantifier.
                if ($repeats > 0) {
                    $fragment .= "{" . ($repeats + 1) . "}";
                } else {
                    $fragment .= "[" . $lo . "-" . $hi . "]";
                }
            } elseif ($repeats === 0) {
                $fragment .= "[" . $lo . "-" . $hi . "]";
            }
        }

        $fragments[] = $fragment;
    }
    return $fragments;
}
/**
 * Regroups a list of flat bound lists ([from1, to1, from2, to2, ...]) into
 * one list of 2-element slices, keeping the original order.
 *
 * @param array $t list of flat bound lists
 * @return array list of slices taken two elements at a time
 */
static private function reformatArray($t) {
    $pairs = [];
    for ($row = 0; $row < count($t); $row++) {
        $length = count($t[$row]);
        // Step through the flat list two entries at a time.
        for ($offset = 0; $offset < $length; $offset += 2) {
            $pairs[] = array_slice($t[$row], $offset, 2);
        }
    }
    return $pairs;
}
/**
 * Splits [$t, $r] wherever the number of digits changes, so that both bounds
 * of every returned pair have the same length (e.g. 5..100 becomes
 * 5..9, 10..99, 100..100).
 *
 * NOTE(review): assumes $t is not longer than $r, which parse() guarantees
 * by rejecting $min > $max.
 *
 * @param int $t lower bound
 * @param int $r upper bound
 * @return array list of [from, to] pairs
 */
static private function parseStartRange($t, $r) {
    $segments = [];
    while (strlen($t) !== strlen($r)) {
        // Largest number with as many digits as the current lower bound.
        $break = pow(10, strlen($t)) - 1;
        $segments[] = [$t, $break];
        $t = $break + 1;
    }
    $segments[] = [$t, $r];
    return $segments;
}
/**
 * Splits the range [$t, $r], whose bounds have the same number of digits,
 * into sub-ranges that parseIntoRegex() can express digit by digit.
 *
 * The result is a flat list [from1, to1, from2, to2, ...]. Bounds produced
 * during recursion are zero-padded strings, so every pair keeps equal length.
 *
 * Cases, in order:
 *   1. single digit: the pair itself
 *   2. $t ends in zeros and $r ends in nines (e.g. 10..99): the pair itself
 *   3. $t ends in zeros, leading digits differ (e.g. 000..234):
 *      [000, 199] followed by the split of 200..234
 *   4. leading digits differ (e.g. 256..321): the split of 256..299 followed
 *      by 300..321, which is kept as one pair when $r ends in nines and is
 *      split further otherwise
 *   5. equal leading digit: strip it, recurse on the tails, then put it back
 *
 * @param int|string $t lower bound
 * @param int|string $r upper bound, same length as $t
 * @return array flat list of bounds
 */
static private function parseEndRange($t, $r) {
    if (strlen($t) == 1) {
        return [$t, $r];
    }

    $lowHead = (int) substr($t, 0, 1);
    $highHead = (int) substr($r, 0, 1);
    $tailWidth = strlen($r) - 1;
    $lowTailIsZeros = str_repeat("0", strlen($t)) === "0" . substr($t, 1);
    $highTailIsNines = str_repeat("9", strlen($r)) === "9" . substr($r, 1);

    if ($lowTailIsZeros) {
        // Previously this compared a run of "0" against a string starting
        // with "9", which could never match, so ranges such as 10..99 were
        // split into 10..89 and 90..99 for no reason.
        if ($highTailIsNines) {
            return [$t, $r];
        }
        if ($lowHead < $highHead) {
            $e = intval(substr($r, 0, 1) . str_repeat("0", $tailWidth)) - 1;
            return array_merge(
                [$t, self::strBreakPoint($e)],
                self::parseEndRange(self::strBreakPoint($e + 1), $r)
            );
        }
    }

    if ($lowHead < $highHead) {
        // Cut just below the next multiple of the leading digit's place value.
        $e = intval(($lowHead + 1) . str_repeat("0", $tailWidth)) - 1;
        $upperPart = $highTailIsNines
            ? [self::strBreakPoint($e + 1), $r]
            : self::parseEndRange(self::strBreakPoint($e + 1), $r);
        return array_merge(self::parseEndRange($t, self::strBreakPoint($e)), $upperPart);
    }

    // Same leading digit on both sides: solve for the tails and re-prefix.
    $tails = self::parseEndRange(substr($t, 1), substr($r, 1));
    $prefixed = [];
    foreach ($tails as $bound) {
        $prefixed[] = ($lowHead . $bound);
    }
    return $prefixed;
}
/**
 * Returns $t as a string, left-padded with zeros to the number of characters
 * of $t + 1 (so 99 becomes "099" and 199 stays "199").
 *
 * @param int $t
 * @return string
 */
static private function strBreakPoint($t) {
    $width = strlen(($t + 1));
    return str_pad($t, $width, "0", STR_PAD_LEFT);
}
}
测试结果:
2-8 ^([2-8])$
5-35 ^([5-9]|[12][0-9]|3[0-5])$
5-100 ^([5-9]|[1-8][0-9]|9[0-9]|100)$
12-1234 ^(1[2-9]|[2-9][0-9]|[1-8][0-9]{2}|9[0-8][0-9]|99[0-9]|1[01][0-9]{2}|12[0-2][0-9]|123[0-4])$
123-123 ^(123)$
256-321 ^(25[6-9]|2[6-9][0-9]|3[01][0-9]|32[01])$
256-257 ^(25[67])$
180-195 ^(18[0-9]|19[0-5])$
https://stackoverflow.com/questions/6710236
复制相似问题