我已经开始学习FParsec了。它有一种非常灵活的解析数字的方法;我可以提供一组我想使用的数字格式:
type Number =
| Numeral of int
| Decimal of float
| Hexadecimal of int
| Binary of int
let numberFormat = NumberLiteralOptions.AllowFraction
||| NumberLiteralOptions.AllowHexadecimal
||| NumberLiteralOptions.AllowBinary
let pnumber =
numberLiteral numberFormat "number"
|>> fun num -> if num.IsHexadecimal then Hexadecimal (int num.String)
elif num.IsBinary then Binary (int num.String)
elif num.IsInteger then Numeral (int num.String)
else Decimal (float num.String)然而,我试图解析的语言有点奇怪。数字可以是数字(非负int)、十进制(非负float)、十六进制(带有前缀#x)或二进制(带有前缀#b):
numeral: 0, 2
decimal: 0.2, 2.0
hexadecimal: #xA04, #x611ff
binary: #b100, #b001现在,为了使用pnumber,我必须进行两次解析,将#替换为0 (如果有必要
let number: Parser<_, unit> =
let isDotOrDigit c = isDigit c || c = '.'
let numOrDec = many1Satisfy2 isDigit isDotOrDigit
let hexOrBin = skipChar '#' >>. manyChars (letter <|> digit) |>> sprintf "0%s"
let str = spaces >>. numOrDec <|> hexOrBin
str |>> fun s -> match run pnumber s with
| Success(result, _, _) -> result
| Failure(errorMsg, _, _) -> failwith errorMsg在这种情况下,什么是更好的解析方法?或者,我如何修改FParsec的CharStream,使其更容易进行条件解析?
发布于 2012-02-06 23:16:25
如果您希望生成良好的错误消息并正确地检查溢出,那么解析数字可能会相当混乱。
下面是一个简单的数字解析器的FParsec实现:
let numeralOrDecimal : Parser<_, unit> =
// note: doesn't parse a float exponent suffix
numberLiteral NumberLiteralOptions.AllowFraction "number"
|>> fun num ->
// raises an exception on overflow
if num.IsInteger then Numeral(int num.String)
else Decimal(float num.String)
let hexNumber =
pstring "#x" >>. many1SatisfyL isHex "hex digit"
|>> fun hexStr ->
// raises an exception on overflow
Hexadecimal(System.Convert.ToInt32(hexStr, 16))
let binaryNumber =
pstring "#b" >>. many1SatisfyL (fun c -> c = '0' || c = '1') "binary digit"
|>> fun hexStr ->
// raises an exception on overflow
Binary(System.Convert.ToInt32(hexStr, 2))
let number =
choiceL [numeralOrDecimal
hexNumber
binaryNumber]
"number literal"在溢出时生成好的错误消息会使这个实现变得有点复杂,因为理想情况下,您还需要在错误之后回溯,这样错误位置就会在数字文字的开头结束(参见numberLiteral文档中的示例)。
一种优雅地处理可能的溢出异常的简单方法是使用一个小的异常处理组合器,如下所示:
let mayThrow (p: Parser<'t,'u>) : Parser<'t,'u> =
fun stream ->
let state = stream.State
try
p stream
with e -> // catching all exceptions is somewhat dangerous
stream.BacktrackTo(state)
Reply(FatalError, messageError e.Message)然后,您可以编写
let number = mayThrow (choiceL [...] "number literal")我不知道您说的“修改FParsec的CharStream以使条件解析更容易”是什么意思,但是下面的示例演示了如何编写只直接使用CharStream方法的低级实现。
type NumberStyles = System.Globalization.NumberStyles
let invariantCulture = System.Globalization.CultureInfo.InvariantCulture
let number: Parser<Number, unit> =
let expectedNumber = expected "number"
let inline isBinary c = c = '0' || c = '1'
let inline hex2int c = (int c &&& 15) + (int c >>> 6)*9
let hexStringToInt (str: string) = // does no argument or overflow checking
let mutable n = 0
for c in str do
n <- n*16 + hex2int c
n
let binStringToInt (str: string) = // does no argument or overflow checking
let mutable n = 0
for c in str do
n <- n*2 + (int c - int '0')
n
let findIndexOfFirstNonNull (str: string) =
let mutable i = 0
while i < str.Length && str.[i] = '0' do
i <- i + 1
i
let isHexFun = id isHex // tricks the compiler into caching the function object
let isDigitFun = id isDigit
let isBinaryFun = id isBinary
fun stream ->
let start = stream.IndexToken
let cs = stream.Peek2()
match cs.Char0, cs.Char1 with
| '#', 'x' ->
stream.Skip(2)
let str = stream.ReadCharsOrNewlinesWhile(isHexFun, false)
if str.Length <> 0 then
let i = findIndexOfFirstNonNull str
let length = str.Length - i
if length < 8 || (length = 8 && str.[i] <= '7') then
Reply(Hexadecimal(hexStringToInt str))
else
stream.Seek(start)
Reply(Error, messageError "hex number literal is too large for 32-bit int")
else
Reply(Error, expected "hex digit")
| '#', 'b' ->
stream.Skip(2)
let str = stream.ReadCharsOrNewlinesWhile(isBinaryFun, false)
if str.Length <> 0 then
let i = findIndexOfFirstNonNull str
let length = str.Length - i
if length < 32 then
Reply(Binary(binStringToInt str))
else
stream.Seek(start)
Reply(Error, messageError "binary number literal is too large for 32-bit int")
else
Reply(Error, expected "binary digit")
| c, _ ->
if not (isDigit c) then Reply(Error, expectedNumber)
else
stream.SkipCharsOrNewlinesWhile(isDigitFun) |> ignore
if stream.Skip('.') then
let n2 = stream.SkipCharsOrNewlinesWhile(isDigitFun)
if n2 <> 0 then
// we don't parse any exponent, as in the other example
let mutable result = 0.
if System.Double.TryParse(stream.ReadFrom(start),
NumberStyles.AllowDecimalPoint,
invariantCulture,
&result)
then Reply(Decimal(result))
else
stream.Seek(start)
Reply(Error, messageError "decimal literal is larger than System.Double.MaxValue")
else
Reply(Error, expected "digit")
else
let decimalString = stream.ReadFrom(start)
let mutable result = 0
if System.Int32.TryParse(stream.ReadFrom(start),
NumberStyles.None,
invariantCulture,
&result)
then Reply(Numeral(result))
else
stream.Seek(start)
Reply(Error, messageError "decimal number literal is too large for 32-bit int")虽然此实现无需系统方法的帮助即可解析十六进制和二进制数,但它最终将十进制数的解析委托给Int32.TryParse和Double.TryParse方法。
正如我所说的:它是混乱的。
https://stackoverflow.com/questions/9159554
复制相似问题