我用 Python 实现了调度场算法(Shunting-yard algorithm)。我有 C# 方面的经验,希望得到更多的点评和建议。我还想知道,这些方法作为实例方法是否比作为静态方法更好。
from collections import deque
import re
class ShuntingYardParser:
    """Converts an infix expression to a postfix (RPN) expression
    using the Shunting-yard algorithm.

    Only non-negative integer operands, the left-associative binary
    operators ``+ - * /`` and parentheses are supported.
    """

    # Larger number = binds tighter.  All supported operators are
    # left-associative.
    _operatorPrecedence = {"*": 3,
                           "/": 3,
                           "+": 2,
                           "-": 2}

    @staticmethod
    def _tokenize(expression):
        """Split *expression* into operand/operator/parenthesis tokens.

        Raises ValueError when an unrecognized character is encountered.
        """
        tokenizer = re.compile(r'\s*([()+*/-]|\d+)')
        # Strip trailing whitespace so "5 " is not a bogus syntax error:
        # the pattern always requires a token after optional whitespace.
        expression = expression.rstrip()
        index = 0
        tokens = []
        while index < len(expression):
            match = tokenizer.match(expression, index)
            if match is None:
                raise ValueError("Syntax error at index {0}.".format(index))
            tokens.append(match.group(1))
            index = match.end()
        return tokens

    @staticmethod
    def _is_operand(token):
        """Return True when *token* is a non-negative integer literal."""
        try:
            int(token)
            return True
        except ValueError:
            return False

    @staticmethod
    def _is_operator(token):
        """Return True when *token* is one of the supported operators."""
        return token in ShuntingYardParser._operatorPrecedence

    @staticmethod
    def parse(infix_expression):
        """Parse the infix expression and return the postfix token list.

        Raises TypeError for non-string input and ValueError for a
        malformed expression (junk characters, unbalanced parentheses).
        """
        if not isinstance(infix_expression, str):
            raise TypeError("Input is not of type string")
        output = []
        operators = []  # a plain list works fine as a stack
        for token in ShuntingYardParser._tokenize(infix_expression):
            if ShuntingYardParser._is_operand(token):
                output.append(token)
            elif ShuntingYardParser._is_operator(token):
                precedence = ShuntingYardParser._operatorPrecedence[token]
                # Pop every stacked operator that binds at least as
                # tightly.  ">=" (not "<=") is required for
                # left-associative operators; otherwise "2+3*4" would
                # come out as "(2+3)*4".
                while (operators
                       and operators[-1] != "("
                       and ShuntingYardParser._operatorPrecedence[operators[-1]] >= precedence):
                    output.append(operators.pop())
                operators.append(token)
            elif token == "(":
                operators.append(token)
            elif token == ")":
                # Pop all the operators in front of the "(".
                while operators and operators[-1] != "(":
                    output.append(operators.pop())
                # The stack drained completely, so there was no "(".
                if not operators:
                    raise ValueError("Unmatched ')' in the input.")
                # Remove the matching "(".
                operators.pop()
        # Drain the stack from the TOP down (popping, not iterating in
        # insertion order) -- the topmost operator must be emitted first.
        while operators:
            operator = operators.pop()
            # A leftover "(" means an unmatched opening parenthesis.
            if operator == "(":
                raise ValueError("Unmatched '(' in the input.")
            output.append(operator)
        return output
if __name__ == "__main__":
while True:
action = input("Enter infix expression:")
try:
postfix_notation = ShuntingYardParser.parse(action)
print(postfix_notation)
except ValueError as e:
print(e.args)单元测试:
from unittest import TestCase
from ShuntingYardParser import ShuntingYardParser
class TestShuntingYardParser(TestCase):
def test_single_number(self):
# arrange & act
output = ShuntingYardParser.parse("5")
# assert
self.assertEqual(output, list("5"))
def test_non_string_input(self):
# arrange & act & assert
self.assertRaises(TypeError, ShuntingYardParser.parse, 5)
def test_matching_parenthesises(self):
# arrange & act
output = ShuntingYardParser.parse("(5+2)")
# assert
self.assertEqual(output, ["5", "2", "+"])
def test_non_matching_parenthesises_start(self):
# arrange & act
self.assertRaises(ValueError, ShuntingYardParser.parse, "((5+2)")
def test_non_matching_parenthesises_end(self):
# arrange & act
self.assertRaises(ValueError, ShuntingYardParser.parse, "(5+2))")发布于 2018-08-08 22:23:31
请注意,您只支持非负整数作为操作数。
您的算法正确地按照后缀顺序重新排列输入令牌,但如果它在输出中保留运算符/操作数分类,则会更有用。结果很可能会被输入到RPN评估器中,如果RPN评估器不需要重做这项工作,那就太好了。(您的._is_operator()和._is_operand()方法是“私有的”,因此不应该重用。)
考虑将.parse()和.tokenize()转化为yields的结果的生成器,而不是构建要返回的列表。
我看不出有任何理由把 ShuntingYardParser 定义为一个类。这个类不保存任何实例状态,因此它只是一个美化的命名空间。把这些“方法”写成 ShuntingYardParser.py 模块中的普通函数会简洁得多。
令牌化器可以利用正则表达式直接对令牌类型进行分类,这样就不需要 ._is_operand() 和 ._is_operator() 了。我建议参考 Python re 模块文档中的 tokenizer(记号器)配方。
在.parse()中,operators不需要成为collections.deque;列表可以正常工作。
您实际上不需要测试if not isinstance(infix_expression, str):,因为正则表达式搜索除了字符串(或字节串)之外的任何东西都会引发TypeError。
“parenthesises”不是一个正确的英文单词(正确的拼写是 parentheses)。异常消息也可以提供更多的信息,指明不匹配的是开括号还是闭括号。
类似地,“语法错误”可以更具体地说明违规令牌。
要打印与捕获异常相关的消息(而不是一些奇怪的args元组),请执行以下操作:
except ValueError as e:
    print(e)

ShuntingYardParser.py
from collections import namedtuple
import re
class Token(namedtuple('Token', 'kind value')):
    """A lexical token: ``kind`` is the category name produced by the
    tokenizer, ``value`` is the matched source text."""

    @property
    def precedence(self):
        """Binding strength of an operator token; ``None`` for any
        non-operator token (operands, parentheses)."""
        if self.kind != 'OPERATOR':
            return None
        return {'+': 2, '-': 2, '*': 3, '/': 3}[self.value]
def tokenize(expression):
    """
    Yield tokens in the order that they are encountered in the expression.

    Raises ValueError on the first character that is not part of any
    recognized token.
    """
    # https://docs.python.org/3/library/re.html#writing-a-tokenizer
    token_spec = [
        ('PAREN', r'[()]'),
        ('OPERATOR', r'[+*/-]'),
        ('OPERAND', r'\d+'),
        ('IGNORE', r'\s+'),
        # Any single leftover non-space character is junk.  The previous
        # pattern r'\S+?\b' required a word boundary, so pure non-word
        # junk such as "@" never matched and was silently skipped.
        ('JUNK', r'\S'),
    ]
    tokenizer = re.compile('|'.join(
        '(?P<{kind}>{pattern})'.format(kind=kind, pattern=pattern)
        for kind, pattern in token_spec
    ))
    for match in tokenizer.finditer(expression):
        kind, value = match.lastgroup, match.group(match.lastgroup)
        if kind == 'JUNK':
            raise ValueError('Unrecognized token: {0}'.format(value))
        elif kind != 'IGNORE':
            yield Token(kind, value)
def parse(infix_expression):
    """
    Yield the tokens of the infix expression, reordered as a postfix
    (RPN) expression.

    Raises ValueError for junk tokens or unbalanced parentheses.
    """
    operators = []  # operator / parenthesis stack
    for token in tokenize(infix_expression):
        if token.kind == 'OPERAND':
            yield token
        elif token.kind == 'OPERATOR':
            # Pop stacked operators that bind at least as tightly.
            # ">=" (not "<=") is required for left-associative
            # operators; otherwise "2+3*4" is grouped as "(2+3)*4".
            while (operators and
                   operators[-1].value != '(' and
                   operators[-1].precedence >= token.precedence):
                yield operators.pop()
            operators.append(token)
        elif token.value == '(':
            operators.append(token)
        elif token.value == ')':
            # Pop all the operators in front of the "(".
            while operators and operators[-1].value != '(':
                yield operators.pop()
            if not operators:
                raise ValueError('Unmatched )')
            # Remove the matching parenthesis.
            operators.pop()
    # Drain the stack from the TOP down; iterating the list in
    # insertion order would emit the remaining operators reversed.
    while operators:
        operator = operators.pop()
        if operator.value == '(':
            raise ValueError('Unmatched (')
        yield operator
if __name__ == '__main__':
while True:
try:
postfix_expr = parse(input("Enter infix expression: "))
print([op for op in postfix_expr])
except ValueError as e:
print(e)单元测试:
import unittest
import ShuntingYardParser
class TestShuntingYardParser(unittest.TestCase):
def test_single_number(self):
    # A lone integer yields a single OPERAND token.
    tokens = list(ShuntingYardParser.parse("5"))
    self.assertEqual(tokens, [ShuntingYardParser.Token('OPERAND', '5')])
def test_non_string_input(self):
    # Non-string input fails inside re with TypeError; the generator
    # must be consumed for the tokenizer to run at all.
    with self.assertRaises(TypeError):
        list(ShuntingYardParser.parse(5))
…https://codereview.stackexchange.com/questions/201232
复制相似问题