我试图以Penn Treebank风格打印解析结果,但似乎无法让括号正确配对(令人沮丧的是——解析器本身工作得很好,但我就是无法正确打印输出!)。对于递归函数,有什么窍门或技巧可以做到这一点吗?
这是我当前的打印方法,我从解析的start节点开始调用函数。
def print_tree(current_node, parents, side):
    """Print the subtree rooted at ``current_node`` in bracketed form.

    ``parents`` is the number of ancestors above the node.  It is echoed on
    a line of its own and then sets the indentation (three spaces per
    level).  ``side`` is ``'left'`` when the node is its parent's left
    child; any other value is treated as a right child.

    Known limitation: a right-hand leaf writes one ")" for every ancestor,
    so brackets are closed per ancestor rather than per finished subtree
    and the output is unbalanced on deeper trees.
    """
    print(parents)
    # Same text as printing " ", " " once per ancestor.
    print("   " * parents, end="")

    if not current_node.is_terminal:
        print("( " + current_node.lhs)
        # Children are looked up lazily, left before right.
        for attr_name, child_side in (("left_child", 'left'),
                                      ("right_child", 'right')):
            child = getattr(current_node, attr_name)
            if child is not None:
                print_tree(child, parents + 1, child_side)
        return

    leaf_text = "(" + current_node.lhs + " " + current_node.word_label
    if side == 'left':
        print(leaf_text + " )")
    else:
        # Close the leaf itself, then one bracket per ancestor.
        print(leaf_text + ") " + ") " * parents)
# The result I get is:
( TOP
( S_VP
(VB 'List' )
( NP
( NP
(DT 'the' )
(NNS 'flights') ) ) ) ) )
( PP
( PP
(IN 'from' )
(NP_NNP 'Baltimore') ) ) ) ) ) )
( PP
(TO 'to' )
( NP
(NP_NNP 'Seattle' )
( NP
( NP
(DT 'that' )
(NN 'stop') ) ) ) ) ) ) ) ) )
( PP
(IN 'in' )
(NP_NNP 'Minneapolis') ) ) ) ) ) ) ) ) )
(PUNC '.') ) )
想要的结果:
( TOP
( S_VP
(VB 'List' )
( NP
( NP
(DT 'the' )
(NNS 'flights') )
( PP
( PP
(IN 'from' )
(NP_NNP 'Baltimore') )
( PP
(TO 'to' )
( NP
(NP_NNP 'Seattle' ))
( NP
( NP
(DT 'that' )
(NN 'stop') )
( PP
(IN 'in' )
(NP_NNP 'Minneapolis')))))
(PUNC '.') ) ))
我试着想办法把它写成递归/缩进次数的函数,但没有成功。
发布于 2015-02-05 12:02:30
在每个终端节点(叶子)上,您都为所有父节点打印了“)”;实际上只应为连续的右侧父节点打印。
我建议将parents重命名为depth,并添加一个right_depth参数。
编辑:在尝试了一下之后,我决定最好把它委托给树:
class Node:
    """Binary parse-tree node that renders itself in bracketed form.

    A node is either a terminal (leaf) carrying a word in ``word_label``,
    or a non-terminal carrying ``left_child`` and ``right_child``.
    """

    # Indentation unit, repeated once per depth level by tree_str().
    INDENT = " "
    __slots__ = ["lhs", "word_label", "left_child", "right_child"]

    def __init__(self, lhs, *args):
        """Build a leaf ``Node(lhs, word_label)`` or an inner ``Node(lhs, left, right)``.

        Raises:
            ValueError: if the number of extra arguments is not one or two.
        """
        self.lhs = lhs
        num_args = len(args)
        if num_args == 1:
            self.word_label = args[0]
            self.left_child = None
            self.right_child = None
        elif num_args == 2:
            self.word_label = None
            self.left_child = args[0]
            self.right_child = args[1]
        else:
            raise ValueError("should have one arg (word_label: str) or two args (left: Node and right: Node)")

    def is_terminal(self):
        """Return True when this node carries a word label."""
        return self.word_label is not None

    def tree_str(self, depth=0, indent=None):
        """Return the bracketed rendering of this subtree.

        Every node starts on a new line (the result begins with a newline),
        indented by ``indent * depth``; ``indent`` defaults to ``INDENT``.
        A non-terminal's closing bracket is appended after its last
        descendant, so brackets always balance.
        """
        if indent is None:
            indent = self.INDENT
        if self.is_terminal():
            return "\n{}({} '{}' )".format(
                indent * depth,
                self.lhs,
                self.word_label
            )
        # Skip absent children so a node with a missing child still renders
        # instead of failing on None.tree_str().
        children = "".join(
            child.tree_str(depth + 1, indent)
            for child in (self.left_child, self.right_child)
            if child is not None
        )
        return "\n{}( {}{} )".format(indent * depth, self.lhs, children)

    def __str__(self):
        return self.tree_str()
# Next come a few grammar helpers:
def make_leaf_type(name):
    """Return a factory ``fn(x)`` that builds the terminal ``Node(name, x)``.

    The factory is renamed to *name* so that it reads like a grammar symbol,
    e.g. ``VB('List')``.
    """
    def fn(x):
        return Node(name, x)

    fn.__name__ = name
    # Keep __qualname__ in step; otherwise repr() still shows
    # "make_leaf_type.<locals>.fn".
    fn.__qualname__ = name
    return fn
# Publish one leaf constructor per label as a module-level name.
# globals() is used rather than locals(): writing through locals() only
# takes effect at module scope, whereas globals() states the intent directly.
for leaf_type in ("VB", "DT", "NNS", "IN", "NP_NNP", "TO", "NN", "PUNC"):
    globals()[leaf_type] = make_leaf_type(leaf_type)
def make_node_type(name):
    """Return a factory ``fn(l, r)`` that builds the inner ``Node(name, l, r)``.

    The factory is renamed to *name* so that it reads like a grammar symbol,
    e.g. ``NP(left, right)``.
    """
    def fn(l, r):
        return Node(name, l, r)

    fn.__name__ = name
    # Keep __qualname__ in step; otherwise repr() still shows
    # "make_node_type.<locals>.fn".
    fn.__qualname__ = name
    return fn
# Publish one inner-node constructor per label as a module-level name.
# globals() is used rather than locals(): writing through locals() only
# takes effect at module scope, whereas globals() states the intent directly.
for node_type in ("TOP", "S_VP", "NP", "PP"):
    globals()[node_type] = make_node_type(node_type)
# With these in place the tree can be built:
# Example tree; each constructor call mirrors one bracketed node of the
# printed output, nested in the same order.
tree = TOP(
    S_VP(
        VB('List'),
        NP(
            NP(DT('the'), NNS('flights')),
            PP(
                PP(IN('from'), NP_NNP('Baltimore')),
                PP(
                    TO('to'),
                    NP(
                        NP_NNP('Seattle'),
                        NP(
                            NP(DT('that'), NN('stop')),
                            PP(IN('in'), NP_NNP('Minneapolis')),
                        ),
                    ),
                ),
            ),
        ),
    ),
    PUNC('.'),
)
# The output then looks like this:
>>> print(tree)
( TOP
( S_VP
(VB 'List' )
( NP
( NP
(DT 'the' )
(NNS 'flights' ) )
( PP
( PP
(IN 'from' )
(NP_NNP 'Baltimore' ) )
( PP
(TO 'to' )
( NP
(NP_NNP 'Seattle' )
( NP
( NP
(DT 'that' )
(NN 'stop' ) )
( PP
(IN 'in' )
(NP_NNP 'Minneapolis' ) ) ) ) ) ) ) )
(PUNC '.' ) )
我相信这正是我们所期望的。
https://stackoverflow.com/questions/28335269
复制相似问题