我开始为一家没有足够时间进行代码评审的公司工作。作为一个初级程序员,我想提高我在实际工作案例上的技能。我被要求以英文代替法文文本,以方便翻译"tableau工作簿“文件(这些只是XML文件)。
我分4个步骤工作(因此我编写了4个可以独立运行的文件):
我的大部分工作都是从.twb文件(XML )中检索有趣的字段。因此,我编写了一个助手文件,该文件具有检索/获取感兴趣的节点的功能,以及其他更新XML文件中节点的函数( XML文件可以看作是一棵有节点的树)。下面是我的助手文件的摘录,让您了解我想要完成的任务。
我想我的设计很糟糕,如果你能给我一些建议(OO设计等),我会很高兴的。或任何建议(书籍.)帮助我提高我的设计技能。
from lxml import etree
import os
from copy import deepcopy
from collections import namedtuple
from ast import literal_eval
import re
####### Variable globales ######
textual_node = namedtuple('textual_node', ['id', 'label', 'type'])
stop_words = ['<', '>', 'Æ', '', '\n', ':', '()']
####### Parsing xml #######
def get_root_tree(file_source):
tree = etree.parse(file_source)
root = tree.getroot()
return tree, root
####### Utilities #########
def get_parent(node,depth):
""" Retrieve ancestor node """
ancestor_node = node
for i in range(depth):
ancestor_node=ancestor_node.getparent()
return ancestor_node
def get_brute_path(root,tree,xpath):
brute_paths = []
paths = []
for node in root.xpath(xpath):
path = tree.getpath(node)
brute_path = clean_path(path)
if not (brute_path in brute_paths):
paths.append(path)
brute_paths.append(brute_path)
return brute_paths,paths
def clean_path(path):
split_path = [elt.split('[')[0] for elt in path.split('/')]
brute_path = '/'.join(split_path)
return brute_path
def get_ws_name(root, tree):
""" Mapping between worksheet's name and worksheet's number in the xml file"""
d_wsname = {}
d_inv_wsname = {}
for node in root.xpath('worksheets/worksheet[@name]'):
ws_id = tree.getpath(node).split('/')[-1]
ws_name = node.get('name')
d_wsname[ws_id] = ws_name
d_inv_wsname[ws_name] = ws_id
return d_wsname, d_inv_wsname
####### Get nodes of interest #########
def get_alias(root):
""" Get the aliases nodes """
s_alias = set()
xpath = "/workbook/datasources/datasource/column/aliases/alias"
for node in root.xpath(xpath):
ds_node = get_parent(node,3)
ds_id = ds_node.attrib['name']
col_node = get_parent(node,2)
col_id = col_node.attrib['name']
alias_id = node.attrib['key']
node_id = (ds_id,col_id,alias_id)
node_text = node.attrib['value']
node_type = 'alias'
alias_node = textual_node(node_id, node_text, node_type)
s_alias.add(alias_node)
return s_alias
def get_member(root):
""" Get the member nodes"""
s_member = set()
xpath = "/workbook/datasources/datasource/column/members/member[@alias]"
for node in root.xpath(xpath):
ds_node = get_parent(node,3)
ds_id = ds_node.attrib['name']
col_node = get_parent(node, 2)
col_id = col_node.attrib['name']
member_id = node.attrib['value']
node_id = (ds_id, col_id, member_id)
node_text = node.attrib['alias']
node_type = 'member'
member_node = textual_node(node_id, node_text, node_type)
s_member.add(member_node)
return s_member
def get_title_format(tree,root):
""" Get the title text"""
s = set()
xpath = '//format[@attr="title"][@value]'
for node in root.xpath(xpath):
ws_id = get_parent(node, 4).attrib['name']
fm_id = node.attrib['field']
node_id = (ws_id, fm_id)
node_text = node.get('value')
node_type = 'title_'+node.tag
title_format_node = textual_node(node_id, node_text, node_type)
s.add(title_format_node)
return s
def get_formatted_format(tree,root):
""" Get the formatted text """
raw = r"(?P<type>^.)(?:\"(?P<prefix>[^\"]*)\"){0,1}(?P<format>[^\"]*)(?:\"(?P<suffix>[^\"]*)\"){0,1};(?P<negformat>.)(?:\"(?P=prefix)\"){0,1}(?P=format)(?:\"(?P=suffix)\"){0,1}"
pattern = re.compile(raw)
s_format = set()
xpath = '//format[@attr="text-format"]' # xpath = "/workbook/worksheets//table/style/style-rule/format[@field][@attr='text-format']"
for node in root.xpath(xpath):
fm_text = node.attrib['value']
matches = pattern.search(fm_text)
if matches:
ws_id = get_parent(node,4).attrib['name']
fm_id = node.attrib['field']
node_id = (ws_id,fm_id)
match_dic = matches.groupdict()
node_prefix,node_suffix = match_dic['prefix'], match_dic['suffix']
if node_prefix:
if node_prefix.strip()!='€':
node_type = "{}_prefix".format(node.tag)
format_prefix_node = textual_node(node_id, node_prefix, node_type)
s_format.add(format_prefix_node)
if node_suffix:
if node_suffix.strip() != '€':
node_type = "{}_suffix".format(node.tag)
format_suffix_node = textual_node(node_id, node_suffix, node_type)
s_format.add(format_suffix_node)
return s_format
def get_caption(root):
""" Get the column names (original and calculated ones)"""
s_caption = set()
for node in root.xpath('/workbook/datasources/datasource/column[@caption]'):
node_id = node.get('name')
node_text = node.get('caption')
node_type = node.tag
caption_node = textual_node(node_id, node_text, node_type)
s_caption.add(caption_node)
return s_caption
def get_tooltip_label(root, tree, d_wsname):
"""" Get the tooltip and label """
s_tooltip_label = set()
nodes_type = ['customized-tooltip','customized-label']
for node_type in nodes_type:
ancestor_path = '//worksheets/*/table/panes/pane/{node_type}/formatted-text/run/ancestor::pane'.format(node_type=node_type)
for ancestor in root.xpath(ancestor_path):
try:
pane_id = ancestor.attrib["id"]
except:
pane_id=''
finally:
run_path="{node_type}/formatted-text/run".format(node_type=node_type)
for node in ancestor.xpath(run_path):
if not any(substring == node.text.strip() for substring in stop_words): # 'Æ', '<['
ws_id = tree.getpath(node).split('/')[3]
ws_name = d_wsname[ws_id]
run_index = tree.getpath(node)[-2:-1]
node_id = (ws_name,pane_id,run_index)
node_text = node.text
node_type = tree.getpath(node).split('/')[-3]
tooltip_label_node = textual_node(node_id, node_text, node_type)
s_tooltip_label.add(tooltip_label_node)
return s_tooltip_label
def get_zone(root,tree):
""" Get the textual zones """
s_zone = set()
xpath_run = "/workbook/dashboards/dashboard/zones//zone/formatted-text/run"
for run_node in root.xpath(xpath_run):
if not any(substring == run_node.text.strip() for substring in stop_words):
previous_zone_node = get_parent(run_node,2)
pane_id = previous_zone_node.attrib['id']
run_index = tree.getpath(run_node)[-2:-1]
node_id = (pane_id,run_index)
node_text = run_node.text
node_type = tree.getpath(run_node).split('/')[-3].split('[')[0]
zone_node = textual_node(node_id,node_text,node_type)
s_zone.add(zone_node)
return s_zone
def get_node_info(tree,node):
info = " tree.getpath(node): {path} \n node.items(): {attr} \n node.text: {text}".format(path=tree.getpath(node),attr=node.items(),text=node.text)
print(info)
def get_ancestor_by_name(root,tree,node,namefield):
path = tree.getpath(node)
brute_path = clean_path(path)
hier = brute_path.split('/')[1:]
for i,elt in enumerate(hier):
if namefield==elt:
return get_parent(node,len(hier)-i-1)
return False
############### Update Nodes #############################
def update_wblocal(root,lang):
dic_lang = {'FR':'fr_FR','EN':'en_GB','ES':'es_ES'}
root.attrib['locale'] = dic_lang[lang]
return root
def update_nodes(df, root):
root_trad = deepcopy(root)
tree_trad = etree.ElementTree(root_trad)
d_wsname, d_inv_wsname = get_ws_name(root_trad, tree_trad)
d_func = {"customized-tooltip": update_customized_node, "customized-label": update_customized_node,
"column": update_caption_node,"zone":update_zone_node,"alias":update_alias_node,
"member":update_member_node,"format_prefix":update_format_node,"format_suffix":update_format_node,
"title_format":update_title_format_node}
d_args = {"customized-tooltip": (d_inv_wsname, root_trad, tree_trad), "customized-label": (d_inv_wsname, root_trad, tree_trad),
"column": (root_trad, tree_trad) ,"zone":(root_trad, tree_trad),"alias":(root_trad,tree_trad),
"member":(root_trad,tree_trad),"format_suffix":(root_trad,tree_trad),"format_prefix":(root_trad,tree_trad),
"title_format":(root_trad,tree_trad)}
for index, row in df.iterrows():
node_type = row['node_type']
d_func[node_type](row,*d_args[node_type])
return root_trad, tree_trad
def update_title_format_node(row,root,tree):
""" Get the title text"""
xpath = '//format[@attr="title"][@value]'
brute_paths, _ = get_brute_path(root, tree, xpath)
node_id = literal_eval(row['node_id'])
ws_id, fm_id = node_id
dic = {'worksheet': ('name', ws_id), 'format': ('field', fm_id)}
for brute_path in brute_paths:
nodes = iterpath(root,tree,brute_path,dic)
for node in nodes:
print("Avant title_format: ", node.attrib['value'])
node.attrib['value'] = row.to_replace
print("Après title_format:", node.attrib['value'])
return root, tree
def update_format_node(row,root,tree):
node_id = literal_eval(row['node_id'])
raw = r"(?P<type>^.)(?:\"(?P<prefix>[^\"]*)\"){0,1}(?P<format>[^\"]*)(?:\"(?P<suffix>[^\"]*)\"){0,1};(?P<negformat>.)(?:\"(?P=prefix)\"){0,1}(?P=format)(?:\"(?P=suffix)\"){0,1}"
pattern = re.compile(raw)
xpath = '//format[@attr="text-format"][@field]' # xpath = "/workbook/worksheets//table/style/style-rule/format[@field][@attr='text-format']"
brute_paths,_ = get_brute_path(root,tree,xpath)
ws_id,fm_id = node_id
dic = {'worksheet':('name',ws_id),'format':('field',fm_id)}
for brute_path in brute_paths:
nodes = iterpath(root,tree,brute_path,dic)
for node in nodes:
node_text = node.attrib['value']
print("Avant format: ", node_text)
dic = pattern.search(node_text).groupdict()
dic_pattern = {'format_prefix':dic['prefix'],'format_suffix':dic['suffix']}
pattern = dic_pattern[row.node_type]
new_text = node_text.replace(pattern,row.to_replace)
node.attrib['value'] = new_text
print("Après format:", node.attrib['value'])
return root,tree
def update_alias_node(row,root,tree):
node_id = literal_eval(row['node_id'])
ds_id, col_id,alias_id = node_id
xpath = "//alias"
brute_paths,_ = get_brute_path(root,tree,xpath)
dic = {'datasource':('name',ds_id),'datasource-dependencies':('datasource',ds_id),\
'column':('name',col_id),'alias':('key',alias_id)}
for brute_path in brute_paths:
nodes = iterpath(root,tree,brute_path,dic)
for node in nodes:
print("Avant alias: ", node.attrib['value'])
node.attrib['value'] = row.to_replace
print("Après Alias:", node.attrib['value'])
return root,tree
def iterpath(node,tree,rightpath,dic,nodes=None):
""" This recursive algorithm traverse the tree till the last element of the specified path
(from top to bottom) looking if (key,value) pair of nodes match with dictionary """
if nodes == None:
nodes =[]
node_tag = node.tag
if len(rightpath.split('/'))>1:
_, rightpath = rightpath.split("{}/".format(node_tag))
if rightpath:
next_tag,*_ = rightpath.split('/')
xpath = "{}/{}".format(tree.getpath(node),next_tag)
if node_tag in dic.keys():
key, value = dic[node_tag]
if node.attrib[key] != value:
return False
for child_node in node.xpath(xpath):
iterpath(child_node, tree, rightpath, dic,nodes)
else: #last node of the original specified path (fisrt call to the function)
key, key_id = dic[node_tag]
try:
if node.attrib[key] == key_id:
nodes.append(node)
except KeyError:
print("KeyError:",node.items())
return nodes
def update_caption_node(row, root_trad, tree_trad):
node_id = row['node_id']
xpath_request = '//*[@name="{name}"][@caption]'.format(name=node_id)
for node in root_trad.xpath(xpath_request):
try:
print("Avant caption:", node.get('caption'))
node.attrib["caption"] = row[-1]
print("Après caption:", node.get('caption'))
except Exception as e:
print("Exception",e)
return root_trad, tree_trad
def update_customized_node(row, d_inv_wsname, root_trad, tree_trad):
""" Se sert de la dernière colonne du dataframe, comme colonne de remplacement (à améliorer)"""
node_type = row['node_type']
node_id = literal_eval(row['node_id'])
wsname = node_id[0]
pane_id = node_id[1]
run_index = node_id[2]
ws = d_inv_wsname[wsname]
if pane_id:
xpath_pane = '//worksheets/{worksheet}/table/panes/pane[@id={pane_id}]'.format(worksheet=ws,pane_id=pane_id)
else:
xpath_pane = '//worksheets/{worksheet}/table/panes/pane'.format(worksheet=ws)
if run_index != 'u':
xpath_run = '//{node_type}/formatted-text/run[{run_index}]'.format(node_type=node_type,run_index=run_index)
elif run_index == 'u':
xpath_run = '//{node_type}/formatted-text/run'.format(node_type=node_type)
for run_node in root_trad.xpath(xpath_pane+xpath_run):
try:
print('Avant run:', run_node.text)
run_node.text = row[-1]
print('Après run:', run_node.text)
except Exception as e:
print(">",repr(e))
return root_trad, tree_trad
def update_zone_node(row, root_trad, tree_trad):
node_id = literal_eval(row['node_id'])
zone_id = node_id[0]
run_index = node_id[1]
if run_index != 'u':
xpath_run = '/workbook/dashboards/dashboard/zones//zone[@id={zone_id}]/formatted-text/run[{run_index}]'.format(zone_id=zone_id,run_index=run_index)
elif run_index == 'u':
xpath_run = '/workbook/dashboards/dashboard/zones//zone[@id={zone_id}]/formatted-text/run'.format(zone_id=zone_id)
for run_node in root_trad.xpath(xpath_run):
try:
print('Avant run:', run_node.text)
run_node.text = row[-1]
print('Après run:', run_node.text)
except Exception as e:
print(">", repr(e))
return root_trad, tree_trad
def update_member_node(row, root, tree):
node_id = literal_eval(row['node_id'])
ds_id, col_id,member_id = node_id
xpath = "//member"
brute_paths,_ = get_brute_path(root,tree,xpath)
dic = {'datasource':('name',ds_id),'column':('name',col_id),'member':('value',member_id)}
for brute_path in brute_paths:
nodes = iterpath(root,tree,brute_path,dic)
for node in nodes:
print("Avant member: ", node.attrib['alias'])
node.attrib['alias'] = row.to_replace
print("Après member:", node.attrib['alias'])
return root,tree
# Write to file
def replace_apostroph(filepath):
filepath2 = filepath.split('.twb')[0]+'2.twb'
with open(filepath) as fp_in:
with open(filepath2,'w') as fp_out:
for line in fp_in:
fp_out.write(line.replace("'", ''').replace('"', "'").replace('/>',' />'))
def rootToXml(view_name,output_dir, language_target, tree_trad):
twb_output = '{view}_{languageTarget}.twb'.format(view=view_name, languageTarget=language_target)
# tree_trad = etree.ElementTree(root_trad)
filepath = os.path.join(output_dir, twb_output)
tree_trad.write(filepath,encoding ='utf-8', pretty_print=True)
#replace_apostroph(filepath)
print("Writed to {filepath}".format(filepath=filepath))发布于 2019-11-05 15:26:09
根据我对Python文档的看法/经验和研究,我建议改进以下方法:
首先,namedtuple必须与大写名称一起使用,就像类一样。
Textual_Node = namedtuple('Textual_Node', ['id', 'label', 'type'])这有助于确定创建新数据类对象的操作。namedtuple() Python文档
第二,尝试将与一个数据函数相关的所有函数组合到类中。
例如,在attr中使用root的所有函数都可以组合成一个函数,在改进之后使用self,它们会攻击root。它可以看起来像:
class ClassName():
def __init__(self, root):
self.root
def get_caption(self):
do_somethink_with_root(self.root)
...第三,对各个相关代码块进行一行缩进。
第四,使用logging模块保存有关脚本执行的信息。
请试着在代码中使用指针。例如,它可能是一个pylint或flake8,用于制作更多的pythonic代码样式,而Pydocstyle将执行更多的可理解代码和文档字符串操作。作为请求Google提供Python代码样式,只在文档行中使用英语。
https://codereview.stackexchange.com/questions/231839
复制相似问题