我有一个这样的字典:
source = {
'Section 1' : {
'range' : [0, 200],
'template' : 'ID-LOA-XXX',
'nomenclature': True
},
'Section 2' : {
'range' : [201, 800],
'template' : 'ID-EPI-XXX',
'nomenclature': False,
'Subsection 1' : {
'range' : [0, 400],
'template' : 'ID-EPI-S1-XXX',
'nomenclature': False,
'Subsubsection 1' : {
'range' : [0, 400],
'template' : 'ID-EPI-S12-XXX',
'nomenclature': False
}
},
'Subsection 2' : {
'range' : [0, 400],
'template' : 'ID-EPI-S2-XXX',
'nomenclature': False
}
},
# etc.
}
它是从JSON文件中加载的。我想把它展平成下面这样的字典:
target = {
'Section 1' : {
'range' : [0, 200],
'template' : 'ID-LOA-XXX',
'nomenclature': True,
'location' : './Section 1/'
},
'Section 2' : {
'range' : [201, 800],
'template' : 'ID-EPI-XXX',
'nomenclature': False,
'location' : './Section 2/'
},
'Subsection 1' : {
'range' : [0, 400],
'template' : 'ID-EPI-S1-XXX',
'nomenclature': False,
'location' : './Section 2/Subsection 1/'
},
'Subsubsection 1' : {
'range' : [0, 400],
'template' : 'ID-EPI-S12-XXX',
'nomenclature': False,
'location' : './Section 2/Subsection 1/Subsubsection 1'
},
'Subsection 2' : {
'range' : [0, 400],
'template' : 'ID-EPI-S2-XXX',
'nomenclature': False,
'location' : './Section 2/Subsection 2/'
},
# etc.
}
我也许可以更改原始JSON文件的生成方式,但我不想走到那一步。
用文字来描述这个JSON文件:每个部分至少包含三个键,并且可能包含其他键。这些其他键被解释为包含在当前部分中的子部分,每个子部分都是具有相同属性的dict。原则上,这种嵌套可以无限深。
我还想做一些断言:
每个部分都必须包含这三个键('range'、'template'和'nomenclature')。
到目前为止,我只做了这些检查:
import json
def _is_int(value):
    """Return True for integers, excluding bool (which subclasses int)."""
    return isinstance(value, int) and not isinstance(value, bool)


# Maps every mandatory section key to a predicate that validates its value.
key_requirements = {
    # Must be a boolean.
    "nomenclature": lambda x: isinstance(x, bool),
    # Must be a string containing the character "X".
    "template": lambda x: isinstance(x, str) and "X" in x,
    # Must be a two-element list of integers whose second element is
    # strictly greater than the first. Booleans are rejected explicitly,
    # otherwise [False, True] would pass as a valid range.
    "range": lambda x: (
        isinstance(x, list)
        and len(x) == 2
        and all(_is_int(y) for y in x)
        and x[1] > x[0]
    ),
}
def checkSection(section, requirements=None):
    """Validate one section and, recursively, every subsection nested in it.

    A section is a dict. Keys listed in ``requirements`` are mandatory and
    their values must satisfy the associated predicate. Every other key is
    treated as the name of a subsection and validated the same way.

    Args:
        section: The section dict to validate.
        requirements: Mapping of required key -> predicate. Defaults to the
            module-level ``key_requirements``.

    Raises:
        ValueError: If ``section`` (or a subsection) is not a dict, a
            required key is missing, or a value fails its predicate.
    """
    if requirements is None:
        requirements = key_requirements
    if not isinstance(section, dict):
        raise ValueError("section must be a dict, got %r" % (section,))

    # Presence has to be checked separately: iterating over the section's
    # own keys can never reveal a key that is absent.
    missing = sorted(key for key in requirements if key not in section)
    if missing:
        raise ValueError("missing required key(s): %s" % ", ".join(missing))

    for key, value in section.items():
        if key in requirements:
            if not requirements[key](value):
                raise ValueError("invalid value for %r: %r" % (key, value))
        else:
            # Not a required key, so it names a subsection.
            checkSection(value, requirements)
for key in source # json.load(open(myJsonFile))
checkSection(data[key])
但是目前,喝再多的咖啡也没能让我想出一种高效、优雅、Pythonic的方法,把这种结构转换成所期望的形式。
有什么建议或想法吗?
发布于 2014-07-14 11:51:29
这个问题需要递归遍历。除非您想使用第三方库(确实有现成的解决方案),否则只需自己编写一个简单的递归遍历即可。
注意:路径的写法可能与您的不同,因为我使用的是Windows。
Implementation
def flatten(source):
    """Flatten a nested mapping of sections into a single-level dict.

    Every dict-valued entry is treated as a (sub)section and becomes a
    top-level entry of the result, keyed by its own name. Its non-dict
    values are copied over, and a 'location' entry holds the path from the
    root ('.') down to the section, built with ``os.path.join`` (so the
    separator follows the host platform).

    Args:
        source: Mapping of section name -> section dict, nested to any depth.

    Returns:
        A new dict mapping each section name to its flattened dict. If two
        sections share a name, the one visited last replaces the earlier one.

    Raises:
        KeyError: If ``source`` itself holds a non-dict value, since there
            is no enclosing section to attach it to.
    """
    import os  # imported here so the snippet works when pasted on its own

    target = {}

    def helper(src, path='.', last_key=None):
        # Compare against None rather than relying on truthiness, so that
        # a section whose name is the empty string still gets an entry.
        if last_key is not None:
            target[last_key] = {'location': path}
        for key, value in src.items():
            if isinstance(value, dict):
                helper(value, os.path.join(path, key), key)
            else:
                target[last_key][key] = value

    helper(source)
    return target


# Output:
>>> pprint.pprint(source)
{'Section 1': {'nomenclature': True,
'range': [0, 200],
'template': 'ID-LOA-XXX'},
'Section 2': {'Subsection 1': {'Subsubsection 1': {'nomenclature': False,
'range': [0, 400],
'template': 'ID-EPI-S12-XXX'},
'nomenclature': False,
'range': [0, 400],
'template': 'ID-EPI-S1-XXX'},
'Subsection 2': {'nomenclature': False,
'range': [0, 400],
'template': 'ID-EPI-S2-XXX'},
'nomenclature': False,
'range': [201, 800],
'template': 'ID-EPI-XXX'}}
>>> pprint.pprint(flatten(source))
{'Section 1': {'location': '\\Section 1',
'nomenclature': True,
'range': [0, 200],
'template': 'ID-LOA-XXX'},
'Section 2': {'location': '\\Section 2',
'nomenclature': False,
'range': [201, 800],
'template': 'ID-EPI-XXX'},
'Subsection 1': {'location': '\\Section 2\\Subsection 1',
'nomenclature': False,
'range': [0, 400],
'template': 'ID-EPI-S1-XXX'},
'Subsection 2': {'location': '\\Section 2\\Subsection 2',
'nomenclature': False,
'range': [0, 400],
'template': 'ID-EPI-S2-XXX'},
'Subsubsection 1': {'location': '\\Section 2\\Subsection 1\\Subsubsection 1',
'nomenclature': False,
'range': [0, 400],
'template': 'ID-EPI-S12-XXX'}}
发布于 2014-07-14 08:55:27
这适用于您的情况:
# Flatten `source` by exactly one level: each dict found directly inside a
# top-level section is promoted to a top-level entry of `output`.
# Limitations: sections nested deeper than that stay nested inside their
# promoted parent, and no 'location' key is added.
output = {}
for key, value in source.items():
    item = {}
    for nested_key, nested_value in value.items():
        if isinstance(nested_value, dict):
            # A dict value is a subsection: promote a shallow copy of it.
            output[nested_key] = dict(nested_value)
        else:
            item[nested_key] = nested_value
    output[key] = item
# (next answer) posted 2014-07-14 10:26:59
我最终得到了这样的解决方案:
import os
def _is_int(value):
    """Return True for integers, excluding bool (which subclasses int)."""
    return isinstance(value, int) and not isinstance(value, bool)


# Maps every mandatory section key to a predicate that validates its value.
key_requirements = {
    # Must be a boolean.
    "nomenclature": lambda x: isinstance(x, bool),
    # Must be a string containing the character "X".
    "template": lambda x: isinstance(x, str) and "X" in x,
    # Must be a two-element list of integers whose second element is
    # strictly greater than the first. Booleans are rejected explicitly,
    # otherwise [False, True] would pass as a valid range.
    "range": lambda x: (
        isinstance(x, list)
        and len(x) == 2
        and all(_is_int(y) for y in x)
        and x[1] > x[0]
    ),
}
def checkAndFlattenData(data, requirements=None):
    """Validate nested section data and flatten it into a single-level dict.

    Each section (at any depth) becomes one top-level entry of the result,
    keyed by its own name and holding its 'range', 'template' and (when
    present) 'nomenclature' values plus a 'location' path. The path runs
    from '.' down to the section and is built with ``os.path.join``.

    Keys listed in ``requirements`` are validated with their predicate; a
    failing value prints "ASSERTION FAILED!" and processing continues. Any
    other key is treated as a subsection when its value is a dict, and is
    ignored otherwise.

    Args:
        data: Mapping of section name -> section dict, nested to any depth.
        requirements: Mapping of key -> predicate. Defaults to the
            module-level ``key_requirements``.

    Returns:
        A new dict mapping every section name to its flattened dict. Section
        names are expected to be unique; a later duplicate replaces an
        earlier one.

    Raises:
        KeyError: If a section lacks 'range' or 'template'.
    """
    if requirements is None:
        requirements = key_requirements

    flat = {}

    def visit(name, content, parent_path):
        # Carrying the path down the recursion gives every section its
        # final location immediately, with no re-prefixing on the way up.
        location = os.path.join(parent_path, name)
        entry = {
            'range': content['range'],
            'template': content['template'],
        }
        # The desired target keeps 'nomenclature' as well.
        if 'nomenclature' in content:
            entry['nomenclature'] = content['nomenclature']
        entry['location'] = location
        flat[name] = entry

        for key, value in content.items():
            if key in requirements:
                if not requirements[key](value):
                    print("ASSERTION FAILED!")  # field assertion failed
            elif isinstance(value, dict):
                visit(key, value, location)
            # Anything else is an unknown non-section key and is ignored.

    for name, content in data.items():
        visit(name, content, '.')
    return flat
target = checkAndFlattenData(source)
但我不禁觉得,这一切还可以做得更简洁(和/或更高效)……如果有人有任何建议,请不要犹豫,直接复制粘贴这段代码并加以改进,然后作为独立的答案发布,这样我就可以采纳它。
https://stackoverflow.com/questions/24732554
复制相似问题