fieldict(filename)读取DOT格式的文件,并返回一个字典,其中DOT CMPLID转换为整数作为键,元组作为键的相应值。元组的格式为:(制造商,日期,崩溃,城市,州)
fieldict("DOT500.txt")[416]
('DAIMLERCHRYSLER CORPORATION', datetime.date(1995, 1, 9), False, 'ARCADIA', ...)
到目前为止,我已经试过了:
from collections import defaultdict
import datetime
def fieldict(filename):
    """Read a tab-separated DOT complaint file into a dictionary.

    Every non-blank line of *filename* becomes one entry. The key is the
    complaint number (column 0) converted to ``int``. The value is the tuple
    ``(manufacturer, date, crash, city, state)`` taken from columns 2, 7, 6,
    12 and 13 respectively.

    * ``date`` is a ``datetime.date`` parsed from a ``YYYYMMDD`` string, or
      ``None`` when the column is empty.
    * ``crash`` is ``True`` when the column holds ``'Y'``, else ``False``.
    * Text columns are stripped of surrounding whitespace.

    Raises:
        IndexError: a line has fewer than 14 tab-separated columns.
        ValueError: the complaint number or the date cannot be parsed.
    """
    complaints = {}
    with open(filename) as f:
        for line in f:
            # Split each line inside the loop: the earlier version split only
            # the last line read, so every key received the same data.
            fields = line.rstrip('\r\n').split('\t')
            if not fields[0].strip():
                continue  # blank line, nothing to record

            raw_date = fields[7].strip()
            if raw_date:
                # datetime.date() needs integers; strptime parses YYYYMMDD.
                date = datetime.datetime.strptime(raw_date, '%Y%m%d').date()
            else:
                date = None

            # A parenthesised literal builds the tuple; tuple() itself
            # accepts a single iterable, not five positional arguments.
            complaints[int(fields[0])] = (
                fields[2].strip(),
                date,
                fields[6].strip() == 'Y',
                fields[12].strip(),
                fields[13].strip(),
            )
    return complaints
# Asker's remark about the original attempt: "no luck..."
'11\t958128\tDAIMLERCHRYSLER CORPORATION\tDODGE\tSHADOW\t1990\tY\t19941117\tN\t0\t0\tENGINE AND ENGINE COOLING:ENGINE\tWILMINGTON \tDE\t1B3XT44KXLN\t19950103\t19950103\t\t1\tENGINE MOTOR MOUNTS FAILED, RESULTING IN ENGINE NOISE. *AK\tEVOQ\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tV\t\r\n'不带字符标记的条目显示:
11 958128 DAIMLERCHRYSLER CORPORATION DODGE SHADOW 1990 Y 19941117 N 0 0 ENGINE AND ENGINE COOLING:ENGINE WILMINGTON DE 1B3XT44KXLN 19950103 19950103 1 ENGINE MOTOR MOUNTS FAILED, RESULTING IN ENGINE NOISE. *AK EVOQ 发布于 2012-10-31 16:56:38
看起来您应该熟悉一下csv模块,因为这看起来像是制表符分隔的csv文本。csv.reader()返回一个迭代器,把它放进for循环时会自动调用其.next()方法,因此您可以逐行遍历该文件。
一般来说,请阅读PEP8,并使用易于理解的变量名。对于python,如果写起来开始觉得费劲,这通常说明存在更好的写法。
import csv
import datetime
def _build_datetime(line, date_idx=7):
    """Return the date stored in one column of a parsed record.

    Args:
        line: sequence of string fields for one record.
        date_idx: position of the ``YYYYMMDD`` date column. Defaults to 7,
            where the sample record in this thread holds ``'19941117'``.

    Returns:
        A ``datetime.date``, or ``None`` when the column is empty or only
        whitespace.

    Raises:
        ValueError: the column is populated but is not a valid ``YYYYMMDD``
            date.
    """
    raw_value = line[date_idx].strip()
    if not raw_value:
        # Unpopulated date column: report "no date" instead of failing.
        return None
    return datetime.datetime.strptime(raw_value, "%Y%m%d").date()
def format2dict(filename):
    """Load a tab-delimited complaint file into a dict keyed by complaint id.

    Each non-empty row becomes ``complaints[int(row[0])]``. The value is the
    tuple ``(manufacturer, date, crash, city, state)``:

    * ``manufacturer``, ``city`` and ``state`` are columns 2, 12 and 13,
      stripped of surrounding whitespace.
    * ``date`` is whatever ``_build_datetime(row)`` returns.
    * ``crash`` is ``True`` when column 6 holds ``'Y'``, else ``False``.

    The file is opened in text mode with ``newline=""``, as the Python 3
    ``csv`` module requires.

    Raises:
        IndexError: a row has fewer than 14 columns.
        ValueError: the complaint id is not an integer.
    """
    complaint_id_idx = 0
    manufacturer_idx = 2
    crash_idx = 6
    city_idx = 12
    state_idx = 13

    complaints = {}
    with open(filename, newline="") as in_f:
        # QUOTE_NONE: the data is plain tab-delimited text, so a stray quote
        # character inside a description must not merge rows together.
        reader = csv.reader(in_f, delimiter="\t", quoting=csv.QUOTE_NONE)
        for line in reader:
            if not line:
                continue  # csv yields an empty list for a blank line
            complaint_id = int(line[complaint_id_idx])
            complaints[complaint_id] = (
                line[manufacturer_idx].strip(),
                _build_datetime(line),
                line[crash_idx].strip() == "Y",
                line[city_idx].strip(),
                line[state_idx].strip(),
            )
    return complaints
if __name__ == "__main__":
    formatted_data = format2dict("DOT500.txt")
发布于 2012-10-31 13:14:33
注意:换行符的修剪由读者决定。
完成此任务的一种简单方法是使用dict(zip(headers,data_list))
假设您的样本数据如下所示
joe\tSan Francisco\tapple
frank\tNew York City\torange
tim\tHawaii\tpineapple
你可以这样做:
# Column names, in file order, paired with each line's fields below.
headers = ['person', 'place', 'fruit']
results = []
# The context manager closes the file even if a line fails to process, and
# iterating the handle avoids loading the whole file via readlines().
with open('datafile.txt') as data_file:
    for line in data_file:
        # The trailing newline is deliberately left on the last field.
        record = line.split('\t')
        results.append(dict(zip(headers, record)))
# Each line becomes one dictionary, appended to the end of `results`.
看起来像这样:
[{'fruit': 'apple\n', 'person': 'joe', 'place': 'San Francisco'},
{'fruit': 'orange\n', 'person': 'frank', 'place': 'New York City'},
{'fruit': 'pineapple\n', 'person': 'tim', 'place': 'Hawaii'}]
发布于 2012-10-31 14:37:10
使用line.split('\t')将文本拆分成多个片段是正确的。
import datetime
# One raw complaint record, tab-separated, used to demonstrate the split.
a = '11\t958128\tDAIMLERCHRYSLER CORPORATION\tDODGE\tSHADOW\t1990\tY\t19941117\tN\t0\t0\tENGINE AND ENGINE COOLING:ENGINE\tWILMINGTON \tDE\t1B3XT44KXLN\t19950103\t19950103\t\t1\tENGINE MOTOR MOUNTS FAILED, RESULTING IN ENGINE NOISE. *AK\tEVOQ\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tV\t'
fields = a.split('\t')
recordNum = fields[0]
mfr = fields[2]
# Column 7 carries a full YYYYMMDD value ('19941117' here), so parse it
# rather than pairing the model year (column 5) with a made-up month and day.
recDate = datetime.datetime.strptime(fields[7], '%Y%m%d').date()
# Column 3 is the make ('DODGE'); column 4 is the model ('SHADOW').
make = fields[3]
DOTrecord = recordNum, mfr, recDate, make
print DOTrecord
https://stackoverflow.com/questions/13151005
复制相似问题