from bs4 import BeautifulSoup
# Parse the saved Payscale page and collect, for every table cell, the text of
# its title span and its value span into two index-aligned lists.
with open('./Desktop/Payscale.html') as web:
    soup = BeautifulSoup(web, 'html.parser')

# The second positional argument of find_all()/find() filters by CSS class.
data = soup.find_all('td', 'data-table__cell')
print(data)
title_list = []
value_list = []
for each in data:
    title_span = each.find('span', 'data-table__title')
    value_span = each.find('span', 'data-table__value')
    # find() returns None when a cell lacks one of the spans; skip such cells
    # so the two lists stay aligned instead of raising AttributeError.
    if title_span is None or value_span is None:
        continue
    title = title_span.get_text()
    value = value_span.get_text()
    title_list.append(title)
    value_list.append(value)
print(title_list)
print(value_list)  # Output:
['Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 
'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:', 'Rank:', 'Major:', 'Degree Type:', 'Early Career Pay:', 'Mid-Career Pay:', '% High Meaning:']
['1', 'Petroleum Engineering', 'Bachelors', '$93,200', '$187,300', '67%', '2', 'Operations Research & Industrial Engineering', 'Bachelors', '$84,800', '$170,400', '28%', '3', 'Electrical Engineering & Computer Science (EECS)', 'Bachelors', '$108,500', '$159,300', '46%', '4', 'Interaction Design', 'Bachelors', '$68,300', '$155,800', '55%', '5', 'Public Accounting', 'Bachelors', '$59,800', '$147,700', '47%', '6', 'Operations Research', 'Bachelors', '$83,500', '$147,400', '54%', '7', 'Applied Economics and Management', 'Bachelors', '$66,100', '$146,400', '67%', '8', 'Business Computing (BC)', 'Bachelors', '$73,000', '$143,600', '-', '9', 'Actuarial Mathematics', 'Bachelors', '$64,300', '$143,400', '51%', '10', 'Electrical Power Engineering', 'Bachelors', '$76,100', '$142,600', '68%', '11', 'Information & Computer Science', 'Bachelors', '$58,600', '$140,900', '62%', '12', 'Aeronautics & Astronautics', 'Bachelors', '$77,600', '$139,600', '56%', '13', 'Systems Engineering', 'Bachelors', '$77,700', '$139,200', '53%', '14', 'Econometrics', 'Bachelors', '$64,200', '$139,000', '35%', '15', 'Pharmacy', 'Bachelors', '$68,600', '$138,700', '78%', '16', 'Aerospace Studies', 'Bachelors', '$55,800', '$136,600', '-', '17', 'Building Science', 'Bachelors', '$53,800', '$135,900', '53%', '17', 'Chemical Engineering', 'Bachelors', '$76,900', '$135,900', '55%', '19', 'Cognitive Science', 'Bachelors', '$68,700', '$135,200', '42%', '20', 'Actuarial Science', 'Bachelors', '$67,700', '$134,400', '43%', '21', 'Political Economy', 'Bachelors', '$65,100', '$133,500', '29%', '22', 'Computer Systems Engineering', 'Bachelors', '$79,000', '$133,200', '51%', '23', 'Electrical & Computer Engineering (ECE)', 'Bachelors', '$78,100', '$131,600', '49%', '24', 'Computer Science (CS) & Engineering', 'Bachelors', '$79,400', '$131,300', '44%', '24', 'Marine Engineering', 'Bachelors', '$79,900', '$131,300', '60%']#这里我有两个列表,一个有键,另一个有值,按索引排列得很好
# The three attempts below all end up with a single six-entry dict: the same
# six titles repeat for every table row, and a dict keeps one value per key,
# so each repeated title overwrites the value stored for the previous row.

# Option A (combine the complete lists into a dict with zip) -
raw_dict = dict(zip(title_list, value_list))
print(raw_dict)  # Only the last iteration is returned, as a dictionary

# Option B (build the dictionary from the lists by index) -
dict2 = {title: value_list[i] for i, title in enumerate(title_list)}
print(dict2)  # Again only the last iteration is returned

# Option C (skip the lists and use a comprehension directly) -
# The comprehension already visits every cell, so no outer index loop is
# needed; wrapping it in one only rebuilt the same dict repeatedly.
data_dict = {
    each.find('span', 'data-table__title').get_text():
        each.find('span', 'data-table__value').get_text()
    for each in data
}
print(data_dict)  # Once more, only the last iteration is left in the output
#输出(所有三个选项):
{'Rank:': '24', 'Major:': 'Marine Engineering', 'Degree Type:': 'Bachelors', 'Early Career Pay:': '$79,900', 'Mid-Career Pay:': '$131,300', '% High Meaning:': '60%'}
发布于 2022-08-31 13:08:18
问题在于,title_list 中存在重复的元素;当你把它们用作字典的键时,后面的值每次都会覆盖前面的值。
如果你只想要一个字典(每个键对应该列所有值的列表),可以这样做:
# Group every value under its title so repeated titles accumulate into a list
# instead of overwriting one another.
raw_dict = {}
for title, value in zip(title_list, value_list):
    # setdefault() creates the list the first time a title is seen and
    # returns the existing list afterwards.
    raw_dict.setdefault(title, []).append(value)
# This way you get:
{
'Rank:': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '17', '19', '20', '21', '22', '23', '24', '24'],
'Major:': ['Petroleum Engineering', 'Operations Research & Industrial Engineering', 'Electrical Engineering & Computer Science (EECS)', 'Interaction Design', 'Public Accounting', 'Operations Research', 'Applied Economics and Management', 'Business Computing (BC)', 'Actuarial Mathematics', 'Electrical Power Engineering', 'Information & Computer Science', 'Aeronautics & Astronautics', 'Systems Engineering', 'Econometrics', 'Pharmacy', 'Aerospace Studies', 'Building Science', 'Chemical Engineering', 'Cognitive Science', 'Actuarial Science', 'Political Economy', 'Computer Systems Engineering', 'Electrical & Computer Engineering (ECE)', 'Computer Science (CS) & Engineering', 'Marine Engineering'],
'Degree Type:': ['Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors', 'Bachelors'],
'Early Career Pay:': ['$93,200', '$84,800', '$108,500', '$68,300', '$59,800', '$83,500', '$66,100', '$73,000', '$64,300', '$76,100', '$58,600', '$77,600', '$77,700', '$64,200', '$68,600', '$55,800', '$53,800', '$76,900', '$68,700', '$67,700', '$65,100', '$79,000', '$78,100', '$79,400', '$79,900'],
'Mid-Career Pay:': ['$187,300', '$170,400', '$159,300', '$155,800', '$147,700', '$147,400', '$146,400', '$143,600', '$143,400', '$142,600', '$140,900', '$139,600', '$139,200', '$139,000', '$138,700', '$136,600', '$135,900', '$135,900', '$135,200', '$134,400', '$133,500', '$133,200', '$131,600', '$131,300', '$131,300'],
'% High Meaning:': ['67%', '28%', '46%', '55%', '47%', '54%', '67%', '-', '51%', '68%', '62%', '56%', '53%', '35%', '78%', '-', '53%', '55%', '42%', '43%', '29%', '51%', '49%', '44%', '60%']
}
https://stackoverflow.com/questions/73556201
复制相似问题