Business problem:我有一个字典列表,记录了某个学生的学业历史——他们修过哪些课程、何时修读、成绩如何(成绩为空表示该课程仍在进行中)等等。我需要找出对同一门课程的重复修读记录,并且只保留成绩最高的那一条。
我到目前为止尝试过的
# Sample academic history: one dict per course attempt. A blank grade means
# the course is still in progress.
acad_hist = [
    {'crse_id': u'GRG 302P0', 'grade': u''},
    {'crse_id': u'URB 3010', 'grade': u'B+'},
    {'crse_id': u'GRG 302P0', 'grade': u'D'},
]
# Grades listed from lowest to highest.
grade_list = ['CR', 'D-', 'D', 'D+', 'C-', 'C', 'C+', 'B-', 'B', 'B+', 'A-', 'A', 'A+']
# Question: can someone supply the missing piece, or an even better way to do this?
更新--我最终得到的解决方案(我从接受的答案中得到的想法的改编)
def scrub_for_duplicate_courses(acad_hist_condensed, acad_hist_list):
    """
    Remove repeated attempts at the same course, keeping only the best one.

    Each record in ``acad_hist_condensed`` is a dict holding a ``'grade'`` and
    a course id stored under either ``'compressed_hist_crse_id'`` or
    ``'compressed_ovrd_crse_id'``. When the same course id appears more than
    once, the record with the highest-priority grade is kept and every other
    attempt is removed. A blank grade means "in progress" and outranks every
    letter grade; on a tie the earlier record is kept.

    For every record removed from ``acad_hist_condensed``, one entry equal to
    that course id is also removed from ``acad_hist_list`` (presumably a flat
    list of course ids kept alongside the condensed history -- confirm
    against the caller).

    Both lists are modified in place and returned as a tuple
    ``(acad_hist_condensed, acad_hist_list)``.

    Raises ``KeyError`` if a record carries neither course-id key; in that
    case neither list has been modified.
    """
    # -----------------------------------------------------------------------
    # Grades in ascending priority. The blank grade is deliberately last so
    # that an in-progress attempt replaces any attempt that has a grade.
    # Grades missing from this table rank 0, i.e. level with 'CR'.
    # -----------------------------------------------------------------------
    grade_order = ['CR', 'D-', 'D', 'D+', 'C-', 'C', 'C+', 'B-', 'B', 'B+', 'A-', 'A', 'A+', '']
    grade_rank = dict(zip(grade_order, range(len(grade_order))))
    id_keys = ('compressed_hist_crse_id', 'compressed_ovrd_crse_id')

    # Pass 1: decide which records lose, without touching either list, so we
    # never mutate a list while iterating over it.
    best_attempt = {}   # course id -> record currently considered the best
    superseded = []     # (course id, record) pairs that must be removed
    for course in acad_hist_condensed:
        for key in id_keys:
            if key in course:
                break
        else:
            raise KeyError(
                "course record has neither 'compressed_hist_crse_id' "
                "nor 'compressed_ovrd_crse_id'"
            )
        cid = course[key]
        if cid not in best_attempt:
            best_attempt[cid] = course
            continue
        incumbent = best_attempt[cid]
        if grade_rank.get(incumbent['grade'], 0) < grade_rank.get(course['grade'], 0):
            # The new attempt is better: the previously kept record is the
            # one to drop (captured before it is replaced).
            best_attempt[cid] = course
            superseded.append((cid, incumbent))
        else:
            superseded.append((cid, course))

    # Pass 2: remove exactly the losing records (matched by identity, so an
    # equal-looking winner is never removed by mistake) and one matching
    # entry from the parallel list for each.
    for cid, loser in superseded:
        for index, rec in enumerate(acad_hist_condensed):
            if rec is loser:
                del acad_hist_condensed[index]
                break
        if cid in acad_hist_list:
            acad_hist_list.remove(cid)  # just want to remove one occurrence

    return acad_hist_condensed, acad_hist_list
# Posted 2014-07-22 13:45:51
一个简单的解决方案是遍历学生的课程记录,并计算每门课程的最高成绩:
# Sample academic history: one dict per course attempt (blank grade = no
# grade recorded yet).
acad_hist = [
    {'crse_id': u'GRG 302P0', 'grade': u''},
    {'crse_id': u'URB 3010', 'grade': u'B+'},
    {'crse_id': u'GRG 302P0', 'grade': u'D'},
]
grade_list = ['CR', 'D-', 'D', 'D+', 'C-', 'C', 'C+', 'B-', 'B', 'B+', 'A-', 'A', 'A+']
# Let's turn grade_list into something more efficient: a grade -> rank dict.
grade_list = dict(zip(grade_list, range(len(grade_list))))  # 'CR' == 0, 'D-' == 1
courses = {}  # keys will be crse_id, values will be the best grade seen so far.
for course in acad_hist:
    cid = course['crse_id']
    g = course['grade']
    # Blank or unlisted grades rank -1, strictly below 'CR' (rank 0), so any
    # listed grade -- including 'CR' -- replaces a blank one.
    if cid not in courses or grade_list.get(courses[cid], -1) < grade_list.get(g, -1):
        courses[cid] = g
# Resulting contents of `courses`:
{u'GRG 302P0': u'D', u'URB 3010': u'B+'}
如果需要的话,可以将其重写回原来的格式。
发布于 2014-07-22 15:22:28
这可以用迭代器“积木”(即 ifilter、sorted、groupby 和 max)组合起来完成。
def find_best_grades(history, grades=None):
    """Return the highest-graded record for each course in *history*.

    *history* is an iterable of dicts with ``'crse_id'`` and ``'grade'``
    keys. Records whose grade is blank are ignored. The result is a list
    with one record per remaining course, ordered by course id.

    *grades* maps each grade string to its rank (higher is better); when
    omitted, the module-level ``GRADES`` mapping is used.

    Raises ``KeyError`` if a non-blank grade is missing from the ranking.
    """
    if grades is None:
        grades = GRADES

    def course(course_grade):
        return course_grade['crse_id']

    def grade(course_grade):
        return grades[course_grade['grade']]

    def has_grade(course_grade):
        return bool(course_grade['grade'])

    # 1) Remove records without a grade.
    # 2) Sort the history so that records for the same course are
    #    consecutive (groupby only merges adjacent items).
    graded = sorted((rec for rec in history if has_grade(rec)), key=course)
    # 3) Group records for the same course together.
    # 4) Use max to select the highest grade obtained for each course.
    return [max(course_grades, key=grade)
            for _, course_grades in groupby(graded, key=course)]
# The complete, runnable listing follows.
from itertools import groupby, ifilter
# Dictionary keys used by every academic-history record.
COURSE_ID = 'crse_id'
GRADE = 'grade'

# Sample transcript: one record per course attempt; an empty grade marks an
# attempt that has no grade yet.
ACADEMIC_HISTORY = [
    {COURSE_ID: 'GRG 302P0', GRADE: 'B'},
    {COURSE_ID: 'GRG 302P0', GRADE: ''},
    {COURSE_ID: 'URB 3010', GRADE: 'B+'},
    {COURSE_ID: 'GRG 302P0', GRADE: 'D'},
]

# Grade -> rank lookup; the grades are listed from lowest to highest, so a
# larger rank means a better grade ('CR' is 0, 'A+' is 12).
GRADES = dict(
    (letter, rank)
    for rank, letter in enumerate((
        'CR',
        'D-', 'D', 'D+',
        'C-', 'C', 'C+',
        'B-', 'B', 'B+',
        'A-', 'A', 'A+',
    ))
)
def find_best_grades(history, grades=None):
    """Return the highest-graded record for each course in *history*.

    *history* is an iterable of dicts with ``'crse_id'`` and ``'grade'``
    keys. Records whose grade is blank are ignored. The result is a list
    with one record per remaining course, ordered by course id.

    *grades* maps each grade string to its rank (higher is better); when
    omitted, the module-level ``GRADES`` mapping is used.

    Raises ``KeyError`` if a non-blank grade is missing from the ranking.
    """
    if grades is None:
        grades = GRADES

    def course(course_grade):
        return course_grade['crse_id']

    def grade(course_grade):
        return grades[course_grade['grade']]

    def has_grade(course_grade):
        return bool(course_grade['grade'])

    # 1) Remove records without a grade.
    # 2) Sort the history so that records for the same course are
    #    consecutive (groupby only merges adjacent items).
    graded = sorted((rec for rec in history if has_grade(rec)), key=course)
    # 3) Group records for the same course together.
    # 4) Use max to select the highest grade obtained for each course.
    return [max(course_grades, key=grade)
            for _, course_grades in groupby(graded, key=course)]
# Run the selection on the sample transcript and show the result.
best_grades = find_best_grades(ACADEMIC_HISTORY)
print(best_grades)  # parenthesised single argument prints the same on Python 2 and 3
# Source: https://stackoverflow.com/questions/24888770
复制相似问题