我有一份导入到脚本中的数据(data),下面是一个小示例。
LastName StartTime EndTime Duration Period TeamAbbrev Position
Bouwmeester 0:00 0:37 0:37 1 STL D
Schwartz 0:00 0:40 0:40 1 STL W
Foligno 0:00 0:40 0:40 1 MIN W
Pietrangelo 0:00 0:48 0:48 1 STL D
Suter 0:00 0:40 0:40 1 MIN D
Staal 0:00 0:40 0:40 1 MIN C
Niederreiter 0:00 0:40 0:40 1 MIN W
Allen 0:00 20:00 20:00 1 STL G
Steen 0:00 0:30 0:30 1 STL W
Tarasenko 0:30 1:27 0:57 1 STL W
Parayko 0:37 1:43 1:06 1 STL D

这是脚本:
import csv
from itertools import combinations, product
#Header = LastName StartTime EndTime Duration Period TeamAbbrev Position
#Import Game
# Read the game file, drop the header line, and rebuild each CSV record as a
# space-separated text line so the rest of the script can use str.split().
with open('2017020397.csv', newline='') as f:
    # Skip the header; the default keeps an empty file from raising
    # StopIteration.
    next(f, None)
    skaters = '\n'.join(' '.join(row) for row in csv.reader(f))
data = skaters.splitlines()
def to_secs(ms):
    """Convert a ``mm:ss`` string to a number of seconds.

    Args:
        ms: Text holding two integers separated by a single colon,
            e.g. ``"1:27"``.

    Returns:
        ``60 * minutes + seconds`` as an int.

    Raises:
        ValueError: If ``ms`` does not split into exactly two integer parts.
    """
    m, s = map(int, ms.split(':'))
    return 60 * m + s
# Store a list of (start, end) times, in seconds, for each player.
# NOTE: only the first three fields of a row are read, so Period and
# TeamAbbrev play no part in this table.
players = {}
for row in data:
    if not row.strip():
        # A blank record has no fields to unpack; skip it.
        continue
    name, start, end = row.split(None, 3)[:3]
    times = to_secs(start), to_secs(end)
    players.setdefault(name, []).append(times)

# Show the collected shifts, one (name, [(start, end), ...]) pair per line.
for t in players.items():
    print(t)
print()
# Determine the amount of overlapping time for each combination of three players
for p1, p2, p3 in combinations(sorted(players), 3):
    total = 0
    # Check every triple of shifts, taking one shift from each player
    for t1, t2, t3 in product(players[p1], players[p2], players[p3]):
        # zip regroups the three (start, end) pairs into all starts / all ends.
        # The shared interval runs from the latest start to the earliest end;
        # it is clamped at zero when the three shifts do not all overlap.
        start, end = zip(t1, t2, t3)
        total += max(0, min(end) - max(start))
    # Only report combinations that shared some time
    if total:
        print(p1, p2, p3, total)
# Output:
Allen Niederreiter Pietrangelo 5481
Allen Niederreiter Prosser 2088
Allen Niederreiter Reilly 1464

这样做的目的是看看哪些队友同时在场上。从输出(Output)中可以看到,来自STL的Allen与来自MIN的Niederreiter被配在了一起。我只想对同一支球队的球员做combinations。球队是通过TeamAbbrev来识别的。另外需要注意的是,TeamAbbrev会随每场比赛的对阵双方而变化。欢迎任何建议,谢谢!
编辑:如果int()比TeamAbbrev的str()更容易,我可以抓取teamId,它是一个数字。
发布于 2017-12-09 12:32:25
在 `for row in data:` 这一行之后添加:
# NOTE(review): with the header "LastName StartTime EndTime Duration Period
# TeamAbbrev Position", index 4 is the Period column and TeamAbbrev is
# index 5 -- confirm which column was intended.
teams = row.split()[4]
# if the number of occurrences of the first item (which is a team)... is equal to the length of the list of teams, then, all the players are from the same team.
# NOTE(review): `teams` is a single str here, so count()/len() operate on its
# characters rather than on a list of team names -- verify before relying on it.
if teams.count(teams[0]) == len(teams):
# same lines, but one indentation block due to the `if` condition.

发布于 2017-12-18 10:28:39
你的问题不容易回答,但我会试一试。我做了一些假设:
<
现在有两个文件:
2017020397.csv
LastName,StartTime,EndTime,Duration,Period,TeamAbbrev,Position
Bouwmeester,0:00,0:37,0:37,1,STL,D
Schwartz,0:00,0:40,0:40,1,STL,W
Foligno,0:00,0:40,0:40,1,MIN,W
Pietrangelo,0:00,0:48,0:48,1,STL,D
Suter,0:00,0:40,0:40,1,MIN,D
Staal,0:00,0:40,0:40,1,MIN,C
Niederreiter,0:00,0:40,0:40,1,MIN,W
Allen,0:00,20:00,20:00,1,STL,G
Steen,0:00,0:30,0:30,1,STL,W
Tarasenko,0:30,1:27,0:57,1,STL,W
Parayko,0:37,1:43,1:06,1,STL,D

solution.py
import csv
import re
import itertools
# One or two digits, a colon, one or two digits (minutes:seconds).
pattern_time = r"(\d{1,2}):(\d{1,2})"
time_tester = re.compile(pattern_time)


def convert_to_seconds(time_string):
    """Convert a ``mm:ss`` string to seconds.

    The pattern is matched at the start of ``time_string`` only, so any
    text following the ``mm:ss`` prefix is ignored.

    Args:
        time_string: Text such as ``"1:06"`` or ``"20:00"``.

    Returns:
        ``60 * minutes + seconds`` as an int, or ``0`` when
        ``time_string`` does not start with a ``mm:ss`` pattern.
    """
    pattern_found = time_tester.match(time_string)
    if not pattern_found:
        # We have a problem: the text is not in mm:ss form. The value 0 is
        # returned rather than raising, so the caller keeps going.
        return 0
    minutes, seconds = map(int, pattern_found.group(1, 2))
    return 60 * minutes + seconds
file_name = '2017020397.csv'
# Nested lookup built below:
# teams[TeamAbbrev][Period][LastName] -> list of
# {'StartTime': seconds, 'EndTime': seconds} dicts, one per recorded shift.
teams = {}
number_of_players_to_compare = 3

with open(file_name, newline='') as source_file:
    # DictReader consumes the header row and exposes each field by column name.
    csv_file = csv.DictReader(source_file)
    for row in csv_file:
        # setdefault creates each nesting level the first time it is seen.
        current_team = teams.setdefault(row['TeamAbbrev'], {})
        current_team_period = current_team.setdefault(row['Period'], {})
        current_skater = current_team_period.setdefault(row['LastName'], [])
        times_recorded = {
            'StartTime': convert_to_seconds(row['StartTime']),
            'EndTime': convert_to_seconds(row['EndTime']),
        }
        current_skater.append(times_recorded)
# For every team and period, print the total shared time of each
# combination of `number_of_players_to_compare` players.
for current_team_to_show, current_periods in teams.items():
    # Period keys are the raw CSV strings, so this ordering is textual.
    for current_period_name in sorted(current_periods):
        print("\nFor team", current_team_to_show, "in period", current_period_name, ":")
        current_period = current_periods[current_period_name]
        current_players = sorted(current_period)
        for current_player_combination in itertools.combinations(current_players, number_of_players_to_compare):
            total = 0
            # Try every way of picking one recorded shift per player.
            for times_this_combination in itertools.product(*(current_period[x] for x in current_player_combination)):
                start_times = (x['StartTime'] for x in times_this_combination)
                end_times = (x['EndTime'] for x in times_this_combination)
                # Shared time runs from the latest start to the earliest end;
                # clamp at zero when the shifts do not all overlap.
                total += max(0, min(end_times) - max(start_times))
            print(" ".join(current_player_combination), total)
# The author's notes on how this was done follow below.
我使用了 csv.DictReader,这样我就不必手动跳过第一行,并且能够通过列名获取每一行的各个字段。如果你有问题,请不要犹豫,尽管问。
https://stackoverflow.com/questions/47725016
复制相似问题