我编写了Python代码,它使用了多个if条件和一个for循环。守则的主要目的是在一定条件下产生一个交通灯系统。
Red = -1
Yellow = 0
Green = 1它需要4个月(m0、m1、m2、m3)和dataframe作为输入,在每一行中运行条件,然后返回-1、0或1。

代码将月份1与月份0,月2与月1,月3与月2进行比较。
供投入:
if month+1 < Month for any value, then red else green.例如,如果2020年7月的收入低于2020年6月,则输入为红色,否则为绿色。在三个比较的基础上,计算了计算结果。结果可能是1-,0或1。
我编写的代码运行良好,但没有以任何方式进行优化。有更好的方法吗?
这将是一个O(n)操作,但至少应该有一种用Python简明地编写它的方法。或者,如果代码也可以在操作上得到改进。
def getTrafficLightData(df, dimension, m1, m2, m3, m4):
'''
Inputs -
Dataframe
dimension = on which we want to calculate traffic light system
m1, m2, m3, m4 - Could be any for months but we have taken consecutive months for Traffic Light System.
Example Call - getTrafficLightData(report6_TLS_data, "Revenue_","2020-6","2020-7","2020-8","2020-9")
'''
TFS_df = pd.DataFrame(columns=[dimension + "_TLS"])
if dimension == "Overstrike_":
suffix = "%"
for i in range(len(df)):
if (
(
df[dimension + m1 + suffix].iloc[i]
> df[dimension + m2 + suffix].iloc[i]
)
and (
df[dimension + m2 + suffix].iloc[i]
> df[dimension + m3 + suffix].iloc[i]
)
and (
df[dimension + m3 + suffix].iloc[i]
> df[dimension + m4 + suffix].iloc[i]
)
):
TFS_df.loc[i] = [1]
elif (
(
df[dimension + m1 + suffix].iloc[i]
< df[dimension + m2 + suffix].iloc[i]
)
and (
df[dimension + m2 + suffix].iloc[i]
< df[dimension + m3 + suffix].iloc[i]
)
and (
df[dimension + m3 + suffix].iloc[i]
< df[dimension + m4 + suffix].iloc[i]
)
):
TFS_df.loc[i] = [-1]
elif (
(
df[dimension + m1 + suffix].iloc[i]
< df[dimension + m2 + suffix].iloc[i]
)
and (
df[dimension + m2 + suffix].iloc[i]
> df[dimension + m3 + suffix].iloc[i]
)
and (
df[dimension + m3 + suffix].iloc[i]
< df[dimension + m4 + suffix].iloc[i]
)
):
TFS_df.loc[i] = [-1]
elif (
(
df[dimension + m2 + suffix].iloc[i]
> df[dimension + m2 + suffix].iloc[i]
)
and (
df[dimension + m2 + suffix].iloc[i]
< df[dimension + m3 + suffix].iloc[i]
)
and (
df[dimension + m3 + suffix].iloc[i]
< df[dimension + m4 + suffix].iloc[i]
)
):
TFS_df.loc[i] = [-1]
elif (
(
df[dimension + m1 + suffix].iloc[i]
> df[dimension + m2 + suffix].iloc[i]
)
and (
df[dimension + m2 + suffix].iloc[i]
> df[dimension + m3 + suffix].iloc[i]
)
and (
df[dimension + m3 + suffix].iloc[i]
< df[dimension + m4 + suffix].iloc[i]
)
):
TFS_df.loc[i] = [0]
elif (
(
df[dimension + m1 + suffix].iloc[i]
> df[dimension + m2 + suffix].iloc[i]
)
and (
df[dimension + m2 + suffix].iloc[i]
< df[dimension + m3 + suffix].iloc[i]
)
and (
df[dimension + m3 + suffix].iloc[i]
> df[dimension + m4 + suffix].iloc[i]
)
):
TFS_df.loc[i] = [1]
elif (
(
df[dimension + m1 + suffix].iloc[i]
< df[dimension + m2 + suffix].iloc[i]
)
and (
df[dimension + m2 + suffix].iloc[i]
> df[dimension + m3 + suffix].iloc[i]
)
and (
df[dimension + m3 + suffix].iloc[i]
> df[dimension + m4 + suffix].iloc[i]
)
):
TFS_df.loc[i] = [1]
elif (
(
df[dimension + m1 + suffix].iloc[i]
< df[dimension + m2 + suffix].iloc[i]
)
and ( #
df[dimension + m2 + suffix].iloc[i]
< df[dimension + m3 + suffix].iloc[i]
)
and ( #
df[dimension + m3 + suffix].iloc[i]
> df[dimension + m4 + suffix].iloc[i]
)
):
TFS_df.loc[i] = [0]
else:
TFS_df.loc[i] = [0]
return TFS_df
else:
if dimension == "Margin_":
suffix = "%"
else:
suffix = ""
for i in range(len(df)):
if (
(
df[dimension + m1 + suffix].iloc[i]
> df[dimension + m2 + suffix].iloc[i]
)
and (
df[dimension + m2 + suffix].iloc[i]
> df[dimension + m3 + suffix].iloc[i]
)
and (
df[dimension + m3 + suffix].iloc[i]
> df[dimension + m4 + suffix].iloc[i]
)
):
TFS_df.loc[i] = [-1]
elif (
(
df[dimension + m1 + suffix].iloc[i]
< df[dimension + m2 + suffix].iloc[i]
)
and (
df[dimension + m2 + suffix].iloc[i]
< df[dimension + m3 + suffix].iloc[i]
)
and (
df[dimension + m3 + suffix].iloc[i]
< df[dimension + m4 + suffix].iloc[i]
)
):
TFS_df.loc[i] = [1]
elif (
(
df[dimension + m1 + suffix].iloc[i]
< df[dimension + m2 + suffix].iloc[i]
)
and (
df[dimension + m2 + suffix].iloc[i]
> df[dimension + m3 + suffix].iloc[i]
)
and (
df[dimension + m3 + suffix].iloc[i]
< df[dimension + m4 + suffix].iloc[i]
)
):
TFS_df.loc[i] = [1]
elif (
(
df[dimension + m1 + suffix].iloc[i]
> df[dimension + m2 + suffix].iloc[i]
)
and (
df[dimension + m2 + suffix].iloc[i]
< df[dimension + m3 + suffix].iloc[i]
)
and (
df[dimension + m3 + suffix].iloc[i]
< df[dimension + m4 + suffix].iloc[i]
)
):
TFS_df.loc[i] = [1]
elif (
(
df[dimension + m1 + suffix].iloc[i]
> df[dimension + m2 + suffix].iloc[i]
)
and (
df[dimension + m2 + suffix].iloc[i]
> df[dimension + m3 + suffix].iloc[i]
)
and (
df[dimension + m3 + suffix].iloc[i]
< df[dimension + m4 + suffix].iloc[i]
)
):
TFS_df.loc[i] = [0]
elif (
(
df[dimension + m1 + suffix].iloc[i]
> df[dimension + m2 + suffix].iloc[i]
)
and (
df[dimension + m2 + suffix].iloc[i]
< df[dimension + m3 + suffix].iloc[i]
)
and (
df[dimension + m3 + suffix].iloc[i]
> df[dimension + m4 + suffix].iloc[i]
)
):
TFS_df.loc[i] = [-1]
elif (
(
df[dimension + m1 + suffix].iloc[i]
< df[dimension + m2 + suffix].iloc[i]
)
and (
df[dimension + m2 + suffix].iloc[i]
> df[dimension + m3 + suffix].iloc[i]
)
and (
df[dimension + m3 + suffix].iloc[i]
> df[dimension + m4 + suffix].iloc[i]
)
):
TFS_df.loc[i] = [-1]
elif (
(
df[dimension + m1 + suffix].iloc[i]
< df[dimension + m2 + suffix].iloc[i]
)
and (
df[dimension + m2 + suffix].iloc[i]
< df[dimension + m3 + suffix].iloc[i]
)
and (
df[dimension + m3 + suffix].iloc[i]
> df[dimension + m4 + suffix].iloc[i]
)
):
TFS_df.loc[i] = [0]
else:
TFS_df.loc[i] = [0]
return TFS_df这个函数的调用方式如下-
report6_TLS_data['Revenue_TLS']=getTrafficLightData(report6_TLS_data, "Revenue_","2020-6","2020-7","2020-8","2020-9")
report6_TLS_data["Margin_TLS"]=getTrafficLightData(report6_TLS_data, "Margin_","2020-6","2020-7","2020-8","2020-9")
report6_TLS_data["Overstrike_TLS"]=getTrafficLightData(report6_TLS_data, "Overstrike_","2020-6","2020-7","2020-8","2020-9")任何指示都会有帮助。
输入数据的形式是-
ym PART NUMBER BranchCode Revenue_2019-1 Revenue_2019-10 Revenue_2019-11 Revenue_2019-12 Revenue_2019-2 Revenue_2019-3 Revenue_2019-4 Revenue_2019-5 Revenue_2019-6 Revenue_2019-7 Revenue_2019-8 Revenue_2019-9 Revenue_2020-1 Revenue_2020-2 Revenue_2020-3 Revenue_2020-4 Revenue_2020-5 Revenue_2020-6 Revenue_2020-7 Revenue_2020-8 Revenue_2020-9 Margin_2019-1 Margin_2019-10 Margin_2019-11 Margin_2019-12 Margin_2019-2 Margin_2019-3 Margin_2019-4 Margin_2019-5 Margin_2019-6 Margin_2019-7 Margin_2019-8 Margin_2019-9 Margin_2020-1 Margin_2020-2 Margin_2020-3 Margin_2020-4 Margin_2020-5 Margin_2020-6 Margin_2020-7 Margin_2020-8 Margin_2020-9 Overstrike_2019-1 Overstrike_2019-10 Overstrike_2019-11 Overstrike_2019-12 Overstrike_2019-2 Overstrike_2019-3 Overstrike_2019-4 Overstrike_2019-5 Overstrike_2019-6 Overstrike_2019-7 Overstrike_2019-8 Overstrike_2019-9 Overstrike_2020-1 Overstrike_2020-2 Overstrike_2020-3 Overstrike_2020-4 Overstrike_2020-5 Overstrike_2020-6 Overstrike_2020-7 Overstrike_2020-8 Overstrike_2020-9 Transactions_2019-1 Transactions_2019-10 Transactions_2019-11 Transactions_2019-12 Transactions_2019-2 Transactions_2019-3 Transactions_2019-4 Transactions_2019-5 Transactions_2019-6 Transactions_2019-7 Transactions_2019-8 Transactions_2019-9 Transactions_2020-1 Transactions_2020-2 Transactions_2020-3 Transactions_2020-4 Transactions_2020-5 Transactions_2020-6 Transactions_2020-7 Transactions_2020-8 Transactions_2020-9 Margin_2019-1% Margin_2019-10% Margin_2019-11% Margin_2019-12% Margin_2019-2% Margin_2019-3% Margin_2019-4% Margin_2019-5% Margin_2019-6% Margin_2019-7% Margin_2019-8% Margin_2019-9% Margin_2020-1% Margin_2020-2% Margin_2020-3% Margin_2020-4% Margin_2020-5% Margin_2020-6% Margin_2020-7% Margin_2020-8% Margin_2020-9% Overstrike_2019-1% Overstrike_2019-10% Overstrike_2019-11% Overstrike_2019-12% Overstrike_2019-2% Overstrike_2019-3% Overstrike_2019-4% Overstrike_2019-5% Overstrike_2019-6% Overstrike_2019-7% Overstrike_2019-8% Overstrike_2019-9% Overstrike_2020-1% Overstrike_2020-2% Overstrike_2020-3% Overstrike_2020-4% Overstrike_2020-5% Overstrike_2020-6% Overstrike_2020-7% Overstrike_2020-8% Overstrike_2020-9%
0 BAGG001 BC 71.75 90.00 20.25 43.50 42.50 30.00 70.00 44.25 45.00 46.75 129.50 58.00 81.00 36.00 33.25 0.75 15.00 24.75 0.00 0.00 2.50 32.97 39.15 8.95 14.31 18.95 7.86 30.68 19.27 19.74 18.12 59.38 22.30 34.95 17.59 14.10 0.32 6.35 5.30 0.00 0.00 1.06 0.00 0.00 0.00 1.00 0.00 1.00 0.00 0.00 3.00 3.00 1.00 1.00 2.00 0.00 0.00 0.00 0.00 2.00 0.00 0.00 0.00 8 16 5 9 5 6 12 7 10 7 13 10 13 5 11 1 2 4 0 0 1 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 0.00 0.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 0.00 0.00 1.00
1 BAGG001 PK 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 25.50 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 9.90 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00发布于 2020-10-28 23:10:46
重要的是要理解Pandas是Numpy的包装器,两者都面向向量化。尽量避免循环和重复,这种方法确实可以简化。值得说的是
如果任何值都是month+1 < Month,则红色else绿色。
绝对不是发生了什么。目前,我已经提出了一个解决方案,它使用查找表处理固定的月数。你可以在必要的时候调整它。它基于您展示的表,将比较解释为字段中的位。
否则:不要在dimension字符串中使用下划线;不要在输出数据中生成双下划线;添加类型提示;并接受月份作为变量参数或元组。
import numpy as np
import pandas as pd
lookup = np.array((-1, 0, -1, 1, -1, 1, 0, 1))
to_binary = np.array((4, 2, 1)).T
def get_traffic_light_data_new(
df: pd.DataFrame,
dimension: str,
*months: str,
) -> np.array:
assert len(months) == 4
by_month = df[[f'{dimension}_{month}' for month in months]].to_numpy()
signs = by_month[:, 1:] - by_month[:, :-1] > 0
index = np.matmul(signs, to_binary)
return lookup[index]
def test():
scenarios = pd.DataFrame(
[
[3, 2, 1, 0],
[1, 2, 3, 4],
[1, 2, 1.5, 1.7],
[2, 1, 3, 4],
[3, 2, 1, 4],
[3, 2, 2.5, 2.4],
[5, 6, 1, 0],
[3, 4, 5, 4],
],
columns=(
'Revenue_2020-6',
'Revenue_2020-7',
'Revenue_2020-8',
'Revenue_2020-9',
),
)
args = (
scenarios,
'Revenue',
'2020-6',
'2020-7',
'2020-8',
'2020-9',
)
new_result = get_traffic_light_data_new(*args)
print(new_result)
if __name__ == '__main__':
test()https://codereview.stackexchange.com/questions/251266
复制相似问题