我有一个 pandas DataFrame,共 2000 行(每行代表一天)。我用 for 循环先取 DataFrame[0:252],计算一些信号并保存到列表中,然后需要对 DataFrame[1:253]、DataFrame[2:254] 执行相同的操作,依此类推,直到 DataFrame[1746:2000]。
处理一个 2000 行的文件大约需要 15 分钟,而这样的文件我有 1000 份。有没有人能给我一些建议,告诉我怎样才能让它运行得更快?
谢谢你。
部分代码示例如下:
# Number of rows (trading days) in each rolling window.
WINDOW_LENGTH = 252

# Load and prepare the workbook ONCE. Re-reading the Excel file inside the
# window loop repeats the same file parse for every window position.
full_data = pd.read_excel(
    "17 HK Stocks in different sheets.xlsx",
    sheet_name=Sheet_Name,
    engine='openpyxl',
    parse_dates=True,
    index_col='Date',
)
full_data['Date'] = pd.to_datetime(full_data.index)
full_data['Date'] = full_data['Date'].apply(mpl_dates.date2num)
full_data = full_data.loc[:, ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']]
full_data['Indication_Close_Rule'] = 0
full_data['Resistance_Validation'] = 0
full_data['Higher_Contracting_Range'] = 0
#full_data['Volume_Rolling_50'] = full_data['Volume'].rolling(50).mean()
#full_data['Volume_Validation'] = 0
full_data['VCP_Signal'] = 0
full_data['VCP_Signal_Change'] = 0

# Start to divide the data into each year and loop over
for adjusting_factor in range(len(full_data)):
    total_numbers_signal = 0

    # This is the rolling of the dataframe in a range of 252 days.
    # .copy() gives every window its own frame, so writes to the signal
    # columns of one window never leak into `full_data` or other windows.
    data = full_data.iloc[adjusting_factor:adjusting_factor + WINDOW_LENGTH].copy()
    if len(data) < WINDOW_LENGTH:
        # Fewer than a full window of rows is left: stop.
        break

    # Positional NumPy access instead of data['High'][j]: the frame is
    # indexed by date, so integer lookups on the Series are slow and rely on
    # pandas' deprecated positional fallback.
    highs = data['High'].to_numpy()
    index_values = data.index.values

    for j in range(4, len(highs)):
        peak = highs[j - 2]

        # Candidate top at j-2: highs rise over j-4 -> j-3 -> j-2, fall at
        # j-1, and bar j differs from bar j-1 in either direction. The
        # original had two branches (j below j-1 / j above j-1) with
        # identical bodies; they are merged here.
        rises_into_peak = highs[j - 4] < highs[j - 3] < peak
        falls_after_peak = highs[j - 1] < peak
        last_bar_moves = highs[j] < highs[j - 1] or highs[j] > highs[j - 1]
        if not (rises_into_peak and falls_after_peak and last_bar_moves):
            continue

        date_carrier_high = str(index_values[j - 2])
        zigzag_entry = (peak, j - 2, 'H', date_carrier_high[0:10])
        # Same point, but positioned relative to the whole file rather than
        # to the current window.
        top_entry = (peak, j - 2 + adjusting_factor, 'H', date_carrier_high[0:10])

        if len(zigzag_list) == 0:
            zigzag_list.append(zigzag_entry)
            Current_Three_Points_For_Top.append(top_entry)
            continue

        last_value, _, last_kind, _ = zigzag_list[-1]
        if last_kind == 'L' and last_value < peak:
            # A high above the previous low: record it as a new point.
            zigzag_list.append(zigzag_entry)
            Current_Three_Points_For_Top.append(top_entry)
        elif last_kind == 'H' and last_value < peak:
            # A higher high directly after a high: replace the previous one.
            zigzag_list = zigzag_list[:-1]
            zigzag_list.append(zigzag_entry)
            Current_Three_Points_For_Top = Current_Three_Points_For_Top[:-1]
            Current_Three_Points_For_Top.append(top_entry)
        # Any other case (previous point not below this high): nothing is
        # recorded.

# Posted on 2020-12-02 17:39:13 (page text that was fused onto the last code line)
正如有人在评论中建议的那样,在dataframe上使用rolling()方法。
示例:
import io
import pandas as pd
import numpy as np
# Create dataframe
s = """
A B C
1 5 7
5 9 2
3 8 7
2 8 3
2 6 4
"""
df = pd.read_csv(io.StringIO(s), delimiter=' ')
rolled = df.rolling(window=2)


# your custom function to apply to the window
def f(arr):
    """Return the sum of the values in one rolling window.

    This is a placeholder: replace the body with the per-window computation
    you actually need.
    """
    # change this
    return np.sum(arr)


print(df)
# raw=True hands each window to f as a NumPy array instead of a Series,
# which is faster for NumPy reductions and gives the same result here.
print(rolled.apply(f, raw=True).dropna())
# Output:
A B C
0 1 5 7
1 5 9 2
2 3 8 7
3 2 8 3
4 2 6 4
A B C
1 6.0 14.0 9.0
2 8.0 17.0 9.0
3 5.0 16.0 10.0
4 4.0 14.0 7.0
https://stackoverflow.com/questions/65105131
复制相似问题