我想根据时间步长条件删除重复项:每 500000 个时间步,基于 'id' 和 'y' 列的值删除重复值(也就是说,在每个 500000 步的区间内,我想删除 'y' 值为负的重复 'id')。我的数据最多有 1000 万个时间步。我已经尝试了下面的代码。有更好的方法吗?下图是我的 df_initial_conv 数据集。
# Asker's original attempt: slice the data into twenty consecutive windows of
# 500,000 timesteps each, then de-duplicate every window separately.
# Each window is half-open, [start, start + 500000); only the last one also
# includes its upper bound (TIMESTEP == 10000000).
df1 = df_initial_conv.loc[(df_initial_conv['TIMESTEP'] >= 0) & (df_initial_conv['TIMESTEP'] < 500000)]
df2 = df_initial_conv.loc[(df_initial_conv['TIMESTEP'] >= 500000) & (df_initial_conv['TIMESTEP'] < 1000000)]
df3 = df_initial_conv.loc[(df_initial_conv['TIMESTEP'] >= 1000000) & (df_initial_conv['TIMESTEP'] < 1500000)]
df4 = df_initial_conv.loc[(df_initial_conv['TIMESTEP'] >= 1500000) & (df_initial_conv['TIMESTEP'] < 2000000)]
df5 = df_initial_conv.loc[(df_initial_conv['TIMESTEP'] >= 2000000) & (df_initial_conv['TIMESTEP'] < 2500000)]
df6 = df_initial_conv.loc[(df_initial_conv['TIMESTEP'] >= 2500000) & (df_initial_conv['TIMESTEP'] < 3000000)]
df7 = df_initial_conv.loc[(df_initial_conv['TIMESTEP'] >= 3000000) & (df_initial_conv['TIMESTEP'] < 3500000)]
df8 = df_initial_conv.loc[(df_initial_conv['TIMESTEP'] >= 3500000) & (df_initial_conv['TIMESTEP'] < 4000000)]
df9 = df_initial_conv.loc[(df_initial_conv['TIMESTEP'] >= 4000000) & (df_initial_conv['TIMESTEP'] < 4500000)]
df10 = df_initial_conv.loc[(df_initial_conv['TIMESTEP'] >= 4500000) & (df_initial_conv['TIMESTEP'] < 5000000)]
df11 = df_initial_conv.loc[(df_initial_conv['TIMESTEP'] >= 5000000) & (df_initial_conv['TIMESTEP'] < 5500000)]
df12 = df_initial_conv.loc[(df_initial_conv['TIMESTEP'] >= 5500000) & (df_initial_conv['TIMESTEP'] < 6000000)]
df13 = df_initial_conv.loc[(df_initial_conv['TIMESTEP'] >= 6000000) & (df_initial_conv['TIMESTEP'] < 6500000)]
df14 = df_initial_conv.loc[(df_initial_conv['TIMESTEP'] >= 6500000) & (df_initial_conv['TIMESTEP'] < 7000000)]
df15 = df_initial_conv.loc[(df_initial_conv['TIMESTEP'] >= 7000000) & (df_initial_conv['TIMESTEP'] < 7500000)]
df16 = df_initial_conv.loc[(df_initial_conv['TIMESTEP'] >= 7500000) & (df_initial_conv['TIMESTEP'] < 8000000)]
df17 = df_initial_conv.loc[(df_initial_conv['TIMESTEP'] >= 8000000) & (df_initial_conv['TIMESTEP'] < 8500000)]
df18 = df_initial_conv.loc[(df_initial_conv['TIMESTEP'] >= 8500000) & (df_initial_conv['TIMESTEP'] < 9000000)]
df19 = df_initial_conv.loc[(df_initial_conv['TIMESTEP'] >= 9000000) & (df_initial_conv['TIMESTEP'] < 9500000)]
df20 = df_initial_conv.loc[(df_initial_conv['TIMESTEP'] >= 9500000) & (df_initial_conv['TIMESTEP'] <= 10000000)]

# For every window: keep only the rows with a negative y, then keep the first
# row of each id (drop_duplicates keeps the first occurrence by default).
df_negatives1 = df1.query('y < 0')
df_nonduplicate1 = df_negatives1.drop_duplicates(subset="id")
df_negatives2 = df2.query('y < 0')
df_nonduplicate2 = df_negatives2.drop_duplicates(subset="id")
df_negatives3 = df3.query('y < 0')
df_nonduplicate3 = df_negatives3.drop_duplicates(subset="id")
df_negatives4 = df4.query('y < 0')
df_nonduplicate4 = df_negatives4.drop_duplicates(subset="id")
df_negatives5 = df5.query('y < 0')
df_nonduplicate5 = df_negatives5.drop_duplicates(subset="id")
df_negatives6 = df6.query('y < 0')
df_nonduplicate6 = df_negatives6.drop_duplicates(subset="id")
df_negatives7 = df7.query('y < 0')
df_nonduplicate7 = df_negatives7.drop_duplicates(subset="id")
df_negatives8 = df8.query('y < 0')
df_nonduplicate8 = df_negatives8.drop_duplicates(subset="id")
df_negatives9 = df9.query('y < 0')
df_nonduplicate9 = df_negatives9.drop_duplicates(subset="id")
df_negatives10 = df10.query('y < 0')
df_nonduplicate10 = df_negatives10.drop_duplicates(subset="id")
df_negatives11 = df11.query('y < 0')
df_nonduplicate11 = df_negatives11.drop_duplicates(subset="id")
df_negatives12 = df12.query('y < 0')
df_nonduplicate12 = df_negatives12.drop_duplicates(subset="id")
df_negatives13 = df13.query('y < 0')
df_nonduplicate13 = df_negatives13.drop_duplicates(subset="id")
df_negatives14 = df14.query('y < 0')
df_nonduplicate14 = df_negatives14.drop_duplicates(subset="id")
df_negatives15 = df15.query('y < 0')
df_nonduplicate15 = df_negatives15.drop_duplicates(subset="id")
df_negatives16 = df16.query('y < 0')
df_nonduplicate16 = df_negatives16.drop_duplicates(subset="id")
df_negatives17 = df17.query('y < 0')
df_nonduplicate17 = df_negatives17.drop_duplicates(subset="id")
df_negatives18 = df18.query('y < 0')
df_nonduplicate18 = df_negatives18.drop_duplicates(subset="id")
df_negatives19 = df19.query('y < 0')
df_nonduplicate19 = df_negatives19.drop_duplicates(subset="id")
df_negatives20 = df20.query('y < 0')
df_nonduplicate20 = df_negatives20.drop_duplicates(subset="id")

# Stitch the per-window results back together; pd.concat takes a single
# list of frames, not one positional argument per frame.
df_nonduplicat_final = pd.concat([
    df_nonduplicate1, df_nonduplicate2, df_nonduplicate3, df_nonduplicate4,
    df_nonduplicate5, df_nonduplicate6, df_nonduplicate7, df_nonduplicate8,
    df_nonduplicate9, df_nonduplicate10, df_nonduplicate11, df_nonduplicate12,
    df_nonduplicate13, df_nonduplicate14, df_nonduplicate15, df_nonduplicate16,
    df_nonduplicate17, df_nonduplicate18, df_nonduplicate19, df_nonduplicate20,
])
发布于 2022-06-11 13:59:38
我们可以先用 pd.cut 划分时间步区间,再用 groupby:
# Only rows with a negative y take part in the de-duplication.
negatives = df_initial_conv.query('y < 0')

# Window edges 0, 500000, ..., 9500000, then +inf, so that the final (20th)
# window also contains TIMESTEP == 10000000, whose slice in the question is
# inclusive at the top.
edges = np.append(np.arange(0, 10000000, 500000), np.inf)

# right=False yields half-open windows [start, start + 500000), matching the
# ">= start and < end" slices used in the question.
cut_op = pd.cut(negatives['TIMESTEP'], edges, right=False)

# Keep the first row of every (window, id) pair, i.e. drop repeated ids inside
# each 500,000-step window. observed=True restricts the grouping to
# window/id combinations that actually occur in the data.
out = negatives.groupby([cut_op, 'id'], observed=True).head(1)
# Source: https://stackoverflow.com/questions/72584910
复制相似问题