这可能是一个非常简单的问题,因为我仍然在探索Python。在这个问题上,我用的是numpy。更新日期为09/30/21:采用并修改了以下代码,以供将来参考。我还在循环中为数量少于所需大小的类添加了一个elif。有些代码可能是不必要的。
new_array = test_array.copy()
uniques, counts = np.unique(new_array, return_counts=True)
print("classes:", uniques, "counts:", counts)
for unique, count in zip(uniques, counts):
#print (unique, count)
if unique != 0 and count > 3:
ids = np.random.choice(count, count-3, replace=False)
new_array[tuple(i[ids] for i in np.where(new_array == unique))] = 0
elif unique != 0 and count <= 3:
ids = np.random.choice(count, count, replace=False)
new_array[tuple(i[ids] for i in np.where(new_array == unique))] = unique下面是的原始问题.
假设我有这样一个2D数组:
test_array = np.array([[0,0,0,0,0],
[1,1,1,1,1],
[0,0,0,0,0],
[2,2,2,4,4],
[4,4,4,2,2],
[0,0,0,0,0]])
print("existing classes:", np.unique(test_array))
# "existing classes: [0 1 2 4]"现在,我希望在每个类中保持一个固定大小的(例如,2个值),即!= 0 (在本例中为两个1s、两个2s和两个4s),并将其余的替换为0。其中被替换的值是随机的,每次运行(或从一个种子)。
例如,使用run 1,我将拥有
([[0,0,0,0,0],
[1,0,0,1,0],
[0,0,0,0,0],
[2,0,0,0,4],
[4,0,0,2,0],
[0,0,0,0,0]])如果再运行一次,可能是
([[0,0,0,0,0],
[1,1,0,0,0],
[0,0,0,0,0],
[2,0,2,0,4],
[4,0,0,0,0],
[0,0,0,0,0]])等等,有人能帮我吗?
发布于 2021-09-30 05:41:07
我的策略是
创建一个初始化为所有零的新数组( value
诀窍是保持索引的适当形状,以便保留原始数组的形状。
import numpy as np
test_array = np.array([[0,0,0,0,0],
[1,1,1,1,1],
[0,0,0,0,0],
[2,2,2,4,4],
[4,4,4,2,2],
[0,0,0,0,0]])
def sample_classes(arr, n_keep=2, random_state=42):
classes, counts = np.unique(test_array, return_counts=True)
rng = np.random.default_rng(random_state)
out = np.zeros_like(arr)
for klass, count in zip(classes, counts):
# Find locations of the class elements
indexes = np.nonzero(arr == klass)
# Sample up to n_keep elements of the class
keep_idx = rng.choice(count, n_keep, replace=False)
# Select the kept elements and reformat for indexing the output array and retaining its shape
keep_idx_reshape = tuple(ind[keep_idx] for ind in indexes)
out[keep_idx_reshape] = klass
return out你可以把它当作
In [3]: sample_classes(test_array) [3/1174]
Out[3]:
array([[0, 0, 0, 0, 0],
[0, 1, 1, 0, 0],
[0, 0, 0, 0, 0],
[2, 0, 0, 4, 0],
[4, 0, 0, 2, 0],
[0, 0, 0, 0, 0]])
In [4]: sample_classes(test_array, n_keep=3)
Out[4]:
array([[0, 0, 0, 0, 0],
[1, 0, 1, 1, 0],
[0, 0, 0, 0, 0],
[0, 2, 0, 4, 0],
[4, 4, 0, 2, 2],
[0, 0, 0, 0, 0]])
In [5]: sample_classes(test_array, random_state=88)
Out[5]:
array([[0, 0, 0, 0, 0],
[0, 0, 1, 1, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[4, 0, 4, 2, 2],
[0, 0, 0, 0, 0]])
In [6]: sample_classes(test_array, random_state=88, n_keep=4)
Out[6]:
array([[0, 0, 0, 0, 0],
[0, 1, 1, 1, 1],
[0, 0, 0, 0, 0],
[2, 2, 0, 4, 4],
[4, 4, 0, 2, 2],
[0, 0, 0, 0, 0]])发布于 2021-09-30 04:38:09
这里是我不那么优雅的解决方案:
def unique(arr, num=2, seed=None):
np.random.seed(seed)
vals = {}
for i, row in enumerate(arr):
for j, val in enumerate(row):
if val in vals and val != 0:
vals[val].append((i, j))
elif val != 0:
vals[val] = [(i, j)]
new = np.zeros_like(arr)
for val in vals:
np.random.shuffle(vals[val])
while len(vals[val]) > num:
vals[val].pop()
for row, col in vals[val]:
new[row,col] = val
return newhttps://stackoverflow.com/questions/69386041
复制相似问题