我研究了使用PCA和自动编码器从以下链接中使用代码进行异常检测的问题:用于异常检测和状态监测的机器学习和我尝试运行使用PCA与Mahalanobis距离的代码部分,但是,如果我运行该代码,我总是会得到异常消息,结果是出现错误'numpy.ndarray' object is not callable的协方差矩阵函数部分的问题。我试图创建新的变量,将数据转换为NumPy,但是什么也没有起作用,是什么导致了这个错误?
代码:
def cov_matrix(data, verbose=False):
# data = pd.DataFrame(data).to_numpy()
print('calculating the covaraince matrix')
covariance_matrix = np.cov(data, rowvar=False)
print('Done the covaraince matrix')
if is_pos_def(covariance_matrix):
inv_covariance_matrix = np.linalg.inv(covariance_matrix)
if is_pos_def(inv_covariance_matrix):
return covariance_matrix, inv_covariance_matrix
else:
print("Error: Inverse of Covariance Matrix is not positive definite!")
else:
print("Error: Covariance Matrix is not positive definite!")
def MahalanobisDist(inv_cov_matrix, mean_distr, data, verbose=False):
inv_covariance_matrix = inv_cov_matrix
vars_mean = mean_distr
diff = data - vars_mean
md = []
for i in range(len(diff)):
md.append(np.sqrt(diff[i].dot(inv_covariance_matrix).dot(diff[i])))
return md
def MD_detectOutliers(dist, extreme=False, verbose=False):
k = 3. if extreme else 2.
threshold = np.mean(dist) * k
outliers = []
for i in range(len(dist)):
if dist[i] >= threshold:
outliers.append(i) # index of the outlier
return np.array(outliers)
def MD_threshold(dist, extreme=False, verbose=False):
k = 3. if extreme else 2.
threshold = np.mean(dist) * k
return threshold
#### Main code:
# Inputting the training and test dataframes:
data_train = np.array(principalDf_C0.values)
data_test_C1 = np.array(principalDf_C1.values)
data_test_C2 = np.array(principalDf_C2.values)
data_test_C3 = np.array(principalDf_C4.values)
data_test_C4 = np.array(principalDf_C5.values)
print('Training Dataframe: ', data_train[:,])
print('Test1 Dataframe: ', data_test_C1)
print('Test2 Dataframe: ', data_test_C2)
print('Test3 Dataframe: ', data_test_C3)
print('Test4 Dataframe: ', data_test_C4)
data_train_df = pd.DataFrame(principalDf_C0.values)
data_test_df_C1 = pd.DataFrame(principalDf_C1.values)
data_test_df_C2 = pd.DataFrame(principalDf_C2.values)
data_test_df_C3 = pd.DataFrame(principalDf_C4.values)
data_test_df_C4 = pd.DataFrame(principalDf_C5.values)
# Calculating the covariance matrix:
cov_matrix, inv_cov_matrix = cov_matrix(data=data_train)
# Calculating the mean value for the input variables:
mean_distr = data_train_df.mean(axis=0)
# Calculating the Mahalanobis distance and threshold value to flag datapoints as an anomaly:
dist_test_C1 = MahalanobisDist(inv_cov_matrix, mean_distr, data_test_df_C1, verbose=True)
dist_test_C2 = MahalanobisDist(inv_cov_matrix, mean_distr, data_test_df_C2, verbose=True)
dist_test_C3 = MahalanobisDist(inv_cov_matrix, mean_distr, data_test_df_C3, verbose=True)
dist_test_C4 = MahalanobisDist(inv_cov_matrix, mean_distr, data_test_df_C4, verbose=True)
dist_train = MahalanobisDist(inv_cov_matrix, mean_distr, data_train_df, verbose=True)
threshold = MD_threshold(dist_train, extreme = True)
# Distribution of Threshold value for flagging an anomaly:
plt.figure()
sns.distplot(np.square(dist_train),bins = 10, kde= False)
# plt.xlim([0.0,15])
plt.show()
plt.figure()
sns.distplot(dist_train, bins = 10, kde= True, color = 'green');
# plt.xlim([0.0,5])
plt.xlabel('Mahalanobis dist')
plt.show()
anomaly_train = pd.DataFrame(index=data_train_df.index)
anomaly_train['Mob dist']= dist_train
anomaly_train['Thresh'] = threshold
# If Mob dist above threshold: Flag as anomaly
anomaly_train['Anomaly'] = anomaly_train['Mob dist'] > anomaly_train['Thresh']
anomaly_train.index = X_train_PCA.index
anomaly_C1 = pd.DataFrame(index=data_test_df_C1.index)
anomaly_C1['Mob dist']= dist_test_C1
anomaly_C1['Thresh'] = threshold
# If Mob dist above threshold: Flag as anomaly
anomaly_C1['Anomaly'] = anomaly_C1['Mob dist'] > anomaly_C1['Thresh']
anomaly_C1.index = data_test_df_C1.index
anomaly_C1.head()
anomaly_C2 = pd.DataFrame(index=data_test_df_C2.index)
anomaly_C2['Mob dist']= dist_test_C2
anomaly_C2['Thresh'] = threshold
# If Mob dist above threshold: Flag as anomaly
anomaly_C2['Anomaly'] = anomaly_C2['Mob dist'] > anomaly_C2['Thresh']
anomaly_C2.index = data_test_df_C2.index
anomaly_C2.head()
anomaly_C3 = pd.DataFrame(index=data_test_df_C3.index)
anomaly_C3['Mob dist']= dist_test_C3
anomaly_C3['Thresh'] = threshold
# If Mob dist above threshold: Flag as anomaly
anomaly_C3['Anomaly'] = anomaly_C3['Mob dist'] > anomaly_C3['Thresh']
anomaly_C3.index = data_test_df_C3.index
anomaly_C3.head()
anomaly_C4 = pd.DataFrame(index=data_test_df_C4.index)
anomaly_C4['Mob dist']= dist_test_C4
anomaly_C4['Thresh'] = threshold
# If Mob dist above threshold: Flag as anomaly
anomaly_C4['Anomaly'] = anomaly_C4['Mob dist'] > anomaly_C4['Thresh']
anomaly_C4.index = data_test_df_C4.index
anomaly_C4.head()
final_scored = pd.concat([anomaly_train, anomaly_C1, anomaly_C2, anomaly_C3, anomaly_C4])
print(final_scored)
except Exception:
print('Cannot implement Anomaly detection using Mahalanobis distance metric')
pass发布于 2021-09-27 12:35:54
根据您的注释,变量cov_matrix和函数cov_matrix()之间存在名称空间冲突。
把那条线改成。
matrix, inv_matrix = cov_matrix(data=data_train)并相应地更新代码,或重命名cov_matrix()。一个好的惯例是返回事物的函数应该以它们的名字命名,例如generate_cov_matrix()或calculate_cov_matrix().*。
(是的,正如编写的那样,代码应该运行一次,因为AFAICS之后您不会再次调用cov_matrix(),但我猜您正在使用持久解释器会话,并且在cov_matrix()被覆盖后再次对代码进行评估。)
*本公约假定函数的存在会产生副作用,并会异常地返回。当然,如果您是从功能上编写的,并且有副作用是例外而不是规则,那么您可能希望将其反转,或者完全遵循另一种约定。
发布于 2021-09-27 12:39:58
我的猜测是,您遇到了一个问题,在这个问题中,您有一个名为cov_matrix的变量和一个名为cov_matrix的函数。在某种程度上,我认为您用变量(即numpy.ndarray )覆盖了函数。稍后,您尝试调用函数cov_matrix(),但对象实际上是变量,即numpy数组。
https://stackoverflow.com/questions/69346610
复制相似问题