谢谢同事,2问
请帮我填写这份关于数据可视化的调查问卷(大约需要 4–5 分钟):https://uol.onlinesurveys.ac.uk/post-covid-life 。
请检查我的代码以改进
from math import sqrt
from collections import Counter
def euclidean_distance(obs1, obs2):
    """Return the Euclidean (L2) distance between two observations.

    Args:
        obs1: Sequence of numbers; its length decides how many components
            are compared.
        obs2: Sequence of numbers with at least ``len(obs1)`` components.
            Any trailing extra components are ignored.

    Returns:
        The square root of the summed squared component differences
        (``0.0`` when ``obs1`` is empty).

    Raises:
        IndexError: If ``obs2`` has fewer components than ``obs1``.
    """
    # zip() would silently truncate to the shorter input, so keep the
    # out-of-range failure explicit instead of hiding a shape mismatch.
    if len(obs2) < len(obs1):
        raise IndexError(
            f"obs2 has {len(obs2)} components but obs1 has {len(obs1)}"
        )
    return sqrt(sum((a - b) ** 2 for a, b in zip(obs1, obs2)))
# Locate the most similar neighbors
def get_neighbors(dataset, test, size_neighbors):
    """Return the rows of ``dataset`` closest to ``test``.

    Args:
        dataset: Iterable of observations (each a sequence of numbers).
        test: The single observation to compare every row against.
        size_neighbors: Maximum number of neighbours to return.

    Returns:
        A list with up to ``size_neighbors`` rows of ``dataset``, ordered
        from nearest to farthest by ``euclidean_distance``. Fewer rows are
        returned when ``dataset`` is smaller than ``size_neighbors``; a
        non-positive ``size_neighbors`` yields an empty list.
    """
    # Use the function's own arguments (not module-level globals) and pair
    # each individual row with its distance to the query observation.
    scored = [(row, euclidean_distance(row, test)) for row in dataset]
    # Sort on the distance only, so the rows themselves are never compared.
    scored.sort(key=lambda pair: pair[1])
    # Slicing tolerates a request larger than the dataset.
    return [row for row, _ in scored[: max(size_neighbors, 0)]]
# Example usage: print the 13 training rows nearest to the query.
# NOTE(review): X_train and X_test are not defined in this snippet; X_test
# is presumably a single observation here - confirm against the caller.
neighbors = get_neighbors(X_train, X_test, 13)
for neighbor in neighbors:
    print(neighbor)
# Page metadata (not code): posted 2021-05-28 19:06:05
我认为您需要使用 numpy,请尝试以下代码:
import numpy as np
class KNearestNeighbor:
    """k-nearest-neighbour classifier based on Euclidean distance.

    Training only stores the data. Prediction computes the distance from
    every test row to every training row and takes a majority vote over
    the labels of the ``k`` closest training rows.
    """

    def __init__(self, k):
        """Create a classifier that votes over the ``k`` nearest rows.

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k
        # Small constant added under the square root in every distance
        # computation.
        self.eps = 1e-8

    def train(self, X, y):
        """Store the training rows ``X`` and their labels ``y``.

        Both are converted with ``np.asarray`` so plain nested lists work.
        """
        self.X_train = np.asarray(X)
        self.y_train = np.asarray(y)

    def predict(self, X_test, num_loops=0):
        """Predict a label for every row of ``X_test``.

        Args:
            X_test: 2-D array-like, one observation per row.
            num_loops: Selects the distance implementation: ``0`` is fully
                vectorized, ``1`` uses one Python loop, any other value
                uses the two-loop reference version.

        Returns:
            1-D float array holding one predicted label per test row.
        """
        X_test = np.asarray(X_test)
        if num_loops == 0:
            distances = self.compute_distance_vectorized(X_test)
        elif num_loops == 1:
            distances = self.compute_distance_one_loop(X_test)
        else:
            distances = self.compute_distance_two_loops(X_test)
        return self.predict_labels(distances)

    def compute_distance_two_loops(self, X_test):
        """Return the (num_test, num_train) distance matrix, naive version.

        Inefficient reference implementation; useful only for
        understanding what kNN is doing.
        """
        X_test = np.asarray(X_test)
        num_test = X_test.shape[0]
        num_train = self.X_train.shape[0]
        distances = np.zeros((num_test, num_train))
        for i in range(num_test):
            for j in range(num_train):
                # Taking the sqrt is not necessary for ranking: the order
                # of the distances does not change since sqrt is monotone.
                diff = X_test[i, :] - self.X_train[j, :]
                distances[i, j] = np.sqrt(self.eps + np.sum(diff ** 2))
        return distances

    def compute_distance_one_loop(self, X_test):
        """Return the (num_test, num_train) distance matrix, one loop.

        Much better than the two-loop version but not as fast as the fully
        vectorized one. Relies on NumPy broadcasting in
        ``self.X_train - X_test[i, :]``.
        """
        X_test = np.asarray(X_test)
        num_test = X_test.shape[0]
        num_train = self.X_train.shape[0]
        distances = np.zeros((num_test, num_train))
        for i in range(num_test):
            # Broadcasting subtracts the test row from every training row.
            diff = self.X_train - X_test[i, :]
            distances[i, :] = np.sqrt(self.eps + np.sum(diff ** 2, axis=1))
        return distances

    def compute_distance_vectorized(self, X_test):
        """Return the (num_test, num_train) distance matrix, no loops.

        For two vectors a and b, ``|a - b|^2 = |a|^2 - 2 a.b + |b|^2``.
        Applying this to every test/train pair at once gives the whole
        matrix from two row-wise sums and one matrix product.
        """
        X_test = np.asarray(X_test)
        X_test_squared = np.sum(X_test ** 2, axis=1, keepdims=True)
        X_train_squared = np.sum(self.X_train ** 2, axis=1, keepdims=True)
        cross_term = np.dot(X_test, self.X_train.T)
        squared = X_test_squared - 2 * cross_term + X_train_squared.T
        # The expanded formula can come out slightly negative through
        # floating-point cancellation; clamp so sqrt never produces NaN.
        squared = np.maximum(squared, 0.0)
        return np.sqrt(self.eps + squared)

    def predict_labels(self, distances):
        """Majority-vote a label for every row of ``distances``.

        Args:
            distances: (num_test, num_train) matrix of distances.

        Returns:
            1-D float array of predicted labels. Labels are cast to ``int``
            before counting; when several labels are equally frequent the
            smallest one wins (``np.argmax`` returns the first maximum).
        """
        num_test = distances.shape[0]
        y_pred = np.zeros(num_test)
        for i in range(num_test):
            nearest = np.argsort(distances[i, :])[: self.k]
            k_closest_classes = self.y_train[nearest].astype(int)
            y_pred[i] = np.argmax(np.bincount(k_closest_classes))
        return y_pred
if __name__ == "__main__":
    # Small demo: classify the training points themselves with k=1 and
    # print the fraction of labels that are reproduced.
    X = np.array([[1, 1], [3, 1], [1, 4], [2, 4], [3, 3], [5, 1]])
    y = np.array([0, 0, 0, 1, 1, 1])
    KNN = KNearestNeighbor(k=1)
    KNN.train(X, y)
    y_pred = KNN.predict(X, num_loops=0)
    print(f"Accuracy: {sum(y_pred == y) / y.shape[0]}")
# Source: https://stackoverflow.com/questions/67744245
复制相似问题