代码:
import os
import pandas as pd
data_folder = os.path.join(os.path.expanduser("~"),"data","Ads")
data_filename = os.path.join(data_folder,"ad.data")
def convert_number(x):
try:
return float(x)
except ValueError:
return np.nan
from collections import defaultdict
converters = defaultdict(convert_number)
converters[1558] = lambda x:1 if x.strip() == "ad." else 0
ads = pd.read_csv(data_filename,header=None,converters=converters)
ads[:5]
x = ads.drop(1558,axis=1).values
y = ads[1558]
from sklearn.decomposition import PCA
pca = PCA(n_components=5)
xd = pca.fit_transform(x)
import numpy as np
np.set_printoptions(precision=3,suppress=True)
pca.explained_variance_ratio_错误:
ValueError Traceback (most recent call last)
<ipython-input-10-f726f2ff6f29> in <module>()
1 from sklearn.decomposition import PCA
2 pca = PCA(n_components=5)
----> 3 xd = pca.fit_transform(x)
4 import numpy as np
5 np.set_printoptions(precision=3,suppress=True)
/home/kongnian/anaconda3/lib/python3.5/site-packages/sklearn/decomposition/pca.py in fit_transform(self, X, y)
239
240 """
--> 241 U, S, V = self._fit(X)
242 U = U[:, :self.n_components_]
243
/home/kongnian/anaconda3/lib/python3.5/site-packages/sklearn/decomposition/pca.py in _fit(self, X)
266 requested.
267 """
--> 268 X = check_array(X)
269 n_samples, n_features = X.shape
270 X = as_float_array(X, copy=self.copy)
/home/kongnian/anaconda3/lib/python3.5/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
371 force_all_finite)
372 else:
--> 373 array = np.array(array, dtype=dtype, order=order, copy=copy)
374
375 if ensure_2d:
ValueError: could not convert string to float: '?'数据集: http://archive.ics.uci.edu/ml/datasets/Internet+Advertisements下载广告数据集os信息: Linux ubuntu 4.4.0-40-泛型#60-Ubuntu 9月23日16:45:45 UTC 2016 x86_64 GNU/Linux
发布于 2016-09-29 09:34:01
链接到的数据源包含?符号--我猜想这些都是缺失的值。我建议在csv阶段阅读时过滤它们,如下所示:
ads = pd.read_csv(data_filename,header=None,converters=converters, na_values='?')您可能会发现更多关于如何处理熊猫博士中缺少的值的信息。
https://stackoverflow.com/questions/39763709
复制相似问题