import pandas as pd
import numpy as np
import sys
# Load the UCI auto-mpg data file, labelling the columns explicitly because
# the file has no header row, then preview the first rows.
# No `sep` is passed here, so pandas' default comma delimiter applies.
auto = pd.read_csv(
    filepath_or_buffer="https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data",
    names=[
        'MPG',
        'Cylinders',
        'Displacement',
        'Horse power',
        'Weight',
        'Acceleration',
        'Model Year',
        'Origin',
        'Car Name',
    ],
)
auto.head(n=5)

我需要清理这些数据,但读取之后一直得到这样的输出,需要一些帮助。我是初学者,想不通问题出在哪里。
发布于 2017-03-25 02:53:16
如果您查看该文件,会发现分隔符并不固定,而是数量不等的空白字符。使用 sep='\s+' 即可得到所需的输出。
# Fields in this file are separated by runs of whitespace of varying width,
# so split on the regular expression \s+ rather than the default comma.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data"
df = pd.read_csv(
    url,
    # Raw string: in a plain literal '\s' is an invalid escape sequence, which
    # newer Python versions warn about; r'\s+' passes the regex unchanged.
    sep=r'\s+',
    # The file has no header row, so supply the column labels here.
    names=[
        'MPG', 'Cylinders', 'Displacement', 'Horse power', 'Weight',
        'Acceleration', 'Model Year', 'Origin', 'Car Name',
    ],
)
df.head()
MPG Cylinders Displacement Horse power Weight Acceleration Model Year Origin Car Name
0 18 8 307 130.0 3504 12.0 70 1 chevrolet chevelle malibu
1 15 8 350 165.0 3693 11.5 70 1 buick skylark 320
2 18 8 318 150.0 3436 11.0 70 1 plymouth satellite
3 16 8 304 150.0 3433 12.0 70 1 amc rebel sst
4 17 8 302 140.0 3449 10.5 70 1 ford torino
发布于 2017-03-25 02:54:08
使用delim_whitespace参数:
# Read the auto-mpg data file, treating any run of whitespace as the field
# separator, and preview the first rows.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
# Column labels are passed via `names` because the file has no header row.
cols = ['MPG', 'Cylinders', 'Displacement', 'Horse power', 'Weight',
'Acceleration', 'Model Year', 'Origin', 'Car Name']
# NOTE(review): `delim_whitespace` is deprecated since pandas 2.2; the
# documented equivalent is sep=r"\s+". Kept as-is here because this snippet
# exists to demonstrate that parameter.
auto = pd.read_csv(url, names=cols, delim_whitespace=True)
auto.head()
Out:
MPG Cylinders Displacement Horse power Weight Acceleration \
0 18.0 8 307.0 130.0 3504.0 12.0
1 15.0 8 350.0 165.0 3693.0 11.5
2 18.0 8 318.0 150.0 3436.0 11.0
3 16.0 8 304.0 150.0 3433.0 12.0
4 17.0 8 302.0 140.0 3449.0 10.5
Model Year Origin Car Name
0 70 1 chevrolet chevelle malibu
1 70 1 buick skylark 320
2 70 1 plymouth satellite
3 70 1 amc rebel sst
4 70 1 ford torino
https://stackoverflow.com/questions/43007040
复制相似问题