我试图读取一系列各有大约 190 列的文件,并希望为每一列指定其类型(即字符、日期、数字等),因为 vroom 似乎并不总是把第一行中的负数读取为数值。希望下面是一个可运行的最小示例,它仍然使用真实数据。这些数据来自美国商品期货交易委员会(CFTC)的交易员持仓报告(Commitments of Traders)。
我的例子如下:
library(vroom)

# Download the 2018 CFTC archive and unpack it into ./CFTC_COT.
# The URL must be a plain string: the markdown-link wrapper ("[...][1]") that
# had leaked into the literal makes download.file() fail.
url2 <- "https://www.cftc.gov/files/dea/history/com_disagg_txt_2018.zip"
download.file(url2, "CFTC_COT.zip")
unzip("CFTC_COT.zip", exdir = "CFTC_COT")

# Baseline read without col_types: vroom guesses every column type. Works.
data <- vroom("CFTC_COT/c_year.txt")
# Second read: tries to supply the column types positionally, grouped with
# rep(). This is the call the post reports as failing.
# NOTE(review): c() and rep() on collector objects probably flatten them
# rather than building a column specification -- confirm against the vroom
# col_types documentation.
data2 <- vroom( 'CFTC_COT/c_year.txt'
, col_types = c(col_character(),col_double(), col_date(format = ""),col_double(),rep(col_character(),3), rep(col_double(),177), rep( col_character(),6)))
spec(data)
数据的列规范如下:
cols(
Market_and_Exchange_Names = col_character(),
As_of_Date_In_Form_YYMMDD = col_double(),
`Report_Date_as_YYYY-MM-DD` = col_date(format = ""),
CFTC_Contract_Market_Code = col_character(),
CFTC_Market_Code = col_character(),
CFTC_Region_Code = col_character(),
CFTC_Commodity_Code = col_character(),
Open_Interest_All = col_double(),
Prod_Merc_Positions_Long_All = col_double(),
Prod_Merc_Positions_Short_All = col_double(),
Swap_Positions_Long_All = col_double(),
Swap__Positions_Short_All = col_double(),
Swap__Positions_Spread_All = col_double(),
M_Money_Positions_Long_All = col_double(),
M_Money_Positions_Short_All = col_double(),
M_Money_Positions_Spread_All = col_double(),
Other_Rept_Positions_Long_All = col_double(),
Other_Rept_Positions_Short_All = col_double(),
Other_Rept_Positions_Spread_All = col_double(),
Tot_Rept_Positions_Long_All = col_double(),
Tot_Rept_Positions_Short_All = col_double(),
NonRept_Positions_Long_All = col_double(),
NonRept_Positions_Short_All = col_double(),
Open_Interest_Old = col_double(),
Prod_Merc_Positions_Long_Old = col_double(),
Prod_Merc_Positions_Short_Old = col_double(),
Swap_Positions_Long_Old = col_double(),
Swap__Positions_Short_Old = col_double(),
Swap__Positions_Spread_Old = col_double(),
M_Money_Positions_Long_Old = col_double(),
M_Money_Positions_Short_Old = col_double(),
M_Money_Positions_Spread_Old = col_double(),
Other_Rept_Positions_Long_Old = col_double(),
Other_Rept_Positions_Short_Old = col_double(),
Other_Rept_Positions_Spread_Old = col_double(),
Tot_Rept_Positions_Long_Old = col_double(),
Tot_Rept_Positions_Short_Old = col_double(),
NonRept_Positions_Long_Old = col_double(),
NonRept_Positions_Short_Old = col_double(),
Open_Interest_Other = col_double(),
Prod_Merc_Positions_Long_Other = col_double(),
Prod_Merc_Positions_Short_Other = col_double(),
Swap_Positions_Long_Other = col_double(),
Swap__Positions_Short_Other = col_double(),
Swap__Positions_Spread_Other = col_double(),
M_Money_Positions_Long_Other = col_double(),
M_Money_Positions_Short_Other = col_double(),
M_Money_Positions_Spread_Other = col_double(),
Other_Rept_Positions_Long_Other = col_double(),
Other_Rept_Positions_Short_Other = col_double(),
Other_Rept_Positions_Spread_Other = col_double(),
Tot_Rept_Positions_Long_Other = col_double(),
Tot_Rept_Positions_Short_Other = col_double(),
NonRept_Positions_Long_Other = col_double(),
NonRept_Positions_Short_Other = col_double(),
Change_in_Open_Interest_All = col_character(),
Change_in_Prod_Merc_Long_All = col_character(),
Change_in_Prod_Merc_Short_All = col_character(),
Change_in_Swap_Long_All = col_character(),
Change_in_Swap_Short_All = col_character(),
Change_in_Swap_Spread_All = col_character(),
Change_in_M_Money_Long_All = col_character(),
Change_in_M_Money_Short_All = col_character(),
Change_in_M_Money_Spread_All = col_character(),
Change_in_Other_Rept_Long_All = col_character(),
Change_in_Other_Rept_Short_All = col_character(),
Change_in_Other_Rept_Spread_All = col_character(),
Change_in_Tot_Rept_Long_All = col_character(),
Change_in_Tot_Rept_Short_All = col_character(),
Change_in_NonRept_Long_All = col_character(),
Change_in_NonRept_Short_All = col_character(),
Pct_of_Open_Interest_All = col_double(),
Pct_of_OI_Prod_Merc_Long_All = col_double(),
Pct_of_OI_Prod_Merc_Short_All = col_double(),
Pct_of_OI_Swap_Long_All = col_double(),
Pct_of_OI_Swap_Short_All = col_double(),
Pct_of_OI_Swap_Spread_All = col_double(),
Pct_of_OI_M_Money_Long_All = col_double(),
Pct_of_OI_M_Money_Short_All = col_double(),
Pct_of_OI_M_Money_Spread_All = col_double(),
Pct_of_OI_Other_Rept_Long_All = col_double(),
Pct_of_OI_Other_Rept_Short_All = col_double(),
Pct_of_OI_Other_Rept_Spread_All = col_double(),
Pct_of_OI_Tot_Rept_Long_All = col_double(),
Pct_of_OI_Tot_Rept_Short_All = col_double(),
Pct_of_OI_NonRept_Long_All = col_double(),
Pct_of_OI_NonRept_Short_All = col_double(),
Pct_of_Open_Interest_Old = col_double(),
Pct_of_OI_Prod_Merc_Long_Old = col_double(),
Pct_of_OI_Prod_Merc_Short_Old = col_double(),
Pct_of_OI_Swap_Long_Old = col_double(),
Pct_of_OI_Swap_Short_Old = col_double(),
Pct_of_OI_Swap_Spread_Old = col_double(),
Pct_of_OI_M_Money_Long_Old = col_double(),
Pct_of_OI_M_Money_Short_Old = col_double(),
Pct_of_OI_M_Money_Spread_Old = col_double(),
Pct_of_OI_Other_Rept_Long_Old = col_double(),
Pct_of_OI_Other_Rept_Short_Old = col_double(),
Pct_of_OI_Other_Rept_Spread_Old = col_double(),
Pct_of_OI_Tot_Rept_Long_Old = col_double(),
Pct_of_OI_Tot_Rept_Short_Old = col_double(),
Pct_of_OI_NonRept_Long_Old = col_double(),
Pct_of_OI_NonRept_Short_Old = col_double(),
Pct_of_Open_Interest_Other = col_double(),
Pct_of_OI_Prod_Merc_Long_Other = col_double(),
Pct_of_OI_Prod_Merc_Short_Other = col_double(),
Pct_of_OI_Swap_Long_Other = col_double(),
Pct_of_OI_Swap_Short_Other = col_double(),
Pct_of_OI_Swap_Spread_Other = col_double(),
Pct_of_OI_M_Money_Long_Other = col_double(),
Pct_of_OI_M_Money_Short_Other = col_double(),
Pct_of_OI_M_Money_Spread_Other = col_double(),
Pct_of_OI_Other_Rept_Long_Other = col_double(),
Pct_of_OI_Other_Rept_Short_Other = col_double(),
Pct_of_OI_Other_Rept_Spread_Other = col_double(),
Pct_of_OI_Tot_Rept_Long_Other = col_double(),
Pct_of_OI_Tot_Rept_Short_Other = col_double(),
Pct_of_OI_NonRept_Long_Other = col_double(),
Pct_of_OI_NonRept_Short_Other = col_double(),
Traders_Tot_All = col_double(),
Traders_Prod_Merc_Long_All = col_character(),
Traders_Prod_Merc_Short_All = col_character(),
Traders_Swap_Long_All = col_character(),
Traders_Swap_Short_All = col_character(),
Traders_Swap_Spread_All = col_character(),
Traders_M_Money_Long_All = col_character(),
Traders_M_Money_Short_All = col_character(),
Traders_M_Money_Spread_All = col_character(),
Traders_Other_Rept_Long_All = col_character(),
Traders_Other_Rept_Short_All = col_character(),
Traders_Other_Rept_Spread_All = col_character(),
Traders_Tot_Rept_Long_All = col_double(),
Traders_Tot_Rept_Short_All = col_double(),
Traders_Tot_Old = col_double(),
Traders_Prod_Merc_Long_Old = col_character(),
Traders_Prod_Merc_Short_Old = col_character(),
Traders_Swap_Long_Old = col_character(),
Traders_Swap_Short_Old = col_character(),
Traders_Swap_Spread_Old = col_character(),
Traders_M_Money_Long_Old = col_character(),
Traders_M_Money_Short_Old = col_character(),
Traders_M_Money_Spread_Old = col_character(),
Traders_Other_Rept_Long_Old = col_character(),
Traders_Other_Rept_Short_Old = col_character(),
Traders_Other_Rept_Spread_Old = col_character(),
Traders_Tot_Rept_Long_Old = col_double(),
Traders_Tot_Rept_Short_Old = col_double(),
Traders_Tot_Other = col_double(),
Traders_Prod_Merc_Long_Other = col_double(),
Traders_Prod_Merc_Short_Other = col_double(),
Traders_Swap_Long_Other = col_character(),
Traders_Swap_Short_Other = col_character(),
Traders_Swap_Spread_Other = col_character(),
Traders_M_Money_Long_Other = col_character(),
Traders_M_Money_Short_Other = col_character(),
Traders_M_Money_Spread_Other = col_character(),
Traders_Other_Rept_Long_Other = col_double(),
Traders_Other_Rept_Short_Other = col_character(),
Traders_Other_Rept_Spread_Other = col_double(),
Traders_Tot_Rept_Long_Other = col_double(),
Traders_Tot_Rept_Short_Other = col_double(),
Conc_Gross_LE_4_TDR_Long_All = col_double(),
Conc_Gross_LE_4_TDR_Short_All = col_double(),
Conc_Gross_LE_8_TDR_Long_All = col_double(),
Conc_Gross_LE_8_TDR_Short_All = col_double(),
Conc_Net_LE_4_TDR_Long_All = col_double(),
Conc_Net_LE_4_TDR_Short_All = col_double(),
Conc_Net_LE_8_TDR_Long_All = col_double(),
Conc_Net_LE_8_TDR_Short_All = col_double(),
Conc_Gross_LE_4_TDR_Long_Old = col_double(),
Conc_Gross_LE_4_TDR_Short_Old = col_double(),
Conc_Gross_LE_8_TDR_Long_Old = col_double(),
Conc_Gross_LE_8_TDR_Short_Old = col_double(),
Conc_Net_LE_4_TDR_Long_Old = col_double(),
Conc_Net_LE_4_TDR_Short_Old = col_double(),
Conc_Net_LE_8_TDR_Long_Old = col_double(),
Conc_Net_LE_8_TDR_Short_Old = col_double(),
Conc_Gross_LE_4_TDR_Long_Other = col_double(),
Conc_Gross_LE_4_TDR_Short_Other = col_double(),
Conc_Gross_LE_8_TDR_Long_Other = col_double(),
Conc_Gross_LE_8_TDR_Short_Other = col_double(),
Conc_Net_LE_4_TDR_Long_Other = col_double(),
Conc_Net_LE_4_TDR_Short_Other = col_double(),
Conc_Net_LE_8_TDR_Long_Other = col_double(),
Conc_Net_LE_8_TDR_Short_Other = col_double(),
Contract_Units = col_character(),
CFTC_Contract_Market_Code_Quotes = col_character(),
CFTC_Market_Code_Quotes = col_character(),
CFTC_Commodity_Code_Quotes = col_character(),
CFTC_SubGroup_Code = col_character(),
FutOnly_or_Combined = col_character(),
.delim = ","
)
我可以下载数据,并在不指定列类型的情况下读入数据文件,但是当我试图通过定义列类型来读取数据时,它失败了,并出现以下消息:Error: Unknown shortcut:
根据 vroom 的文档,我可以用 name = col_type() 的形式逐列定义类型,但我想成组地定义它们,因为有 177 个连续的列都应该是 double,而 vroom 却把其中一些列读成了 col_character()。
再澄清一下:我想知道如何用 rep() 或类似的函数在一行里成组地定义列类型,而不必逐个写出每一列的名称,这样我就可以用同一份定义读取许多不同的文件。非常感谢你的帮助。
发布于 2020-09-21 18:34:35
所以,问题(一如既往地)出在日期格式上(开个玩笑 :P)。因此,我们先把该列作为字符读取,然后用 lubridate 将其转换为日期。
# Read with positional column types given as a compact string, one letter per
# column (c = character, d = double), so runs of same-typed columns are
# declared by repetition instead of by name.
#
# Why not c(col_character(), ..., rep(col_double(), 177), ...): collector
# objects are empty classed lists, so c() and rep() collapse them to an empty
# list. vroom then receives no types at all and guesses every column.
#
# Layout, following the spec() listing shown in the question (191 columns):
#   1      market name                      character
#   2      As_of_Date_In_Form_YYMMDD        double
#   3      Report_Date_as_YYYY-MM-DD        character (parsed with lubridate below)
#   4-7    CFTC contract/market/region/commodity codes   character
#   8-185  positions, changes, percentages, trader counts, concentration   double
#   186-191 units, quote codes, sub-group, FutOnly_or_Combined   character
data2 <- vroom(
  "CFTC_COT/c_year.txt",
  col_types = paste0(
    "cdc",
    strrep("c", 4),
    strrep("d", 178),
    strrep("c", 6)
  )
)
data2$`Report_Date_as_YYYY-MM-DD` = lubridate::ymd(data2$`Report_Date_as_YYYY-MM-DD`)
发布于 2020-09-22 06:10:59
谢谢波尔卡让我从不同的角度思考这个问题。最有效的办法是:
# Read the file with an explicit column specification keyed by column name.
# Per the answer text that follows this snippet, the cols() call was pasted
# from the spec() output, and the columns that had been guessed as character
# were then switched to col_double() with a find-and-replace.
data2 <- vroom( 'CFTC_COT/c_year.txt'
, col_types =
cols(
Market_and_Exchange_Names = col_character(),
As_of_Date_In_Form_YYMMDD = col_double(),
`Report_Date_as_YYYY-MM-DD` = col_date(format = ""),
CFTC_Contract_Market_Code = col_character(),
CFTC_Market_Code = col_character(),
CFTC_Region_Code = col_character(),
CFTC_Commodity_Code = col_character(),
Open_Interest_All = col_double(),
Prod_Merc_Positions_Long_All = col_double(),
Prod_Merc_Positions_Short_All = col_double(),
Swap_Positions_Long_All = col_double(),
Swap__Positions_Short_All = col_double(),
Swap__Positions_Spread_All = col_double(),
M_Money_Positions_Long_All = col_double(),
M_Money_Positions_Short_All = col_double(),
M_Money_Positions_Spread_All = col_double(),
Other_Rept_Positions_Long_All = col_double(),
Other_Rept_Positions_Short_All = col_double(),
Other_Rept_Positions_Spread_All = col_double(),
Tot_Rept_Positions_Long_All = col_double(),
Tot_Rept_Positions_Short_All = col_double(),
NonRept_Positions_Long_All = col_double(),
NonRept_Positions_Short_All = col_double(),
Open_Interest_Old = col_double(),
Prod_Merc_Positions_Long_Old = col_double(),
Prod_Merc_Positions_Short_Old = col_double(),
Swap_Positions_Long_Old = col_double(),
Swap__Positions_Short_Old = col_double(),
Swap__Positions_Spread_Old = col_double(),
M_Money_Positions_Long_Old = col_double(),
M_Money_Positions_Short_Old = col_double(),
M_Money_Positions_Spread_Old = col_double(),
Other_Rept_Positions_Long_Old = col_double(),
Other_Rept_Positions_Short_Old = col_double(),
Other_Rept_Positions_Spread_Old = col_double(),
Tot_Rept_Positions_Long_Old = col_double(),
Tot_Rept_Positions_Short_Old = col_double(),
NonRept_Positions_Long_Old = col_double(),
NonRept_Positions_Short_Old = col_double(),
Open_Interest_Other = col_double(),
Prod_Merc_Positions_Long_Other = col_double(),
Prod_Merc_Positions_Short_Other = col_double(),
Swap_Positions_Long_Other = col_double(),
Swap__Positions_Short_Other = col_double(),
Swap__Positions_Spread_Other = col_double(),
M_Money_Positions_Long_Other = col_double(),
M_Money_Positions_Short_Other = col_double(),
M_Money_Positions_Spread_Other = col_double(),
Other_Rept_Positions_Long_Other = col_double(),
Other_Rept_Positions_Short_Other = col_double(),
Other_Rept_Positions_Spread_Other = col_double(),
Tot_Rept_Positions_Long_Other = col_double(),
Tot_Rept_Positions_Short_Other = col_double(),
NonRept_Positions_Long_Other = col_double(),
NonRept_Positions_Short_Other = col_double(),
Change_in_Open_Interest_All = col_double(),
Change_in_Prod_Merc_Long_All = col_double(),
Change_in_Prod_Merc_Short_All = col_double(),
Change_in_Swap_Long_All = col_double(),
Change_in_Swap_Short_All = col_double(),
Change_in_Swap_Spread_All = col_double(),
Change_in_M_Money_Long_All = col_double(),
Change_in_M_Money_Short_All = col_double(),
Change_in_M_Money_Spread_All = col_double(),
Change_in_Other_Rept_Long_All = col_double(),
Change_in_Other_Rept_Short_All = col_double(),
Change_in_Other_Rept_Spread_All = col_double(),
Change_in_Tot_Rept_Long_All = col_double(),
Change_in_Tot_Rept_Short_All = col_double(),
Change_in_NonRept_Long_All = col_double(),
Change_in_NonRept_Short_All = col_double(),
Pct_of_Open_Interest_All = col_double(),
Pct_of_OI_Prod_Merc_Long_All = col_double(),
Pct_of_OI_Prod_Merc_Short_All = col_double(),
Pct_of_OI_Swap_Long_All = col_double(),
Pct_of_OI_Swap_Short_All = col_double(),
Pct_of_OI_Swap_Spread_All = col_double(),
Pct_of_OI_M_Money_Long_All = col_double(),
Pct_of_OI_M_Money_Short_All = col_double(),
Pct_of_OI_M_Money_Spread_All = col_double(),
Pct_of_OI_Other_Rept_Long_All = col_double(),
Pct_of_OI_Other_Rept_Short_All = col_double(),
Pct_of_OI_Other_Rept_Spread_All = col_double(),
Pct_of_OI_Tot_Rept_Long_All = col_double(),
Pct_of_OI_Tot_Rept_Short_All = col_double(),
Pct_of_OI_NonRept_Long_All = col_double(),
Pct_of_OI_NonRept_Short_All = col_double(),
Pct_of_Open_Interest_Old = col_double(),
Pct_of_OI_Prod_Merc_Long_Old = col_double(),
Pct_of_OI_Prod_Merc_Short_Old = col_double(),
Pct_of_OI_Swap_Long_Old = col_double(),
Pct_of_OI_Swap_Short_Old = col_double(),
Pct_of_OI_Swap_Spread_Old = col_double(),
Pct_of_OI_M_Money_Long_Old = col_double(),
Pct_of_OI_M_Money_Short_Old = col_double(),
Pct_of_OI_M_Money_Spread_Old = col_double(),
Pct_of_OI_Other_Rept_Long_Old = col_double(),
Pct_of_OI_Other_Rept_Short_Old = col_double(),
Pct_of_OI_Other_Rept_Spread_Old = col_double(),
Pct_of_OI_Tot_Rept_Long_Old = col_double(),
Pct_of_OI_Tot_Rept_Short_Old = col_double(),
Pct_of_OI_NonRept_Long_Old = col_double(),
Pct_of_OI_NonRept_Short_Old = col_double(),
Pct_of_Open_Interest_Other = col_double(),
Pct_of_OI_Prod_Merc_Long_Other = col_double(),
Pct_of_OI_Prod_Merc_Short_Other = col_double(),
Pct_of_OI_Swap_Long_Other = col_double(),
Pct_of_OI_Swap_Short_Other = col_double(),
Pct_of_OI_Swap_Spread_Other = col_double(),
Pct_of_OI_M_Money_Long_Other = col_double(),
Pct_of_OI_M_Money_Short_Other = col_double(),
Pct_of_OI_M_Money_Spread_Other = col_double(),
Pct_of_OI_Other_Rept_Long_Other = col_double(),
Pct_of_OI_Other_Rept_Short_Other = col_double(),
Pct_of_OI_Other_Rept_Spread_Other = col_double(),
Pct_of_OI_Tot_Rept_Long_Other = col_double(),
Pct_of_OI_Tot_Rept_Short_Other = col_double(),
Pct_of_OI_NonRept_Long_Other = col_double(),
Pct_of_OI_NonRept_Short_Other = col_double(),
Traders_Tot_All = col_double(),
Traders_Prod_Merc_Long_All = col_double(),
Traders_Prod_Merc_Short_All = col_double(),
Traders_Swap_Long_All = col_double(),
Traders_Swap_Short_All = col_double(),
Traders_Swap_Spread_All = col_double(),
Traders_M_Money_Long_All = col_double(),
Traders_M_Money_Short_All = col_double(),
Traders_M_Money_Spread_All = col_double(),
Traders_Other_Rept_Long_All = col_double(),
Traders_Other_Rept_Short_All = col_double(),
Traders_Other_Rept_Spread_All = col_double(),
Traders_Tot_Rept_Long_All = col_double(),
Traders_Tot_Rept_Short_All = col_double(),
Traders_Tot_Old = col_double(),
Traders_Prod_Merc_Long_Old = col_double(),
Traders_Prod_Merc_Short_Old = col_double(),
Traders_Swap_Long_Old = col_double(),
Traders_Swap_Short_Old = col_double(),
Traders_Swap_Spread_Old = col_double(),
Traders_M_Money_Long_Old = col_double(),
Traders_M_Money_Short_Old = col_double(),
Traders_M_Money_Spread_Old = col_double(),
Traders_Other_Rept_Long_Old = col_double(),
Traders_Other_Rept_Short_Old = col_double(),
Traders_Other_Rept_Spread_Old = col_double(),
Traders_Tot_Rept_Long_Old = col_double(),
Traders_Tot_Rept_Short_Old = col_double(),
Traders_Tot_Other = col_double(),
Traders_Prod_Merc_Long_Other = col_double(),
Traders_Prod_Merc_Short_Other = col_double(),
Traders_Swap_Long_Other = col_double(),
Traders_Swap_Short_Other = col_double(),
Traders_Swap_Spread_Other = col_double(),
Traders_M_Money_Long_Other = col_double(),
Traders_M_Money_Short_Other = col_double(),
Traders_M_Money_Spread_Other = col_double(),
Traders_Other_Rept_Long_Other = col_double(),
Traders_Other_Rept_Short_Other = col_double(),
Traders_Other_Rept_Spread_Other = col_double(),
Traders_Tot_Rept_Long_Other = col_double(),
Traders_Tot_Rept_Short_Other = col_double(),
Conc_Gross_LE_4_TDR_Long_All = col_double(),
Conc_Gross_LE_4_TDR_Short_All = col_double(),
Conc_Gross_LE_8_TDR_Long_All = col_double(),
Conc_Gross_LE_8_TDR_Short_All = col_double(),
Conc_Net_LE_4_TDR_Long_All = col_double(),
Conc_Net_LE_4_TDR_Short_All = col_double(),
Conc_Net_LE_8_TDR_Long_All = col_double(),
Conc_Net_LE_8_TDR_Short_All = col_double(),
Conc_Gross_LE_4_TDR_Long_Old = col_double(),
Conc_Gross_LE_4_TDR_Short_Old = col_double(),
Conc_Gross_LE_8_TDR_Long_Old = col_double(),
Conc_Gross_LE_8_TDR_Short_Old = col_double(),
Conc_Net_LE_4_TDR_Long_Old = col_double(),
Conc_Net_LE_4_TDR_Short_Old = col_double(),
Conc_Net_LE_8_TDR_Long_Old = col_double(),
Conc_Net_LE_8_TDR_Short_Old = col_double(),
Conc_Gross_LE_4_TDR_Long_Other = col_double(),
Conc_Gross_LE_4_TDR_Short_Other = col_double(),
Conc_Gross_LE_8_TDR_Long_Other = col_double(),
Conc_Gross_LE_8_TDR_Short_Other = col_double(),
Conc_Net_LE_4_TDR_Long_Other = col_double(),
Conc_Net_LE_4_TDR_Short_Other = col_double(),
Conc_Net_LE_8_TDR_Long_Other = col_double(),
Conc_Net_LE_8_TDR_Short_Other = col_double(),
Contract_Units = col_character(),
CFTC_Contract_Market_Code_Quotes = col_character(),
CFTC_Market_Code_Quotes = col_character(),
CFTC_Commodity_Code_Quotes = col_character(),
CFTC_SubGroup_Code = col_character(),
FutOnly_or_Combined = col_character(),
.delim = ","
)
)
我只是复制了 spec() 的输出并将其粘贴到 col_types 中。然后,对于那些被读成字符而出问题的列,我做了一次查找替换,把它们改为 col_double()。这比我原先想要的做法更好,因为如果文件的列发生变化,就会产生错误。
不过,它确实会产生一些令人讨厌的长代码:)。
https://stackoverflow.com/questions/63997953
复制相似问题