尝试获取多个.txt (GC)文件,取出两列数据并将它们分配给我最终可以传递给GCalignR的对象。是否有更好的方法为GCalignR处理这些数据?
引起问题的辅助功能:
“”“
# nonempty strings after splitting a row by one space
get_nonempty_splits = function(row) {
s = strsplit(row, "[[:space:]]")
s = s[[1]]
l = c()
for (i in 1:length(s)) {
if (s[i] != "") {
l = c(l, s[i])
}
}
return(l)
}
# filenames have .txt, names do not
filenames_to_names = function(x) {
l = c()
for (i in 1:length(x)) {
x1 = strsplit(x, "[.]")[[i]][1]
l = c(l, x1)
}
return(l)
}
# get data row indices
get_data_row_inds = function(df) {
ind_start = 0
ind_end = 0
for (i in 1:length(df)) {
row = df[i]
# find start
if (grepl("----", row)) {
stopifnot(ind_start == 0) # assert ind_start not set
ind_start = i+1
}
# find end
if (i == length(df) && ind_end == 0) {
ind_end = length(df)
} else if (grepl("Totals", row) && grepl(":", row)) {
stopifnot(ind_end == 0) # assert ind_end not set
ind_end = i-1
}
}
stopifnot(ind_start != 0)
stopifnot(ind_end != 0)
return(ind_start:ind_end)
}“”“
“”“
path_to_raw_data = "/Users/input"
path_to_processed_data = "."
# get paths for all .txt files in pathToRawData directory
paths = list.files(path_to_raw_data, full.name=TRUE, pattern = ".txt")
filenames = list.files(path_to_raw_data, full.name=FALSE, pattern = ".txt")
names = filenames_to_names(filenames) # without .txt extension
# get data from text file
processed_data = list()
df_lengths = c()
for (i in 1:length(paths)) { # i indexes the raw files
path = paths[i]
df = read.delim(path, fileEncoding= 'UTF-16LE', header=TRUE)
df = df[[1]]
inds = get_data_row_inds(df)
df_lengths = c(df_lengths, length(inds))
times = c()
areas = c()
for (j in inds) { # j indexes the data rows of a raw file
row = df[j]
row = get_nonempty_splits(row)
time = row_to_time(row)
area = row_to_area(row)
times = c(times, time)
areas = c(areas, area)
}
pairs = data.frame(time = times, area = areas)
processed_data[[i]] = pairs
}“”“
获取此错误:str拆分中的错误(行,"[:space:]"):非字符参数
有什么解决办法的建议吗?是文件编码吗?数据=list()不返回任何.?
输入标题:“
dput(头(Df))结构(c(59L,53L,45L,48L,47L,52L),.Label = c(“Inj体积:1l ",”* Report *“,”面积百分比报告“,“1 1.353 1 BB 2.85703 2.453 e-5?",”2 1.952 1 BV 4411.39551 0.03787?",“3 2.058 1 VV 4693.20215 0.04029?",”4 2.089 1 VV 6614.89502 0.05679?",“5 2.139 2 0.00000 0.00000 NG ",”6 2.452 2 0.00000 0.00000 1,3 DNB ",“7 3.149 2 0.00000 0.00000 2,4-DNT,“8 3.315 1 VV S 1.15784e7 99.39858?",”9 3.347 1 VV S 5169.44629 0.04438?",“# min %",”10 3.372 1 VV S 2.09449e4 0.17981?",“11 3.466 1 VV S 2535.17432 0.02176?",“12 3.547 1 VB S 2.45685e4 0.21092?",”13 3.602 1 BV T 451.00174 0.00387?",“14 3.686 1 VV T 40.45324 0.00035?",”15 3.734 1 VV T 13.40936 0.00012?",“16 3.819 1 VB T 508.57788 0.00437?",”17 4.119 1 BB 13.01144 0.00011?",“18 4.856 2 0.00000 0.00000 TNT ",”19 4.975 2 0.00000 0.00000 TNB ",“20 5.549 2 0.00000 0.00000 4-上午-DNT ",”21 5.869 2 0.00000 0.00000 RDX ",“22 5.943 2 0.00000 0.00000 2-Am-DNT ",”23 6.516 2 0.00000 0.00000四元",“24 11.716 1 BB 1.75858 1.510 e-5?",”25 14.243 1 BB 2.55644 2.195 e-5?",“26 16.654 1 BB 3.81723 3.277 e-5?",”27 18.826 1 BB 2.58369 2.218 e-5?",“28 20.800 1 BB 1.51171 1.298 e-5?",”29 24.159 1 BB 1.78975 1.536 e-5?",“30 24.269 1 BB 1.81180 1.555e-5?",”31 25.053 1 BB 2.96617 2.546 e-5?",“32 25.658 1 BB 6.15337 5.283 e-5?",”33 25.809 1 BB 3.89435 3.343 e-5?",“34 26.577 1 BB 4.02199 3.453 e-5?",”35 26.885 1 BB 2.48416 2.133 e-5?",“36 27.219 1 BB 14.88012 0.00013?",”37 27.465 1 BB 3.59732 3.088 e-5?",“38 29.377 1 BB 18.55422 0.00016?",”39 32.554 1 BB 17.15620 0.00015?","----|-------|---|------|----------|--------|-------------------------","=====================================================================","2警告或错误:","Acq。仪器:仪器1位置:11小瓶,"Acq“。接线员: HHV Seq。线: 2",“杯”。数据修改:2019年3月12日(星期二)- 6:13:25 PM、“稀释: 1.0000”、“不对ISTDs使用乘数和稀释系数”、“注射日期:24-2月20日、14:37:34 Inj : 1”、“仪器1 2/24/2020 3:13:35 PM HHV”、“最后一次更改: 2/6/2020 12:59:45下午被HHV",“方法: C:\Chem32\1\DATA\IPOULIN\VOC_TEST_1 2020-02-24 13-49-15\VOC_TEST_HV.M”、“方法信息: VOC”、“乘数: 1.0000”、“峰值RetTime Sig类型面积名称”、“示例名称: P1U1十六进制022420",“序列文件: C:\Chem32\1\DATA\IPOULIN\VOC_TEST_1 2020-02-24 13-49-15\VOC_TEST_1.S",”信号1: FID1 B ",“按保留时间排序",”总计: 1.16485e7“,”警告:未找到已校准的化合物“,”警告:校准警告(见校准表列表)“,类=”因子“)
发布于 2020-10-12 18:10:30
通过添加: df =read.delim(路径,fileEncoding= 'UTF-16LE',header=TRUE,stringsAsFactors = FALSE)进行求解。"stringsAsFactors = FALSE“是解决方案。谢谢。
https://stackoverflow.com/questions/64214969
复制相似问题