首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >R:避免循环

R:避免循环
EN

Stack Overflow用户
提问于 2015-11-30 08:13:49
回答 2查看 296关注 0票数 0

我加载一个json格式文件。示例数据sampleData(并非所有变量都包含在示例中):

代码语言:javascript
复制
[[1]]
[[1]]$id
[1] "000018ac-04ef-4270-81e6-9e3cb8274d31"

[[1]]$currentCompany
[1] ""

[[1]]$skills
list()


[[2]]
[[2]]$id
[1] "00000259-7c1c-4db6-9a2a-6d450626fbac"

[[2]]$currentCompany
[1] "Super Market 2"

[[2]]$skills
[[2]]$skills[[1]]
[[2]]$skills[[1]]$name
[1] "OpenGL"

[2]]$skills
[[2]]$skills[[1]]
[[2]]$skills[[1]]$type
[1] "link"    

[2]]$skills
[[2]]$skills[[2]]
[[2]]$skills[[2]]$name
[1] "C"

[2]]$skills
[[2]]$skills[[2]]
[[2]]$skills[[2]]$type
[1] "link"

因此,我想从这个文件中提取出列表中的技能。我使用以下代码:

代码语言:javascript
复制
skill <- list()
for (i in 1:length(sampleData){
    skill[i][1] <- 'empty'
    for (j in 1:length(sampleData[[i]]$skills)){
        if (length(sampleData[[i]]$skills)==0){
            skill[[i]][j] <- NA
        }else{
            skill[[i]][j] <- json[[i]]$skills[[j]]$name  
        }

   }
}

结果:

代码语言:javascript
复制
[[1]]
[1] NA

[[2]]
[1] "OpenGL"               "C"   

结果没问题。我的问题是有什么办法使它更有效率或更优雅吗?例如,不使用循环?此外,我使用了这行代码skill[i][1] <- 'empty'原因,而没有在错误内部初始化弹出的ups Error intmp[[i]] : subscript out of bounds。有什么可以避免这种转储初始化的吗?

EN

回答 2

Stack Overflow用户

回答已采纳

发布于 2015-11-30 13:48:51

这个怎么样:

代码语言:javascript
复制
sampleData <-
  list(
    list(
      id = "000018ac-04ef-4270-81e6-9e3cb8274d31",
      curruentCompany = "",
      skills = list()
    ),
    list(
      id = "00000259-7c1c-4db6-9a2a-6d450626fbac",
      curruentCompany = "",
      skills = list(
        list( name = "OpenGL" ),
        list( name = "C" )
      )
    )   
  )

A <- unlist(sampleData,recursive=FALSE)
n <- which(names(A)=="skills")

skill_L <- A[n]
skill <- lapply(skill_L,unlist)

skill_L中,技能仍然是列表,在skill中它们是向量:

代码语言:javascript
复制
> skill_L
$skills
list()

$skills
$skills[[1]]
$skills[[1]]$name
[1] "OpenGL"


$skills[[2]]
$skills[[2]]$name
[1] "C"



> skill
$skills
NULL

$skills
    name     name 
"OpenGL"      "C" 

> 

“没有技能”的例子是由NULL而不是NA代表的。如果这不合适,则需要来自apply-family的另一个函数应用程序:

代码语言:javascript
复制
> skill[sapply(skill,is.null)] <- NA
> skill
$skills
[1] NA

$skills
    name     name 
"OpenGL"      "C" 

> 

解决方案中出现subscript out of bounds错误的原因可能是R通过for-loop运行

代码语言:javascript
复制
for (i in 1:0){...}

不是0次而是两次,一次是i=1,一次是i=0。例如,看看我和我们的R巨人中的一个关于这个话题的heated debate

基准测试:

代码语言:javascript
复制
#--------------------------------------------------------------

mra68 <- function(data)
{
  A <- unlist(data,recursive=FALSE)  
  skill_vec <- lapply(A[which(names(A)=="skills")],unlist)
  skill_vec[sapply(skill_vec,is.null)] <- NA
  skill_vec
}

#--------------------------------------------------------------

thothal <- function(sampleData) {
  lapply(sampleData, function(l) {
    if (length(l$skills) == 0) {
      NA
    } else {
      unlist(l$skills)
    }
  })
}

#--------------------------------------------------------------

createData <- function(length.out) {
  ret <- vector("list", length.out)
  for (i in seq_len(length.out)) {
    skills.n <- sample(0:10, 1)
    skills <- vector("list", skills.n)
    for (j in seq_len(skills.n)) {
      skills[[j]] <- list(name = paste0("skill = ", j, ", id = ", i))
    }
    ret[[i]] <- list(id = paste("id", i),
                     currentCompany = paste("Company", i),
                     skills = skills)
  }
  ret
}
#--------------------------------------------------------------

library(microbenchmark)

data <- createData(1e+4)

microbenchmark(mra68(data),
               thothal(data), unit = "s")

-

代码语言:javascript
复制
> microbenchmark(mra68(data),
+                thothal(data), unit = "s")
Unit: seconds
          expr       min        lq      mean    median        uq       max neval
   mra68(data) 0.1531227 0.2078011 0.2779383 0.2946479 0.3299953 0.5017640   100
 thothal(data) 0.1664253 0.2260693 0.3168359 0.3277690 0.3726730 0.7728963   100
> 

如果NULL而不是NA没有问题的话:

代码语言:javascript
复制
mra68_NULL <- function(data)
{
  A <- unlist(data,recursive=FALSE)  
  skill_vec <- lapply(A[which(names(A)=="skills")],unlist)
  skill_vec
}

代码语言:javascript
复制
> microbenchmark(mra68_NULL(data),
+                mra68(data),
+                thothal(data), unit = "s" )
Unit: seconds
             expr       min        lq      mean    median        uq       max neval
 mra68_NULL(data) 0.1019852 0.1439472 0.2177345 0.2501026 0.2554921 0.3492776   100
      mra68(data) 0.1141832 0.1871851 0.2647803 0.2884579 0.3091937 0.7359363   100
    thothal(data) 0.1216594 0.2031117 0.2885294 0.3148583 0.3344948 0.8078336   100
> 

有几个字段:

代码语言:javascript
复制
sampleData <-
  list(
    list(
      id = "000018ac-04ef-4270-81e6-9e3cb8274d31",
      curruentCompany = "",
      skills = list()
    ),
    list(
      id = "00000259-7c1c-4db6-9a2a-6d450626fbac",
      curruentCompany = "",
      skills = list(
        list( type = "link",
              name = "OpenGL" ),
        list( name = "C" )
      )
    ),
    list(
      id = "00000259-7c1c-4db6-9a2a-6d450626fbac",
      curruentCompany = "",
      skills = list(
        list( aaa  = "X" ),
        list( type = "Link",
              name = "abc",
              bbb  = "xyz" ),
        list( name = "E",
              aaa  = "123" )
      )
    )
  )

#--------------------------------------------------------------

A <- unlist(sampleData,recursive=FALSE)
n <- which(names(A)=="skills")

skill_L <- A[n]

f <- function(x){
  if (length(x)>0){x[sapply(x,is.null)] <- NA} else {x<-NA}
  return(x)
}

skill <- list(
  name = lapply( lapply(sapply(skill_L,sapply,"[","name"),f), unlist ),
  type = lapply( lapply(sapply(skill_L,sapply,"[","type"),f), unlist ),
  aaa  = lapply( lapply(sapply(skill_L,sapply,"[","aaa" ),f), unlist ),
  bbb  = lapply( lapply(sapply(skill_L,sapply,"[","bbb" ),f), unlist )
)

代码语言:javascript
复制
> skill$name
$skills
[1] NA

$skills
    name     name 
"OpenGL"      "C" 

$skills
 <NA>  name  name 
   NA "abc"   "E" 

代码语言:javascript
复制
> skill$type
$skills
[1] NA

$skills
  type   <NA> 
"link"     NA 

$skills
  <NA>   type   <NA> 
    NA "Link"     NA 

代码语言:javascript
复制
> skill$aaa
$skills
[1] NA

$skills
<NA> <NA> 
  NA   NA 

$skills
  aaa  <NA>   aaa 
  "X"    NA "123" 

代码语言:javascript
复制
> skill$bbb
$skills
[1] NA

$skills
<NA> <NA> 
  NA   NA 

$skills
 <NA>   bbb  <NA> 
   NA "xyz"    NA 
票数 1
EN

Stack Overflow用户

发布于 2015-11-30 08:38:25

您可以使用lapply (不过,这也是一个循环),但是代码变得更短了:

代码语言:javascript
复制
sampleData <- list(list(id = "abc",
                        currentCompany = "",
                        skills = list()),
                   list(id = "abc2",
                        currentCompany = "xyz",
                        skills = list(list(name = "OpenGL"),
                                 list(name = "C"))))

lapply(sampleData, function(l) {
   if (length(l$skills) == 0) {
      NA
   } else {
      unlist(l$skills)
   }
})
# [[1]]
# [1] NA

# [[2]]
#     name     name 
# "OpenGL"      "C"

更新

快速(和肮脏)基准测试结果表明,lapply大约快50%。注:createData也可以做得更聪明。

代码语言:javascript
复制
library(microbenchmark)
microbenchmark(lps(createData(1e4)), lply(createData(1e4)), unit = "s")
# Unit: seconds
#                     expr       min        lq      mean    median        uq
#   lps(createData(10000)) 1.1829743 1.2541602 1.3069261 1.2873486 1.3478340
#  lply(createData(10000)) 0.5331418 0.5613532 0.6009065 0.5926779 0.6294598
#        max neval cld
#  1.4960136   100   b
#  0.8075482   100  a 

代码语言:javascript
复制
createData <- function(length.out) {
   ret <- vector("list", length.out)
   for (i in seq_len(length.out)) {
      skills.n <- sample(0:10, 1)
      skills <- vector("list", skills.n)
      for (j in seq_len(skills.n)) {
         skills[[j]] <- list(name = paste0("skill = ", j, ", id = ", i))
      }
      ret[[i]] <- list(id = paste("id", i),
                       currentCompany = paste("Company", i),
                       skills = skills)
   }
   ret
}

lply <- function(sampleData) {
   lapply(sampleData, function(l) {
      if (length(l$skills) == 0) {
         NA
      } else {
         unlist(l$skills)
      }
   })
}

lps <- function(sampleData) {
   skill <- list()
   for (i in 1:length(sampleData)){
      skill[i][1] <- 'empty'
      for (j in 1:length(sampleData[[i]]$skills)){
         if (length(sampleData[[i]]$skills) == 0){
            skill[[i]][j] <- NA
         }else{
            skill[[i]][j] <- sampleData[[i]]$skills[[j]]$name  
         }
      }
   }
   skill
}
票数 1
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/33994066

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档