首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >data.frame中最常见的元素

data.frame中最常见的元素
EN

Stack Overflow用户
提问于 2020-08-20 13:25:26
回答 3查看 109关注 0票数 1

我的样本数据如下。我想从名为 `Most frequent` 的列中找出出现次数最多的数字。

代码语言:javascript
复制
# Sample data (dput-style output): a 25-row tibble with two character
# columns. `name` holds one label per row; `Most frequent` holds the codes
# for that row packed into a single comma-separated string.
structure(list(name = c("act1_25", "act1_26", "act1_27", "act1_28", 
    "act1_29", "act1_30", "act1_31", "act1_32", "act1_33", "act1_34", 
    "act1_35", "act1_36", "act1_37", "act1_38", "act1_39", "act1_40", 
    "act1_41", "act1_42", "act1_43", "act1_44", "act1_45", "act1_46", 
    "act1_47", "act1_48", "serial"), `Most frequent` = c("111,110,9120,310,3110,210,1110", 
    "210,3110,110,310,9120,9210,8210,3819,1110,111", "210,110,310,8210,3110,1110", 
    "210,9120,110,310,8210,1110,3819", "210,9120,310,110,1110,111", 
    "9120,110,1110,3830", "110,1110,3210,310", "210,110,1110,8210,310", 
    "1110,310,8210,110", "210,310,1110,8210,110", "310,1110,8210,210,110", 
    "3210,9120,1110,8210,110", "1110,210,310,110,3830", "1110,210,310,110,3210,3830", 
    "1110,8210,110,3830,3210", "1110,310,110,3210,5190", "1110,110,5190", 
    "1110,3210,3830,310,8210,110,5190", "1110,8210,310,210,1120,110", 
    "1110,8210,310", "1110,8210,310", "1110,8210,310,110", "1110,8210,310,110", 
    "210,1110,8210,5190,110", "27080618")), class = c("tbl_df", "tbl", 
    "data.frame"), row.names = c(NA, -25L))

Example of Output

Most frequent: 110
EN

回答 3

Stack Overflow用户

回答已采纳

发布于 2020-08-20 13:30:25

我建议采用一种tidyverse方法:

代码语言:javascript
复制
library(tidyr)
library(dplyr)
# Split the comma-separated `Most frequent` strings so that every code sits
# on its own row, tally the rows per code into column `N`, and list the most
# common codes first.
df %>%
  # Select the column by name only. The former extra positional selector `2`
  # was redundant and made the call depend on the column order of `df`.
  separate_rows(`Most frequent`, sep = ",") %>%
  # count() is the ungrouped equivalent of group_by() + summarise(n()).
  count(`Most frequent`, name = "N") %>%
  arrange(desc(N))

输出:

代码语言:javascript
复制
   Most frequent  N
1           1110 24
2            110 22
3            310 19
4           8210 16
5            210 12
6           3210  6
7           9120  6
8           3830  5
9           5190  4
10           111  3
11          3110  3
12          3819  2
13          1120  1
14      27080618  1
15          9210  1

这类似于@AllanCameron的结果。

票数 2
EN

Stack Overflow用户

发布于 2020-08-20 13:27:26

你可以这样做:

代码语言:javascript
复制
# Break each comma-separated string into its individual codes, tally how
# often every code occurs, and report the label of the largest tally.
code_tokens <- strsplit(df$`Most frequent`, ",")
code_counts <- table(unlist(code_tokens))
names(which.max(code_counts))
#> [1] "1110"

或者,如果您想要该值及其实际计数,可以这样做:

代码语言:javascript
复制
# Tally every code across all rows. unlist() is called inside table() so the
# printed table carries no variable-name header.
code_tokens <- strsplit(df$`Most frequent`, ",")
code_counts <- table(unlist(code_tokens))
# Sort ascending, flip to descending, and keep only the leading entry.
ranked_counts <- rev(sort(code_counts))
ranked_counts[1]
#> 1110 
#>   24 

您可以看到,1110 实际上出现了 24 次,而 110 只出现了 22 次:

代码语言:javascript
复制
# Tally every code across all rows. unlist() is called inside table() so the
# printed table carries no variable-name header.
code_tokens <- strsplit(df$`Most frequent`, ",")
code_counts <- table(unlist(code_tokens))
# Sort ascending, then reverse to show the full ranking in descending order.
rev(sort(code_counts))
#>    1110      110      310     8210      210     9120     3210     3830 
#>      24       22       19       16       12        6        6        5 
#>    5190     3110      111     3819     9210 27080618     1120 
#>       4        3        3        2        1        1        1 
票数 4
EN

Stack Overflow用户

发布于 2020-08-20 13:43:07

代码语言:javascript
复制
# Build `df`: a 25-row tibble with two character columns. `name` holds one
# label per row; `Most frequent` holds the codes for that row packed into a
# single comma-separated string.
df <- structure(list(name = c("act1_25", "act1_26", "act1_27", "act1_28", 
    "act1_29", "act1_30", "act1_31", "act1_32", "act1_33", "act1_34", 
    "act1_35", "act1_36", "act1_37", "act1_38", "act1_39", "act1_40", 
    "act1_41", "act1_42", "act1_43", "act1_44", "act1_45", "act1_46", 
    "act1_47", "act1_48", "serial"), `Most frequent` = c("111,110,9120,310,3110,210,1110", 
    "210,3110,110,310,9120,9210,8210,3819,1110,111", "210,110,310,8210,3110,1110", 
    "210,9120,110,310,8210,1110,3819", "210,9120,310,110,1110,111", 
    "9120,110,1110,3830", "110,1110,3210,310", "210,110,1110,8210,310", 
    "1110,310,8210,110", "210,310,1110,8210,110", "310,1110,8210,210,110", 
    "3210,9120,1110,8210,110", "1110,210,310,110,3830", "1110,210,310,110,3210,3830", 
    "1110,8210,110,3830,3210", "1110,310,110,3210,5190", "1110,110,5190", 
    "1110,3210,3830,310,8210,110,5190", "1110,8210,310,210,1120,110", 
    "1110,8210,310", "1110,8210,310", "1110,8210,310,110", "1110,8210,310,110", 
    "210,1110,8210,5190,110", "27080618")), class = c("tbl_df", "tbl", 
    "data.frame"), row.names = c(NA, -25L))

library(dplyr)
library(tidyr)

# Step 1: give every comma-separated code in `Most frequent` its own row.
codes_long <- separate_rows(df, `Most frequent`, sep = ",")
# Step 2: tally the rows per distinct code into a column named `frequency`.
code_tally <- count(codes_long, `Most frequent`, name = "frequency")
# Step 3: show the tally with the most frequent codes first.
arrange(code_tally, desc(frequency))

#> # A tibble: 15 x 2
#>    `Most frequent` frequency
#>    <chr>               <int>
#>  1 1110                   24
#>  2 110                    22
#>  3 310                    19
#>  4 8210                   16
#>  5 210                    12
#>  6 3210                    6
#>  7 9120                    6
#>  8 3830                    5
#>  9 5190                    4
#> 10 111                     3
#> 11 3110                    3
#> 12 3819                    2
#> 13 1120                    1
#> 14 27080618                1
#> 15 9210                    1
票数 1
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/63506125

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档