文章/答案/技术大牛

发布

问data.frame中最常见的元素
EN

Stack Overflow用户

提问于 2020-08-20 13:25:26

回答 3查看 109关注 0票数 1

我的示例数据如下。我想从名为 `Most frequent` 的列中找出出现次数最多的数字。

# Sample data as dput() output: a 25-row data frame carrying the tibble
# classes, with two character columns. `name` labels each row, and
# `Most frequent` holds a comma-separated string of numeric codes per row.
# Note: the expression is not assigned here; bind it to a name before use.
structure(list(name = c("act1_25", "act1_26", "act1_27", "act1_28", 
    "act1_29", "act1_30", "act1_31", "act1_32", "act1_33", "act1_34", 
    "act1_35", "act1_36", "act1_37", "act1_38", "act1_39", "act1_40", 
    "act1_41", "act1_42", "act1_43", "act1_44", "act1_45", "act1_46", 
    "act1_47", "act1_48", "serial"), `Most frequent` = c("111,110,9120,310,3110,210,1110", 
    "210,3110,110,310,9120,9210,8210,3819,1110,111", "210,110,310,8210,3110,1110", 
    "210,9120,110,310,8210,1110,3819", "210,9120,310,110,1110,111", 
    "9120,110,1110,3830", "110,1110,3210,310", "210,110,1110,8210,310", 
    "1110,310,8210,110", "210,310,1110,8210,110", "310,1110,8210,210,110", 
    "3210,9120,1110,8210,110", "1110,210,310,110,3830", "1110,210,310,110,3210,3830", 
    "1110,8210,110,3830,3210", "1110,310,110,3210,5190", "1110,110,5190", 
    "1110,3210,3830,310,8210,110,5190", "1110,8210,310,210,1120,110", 
    "1110,8210,310", "1110,8210,310", "1110,8210,310,110", "1110,8210,310,110", 
    "210,1110,8210,5190,110", "27080618")), class = c("tbl_df", "tbl", 
    "data.frame"), row.names = c(NA, -25L))

Example of Output

Most frequent: 110

dataframe

回答 3

Stack Overflow用户

回答已采纳

发布于 2020-08-20 13:30:25

我建议采用一种tidyverse方法：

library(tidyr)
library(dplyr)
# Split each comma-separated entry of `Most frequent` into its own row,
# count how often every code occurs (column `N`), and list the most
# frequent codes first.
# The column is selected by name only: an extra positional index passed to
# separate_rows() would target whichever column sits at that position.
df %>%
  separate_rows(`Most frequent`, sep = ",") %>%
  count(`Most frequent`, name = "N") %>%
  arrange(desc(N))

输出：

   Most frequent  N
1           1110 24
2            110 22
3            310 19
4           8210 16
5            210 12
6           3210  6
7           9120  6
8           3830  5
9           5190  4
10           111  3
11          3110  3
12          3819  2
13          1120  1
14      27080618  1
15          9210  1

这类似于@AllanCameron的结果。

票数 2

Stack Overflow用户

发布于 2020-08-20 13:27:26

你可以这样做：

# Flatten every comma-separated entry into one vector of codes, tabulate
# them, and return the label of the code with the highest count.
code_lists <- strsplit(df[["Most frequent"]], split = ",")
code_counts <- table(unlist(code_lists))
names(which.max(code_counts))
#> [1] "1110"

或者，如果您想要该值及其实际计数，可以这样做：

# Tabulate all codes, order the counts from largest to smallest by reversing
# the ascending sort, and keep only the first (largest) entry with its count.
code_lists <- strsplit(df[["Most frequent"]], split = ",")
ranked_counts <- rev(sort(table(unlist(code_lists))))
ranked_counts[1]
#> 1110 
#>   24

您可以看到，1110 实际上出现了 24 次，而 110 出现了 22 次：

# Tabulate all codes and show the full table of counts, largest first
# (ascending sort, then reversed).
code_lists <- strsplit(df[["Most frequent"]], split = ",")
code_counts <- table(unlist(code_lists))
rev(sort(code_counts))
#>    1110      110      310     8210      210     9120     3210     3830 
#>      24       22       19       16       12        6        6        5 
#>    5190     3110      111     3819     9210 27080618     1120 
#>       4        3        3        2        1        1        1

票数 4

Stack Overflow用户

发布于 2020-08-20 13:43:07

# Reproducible sample data: a 25-row data frame carrying the tibble classes.
# `name` labels each row ("act1_25" ... "act1_48", then "serial");
# `Most frequent` stores one comma-separated string of codes per row.
df <- structure(
  list(
    name = c(paste0("act1_", 25:48), "serial"),
    `Most frequent` = c(
      "111,110,9120,310,3110,210,1110",
      "210,3110,110,310,9120,9210,8210,3819,1110,111",
      "210,110,310,8210,3110,1110",
      "210,9120,110,310,8210,1110,3819",
      "210,9120,310,110,1110,111",
      "9120,110,1110,3830",
      "110,1110,3210,310",
      "210,110,1110,8210,310",
      "1110,310,8210,110",
      "210,310,1110,8210,110",
      "310,1110,8210,210,110",
      "3210,9120,1110,8210,110",
      "1110,210,310,110,3830",
      "1110,210,310,110,3210,3830",
      "1110,8210,110,3830,3210",
      "1110,310,110,3210,5190",
      "1110,110,5190",
      "1110,3210,3830,310,8210,110,5190",
      "1110,8210,310,210,1120,110",
      "1110,8210,310",
      "1110,8210,310",
      "1110,8210,310,110",
      "1110,8210,310,110",
      "210,1110,8210,5190,110",
      "27080618"
    )
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -25L)
)

library(dplyr)
library(tidyr)

# Split each comma-separated entry of `Most frequent` into its own row, tally
# every distinct code into a column named `frequency`, and let count() order
# the result so the most frequent codes come first.
df %>%
  separate_rows(`Most frequent`, sep = ",") %>%
  count(`Most frequent`, name = "frequency", sort = TRUE)

#> # A tibble: 15 x 2
#>    `Most frequent` frequency
#>    <chr>               <int>
#>  1 1110                   24
#>  2 110                    22
#>  3 310                    19
#>  4 8210                   16
#>  5 210                    12
#>  6 3210                    6
#>  7 9120                    6
#>  8 3830                    5
#>  9 5190                    4
#> 10 111                     3
#> 11 3110                    3
#> 12 3819                    2
#> 13 1120                    1
#> 14 27080618                1
#> 15 9210                    1

票数 1

页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持

原文链接：

https://stackoverflow.com/questions/63506125

复制

相似问题

问data.frame中最常见的元素
EN

回答 3

Stack Overflow用户

Stack Overflow用户

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问data.frame中最常见的元素EN

回答 3

Stack Overflow用户

Stack Overflow用户

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问data.frame中最常见的元素
EN