我有两个数据框(data frame),名为 df1 和 df2。两者都包含不同类型的教育测试题目,每道题目测量某项内容(Measures 列),并具有特定的题型格式(Format 列)。
# df2: for each Measures/Format combination, the number of questions wanted
# (Number). Row names are kept as the original, non-contiguous labels.
df2 <- data.frame(
  Measures = c(
    "space and shape",
    "space and shape",
    "space and shape",
    "space and shape",
    "asdaf"
  ),
  Format = c(
    "Constructed Response Expert",
    "Constructed Response Manual",
    "Simple Multiple Choice",
    "Constructed Response Auto-coded",
    "asfas"
  ),
  Number = c(40, 1, 22, 1, 0),
  row.names = c("1", "2", "4", "5", "6"),
  stringsAsFactors = FALSE
)
# df1: the pool of test questions, one row per question, giving what each
# question measures (Measures) and its response format (Format).
df1<-structure(list(Measures = c("space and shape", "space and shape",
"space and shape", "space and shape", "space and shape", "change and relationships",
"change and relationships", "change and relationships", "change and relationships",
"change and relationships", "space and shape", "space and shape",
"space and shape", "space and shape", "uncertainty and data",
"quantity", "uncertainty and data", "uncertainty and data", "uncertainty and data",
"quantity", "change and relationships", "change and relationships",
"space and shape", "space and shape", "space and shape", "quantity",
"quantity", "quantity", "quantity", "quantity", "uncertainty and data",
"change and relationships", "quantity", "quantity", "uncertainty and data",
"change and relationships", "uncertainty and data", "quantity",
"change and relationships", "change and relationships", "quantity",
"quantity", "quantity", "quantity", "quantity", "quantity", "change and relationships",
"uncertainty and data", "change and relationships", "uncertainty and data",
"uncertainty and data", "uncertainty and data", "quantity", "quantity",
"quantity", "space and shape", "change and relationships", "quantity",
"space and shape", "space and shape", "change and relationships",
"change and relationships", "uncertainty and data", "uncertainty and data",
"quantity", "change and relationships", "quantity", "change and relationships",
"space and shape", "quantity", "quantity", "quantity", "space and shape",
"space and shape", "space and shape", "uncertainty and data",
"uncertainty and data", "uncertainty and data", "change and relationships",
"change and relationships", "change and relationships", "uncertainty and data",
"uncertainty and data", "uncertainty and data", "change and relationships",
"change and relationships", "change and relationships", "change and relationships",
"change and relationships", "uncertainty and data", "space and shape",
"space and shape", "uncertainty and data", "uncertainty and data",
"uncertainty and data", "uncertainty and data", "uncertainty and data",
"quantity", "quantity", "space and shape", "space and shape",
"space and shape", "space and shape", "change and relationships",
"space and shape", "space and shape", "quantity", "change and relationships",
"change and relationships"), Format = c("Constructed Response Expert",
"Constructed Response Manual", "Constructed Response Expert",
"Simple Multiple Choice", "Constructed Response Auto-coded",
"Constructed Response Expert", "Constructed Response Expert",
"Constructed Response Expert", "Complex Multiple Choice", "Complex Multiple Choice",
"Complex Multiple Choice", "Simple Multiple Choice", "Constructed Response Expert",
"Constructed Response Expert", "Complex Multiple Choice", "Constructed Response Manual",
"Simple Multiple Choice", "Complex Multiple Choice", "Simple Multiple Choice",
"Constructed Response Manual", "Constructed Response Manual",
"Constructed Response Expert", "Simple Multiple Choice", "Constructed Response Expert",
"Constructed Response Auto-coded", "Constructed Response Manual",
"Complex Multiple Choice", "Constructed Response Manual", "Simple Multiple Choice",
"Simple Multiple Choice", "Simple Multiple Choice", "Simple Multiple Choice",
"Complex Multiple Choice", "Simple Multiple Choice", "Constructed Response Auto-coded",
"Constructed Response Expert", "Constructed Response Manual",
"Constructed Response Manual", "Constructed Response Expert",
"Constructed Response Manual", "Complex Multiple Choice", "Constructed Response Expert",
"Simple Multiple Choice", "Constructed Response Expert", "Constructed Response Manual",
"Simple Multiple Choice", "Constructed Response Expert", "Simple Multiple Choice",
"Constructed Response Manual", "Simple Multiple Choice", "Simple Multiple Choice",
"Simple Multiple Choice", "Constructed Response Manual", "Constructed Response Manual",
"Simple Multiple Choice", "Simple Multiple Choice", "Constructed Response Expert",
"Constructed Response Manual", "Constructed Response Manual",
"Simple Multiple Choice", "Constructed Response Manual", "Constructed Response Expert",
"Simple Multiple Choice", "Simple Multiple Choice", "Simple Multiple Choice",
"Constructed Response Expert", "Constructed Response Manual",
"Simple Multiple Choice", "Constructed Response Expert", "Simple Multiple Choice",
"Constructed Response Manual", "Constructed Response Expert",
"Complex Multiple Choice", "Complex Multiple Choice", "Constructed Response Expert",
"Constructed Response Expert", "Constructed Response Manual",
"Constructed Response Expert", "Constructed Response Manual",
"Constructed Response Expert", "Constructed Response Expert",
"Constructed Response Manual", "Constructed Response Expert",
"Constructed Response Expert", "Simple Multiple Choice", "Simple Multiple Choice",
"Constructed Response Manual", "Constructed Response Expert",
"Simple Multiple Choice", "Constructed Response Expert", "Constructed Response Manual",
"Complex Multiple Choice", "Constructed Response Manual", "Constructed Response Manual",
"Complex Multiple Choice", "Simple Multiple Choice", "Simple Multiple Choice",
"Simple Multiple Choice", "Constructed Response Manual", "Simple Multiple Choice",
"Constructed Response Expert", "Constructed Response Manual",
"Constructed Response Manual", "Constructed Response Expert",
"Constructed Response Manual", "Constructed Response Expert",
"Simple Multiple Choice", "Constructed Response Manual", "Complex Multiple Choice"
)), row.names = c(NA, -109L), class = "data.frame")
我使用下面的代码在 df1 中查找与 df2 各行匹配的 n 行(n 由 Number 列给出),它运行得很好。
library(tidyverse)
# For every Measures/Format combination requested in df2, keep at most
# `Number` of the matching questions from df1. The join key is left implicit
# so dplyr joins on the shared columns (Measures, Format) and reports them.
inner_join(df1, df2) %>%
  group_by(Measures, Format) %>%
  # seq_len() yields an empty index when Number is 0, whereas 1:min(Number)
  # would expand to c(1, 0) and still keep one row. The index is passed
  # unnamed: slice() has no `n` argument, and newer dplyr releases reject
  # named expressions in slice().
  slice(seq_len(min(Number))) %>%
  ungroup()
Joining, by = c("Measures", "Format")
# A tibble: 17 x 3
Measures Format Number
<chr> <chr> <dbl>
1 space and shape Constructed Response Auto-coded 1
2 space and shape Constructed Response Expert 40
3 space and shape Constructed Response Expert 40
4 space and shape Constructed Response Expert 40
5 space and shape Constructed Response Expert 40
6 space and shape Constructed Response Expert 40
7 space and shape Constructed Response Expert 40
8 space and shape Constructed Response Expert 40
9 space and shape Constructed Response Expert 40
10 space and shape Constructed Response Expert 40
11 space and shape Constructed Response Manual 1
12 space and shape Simple Multiple Choice 22
13 space and shape Simple Multiple Choice 22
14 space and shape Simple Multiple Choice 22
15 space and shape Simple Multiple Choice 22
16 space and shape Simple Multiple Choice 22
17 space and shape Simple Multiple Choice 22
但我还想知道所要求的题目中有多少在 df1 中并不存在。例如,我显然没有 40 道 "space and shape"、"Constructed Response Expert" 类型的题目。我想知道 df2 每一行所要求的数量中,有多少是 df1 无法提供的。df1 中只有 9 道 "space and shape"、"Constructed Response Expert" 类型的题目,而我想要 40 道,因此我应该得到一个数据框,显示该类型的题目还缺少 31 道。
发布于 2021-11-28 05:31:05
怎么样,
# Rows of df1 whose Measures/Format combination has no match in df2.
anti_join(df1, df2)
Joining, by = c("Measures", "Format")
Measures Format
1 change and relationships Constructed Response Expert
2 change and relationships Constructed Response Expert
3 change and relationships Constructed Response Expert
4 change and relationships Complex Multiple Choice
5 change and relationships Complex Multiple Choice
6 space and shape Complex Multiple Choice
7 uncertainty and data Complex Multiple Choice
8 quantity Constructed Response Manual
https://stackoverflow.com/questions/70139451
复制相似问题