首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >组合发生频率(2×2)

组合发生频率(2×2)
EN

Stack Overflow用户
提问于 2014-01-27 11:43:11
回答 3查看 151关注 0票数 2

我有关于顾客和他们去过的商店的数据(至少一次)。

代码语言:javascript
复制
Customer | Store

1   A
1   B
2   A
2   C
3   A
4   A
4   B
4   C

我想知道:对于每一对商店的组合,分别有多少位顾客同时光顾过这两家商店。

如何转换以前的数据结构(用R)以获得以下结构?

代码语言:javascript
复制
Store 1 | Store 2 | Nb_Customer
A           B         2     (Customer 1 & 4 visited store A & B )
A           C         2     (Customer 2 & 4 visited store A & C)

编辑(关于 Henrik 的解决方案):如下所示,我在生成商店配对(pair)这一步遇到了问题——结果中出现的是数字编码,而不是商店名称。

代码语言:javascript
复制
 # number of visits for each customer in each store 
> df <- data.frame(Customer=c(1,1,2,2,3,4,4,4), Store=c('A', 'B', 'A', 'C', 'A', 'A', 'B', 'C'))
> # number of visits for each customer in each store 
> tt <- with(df, table(df$Customer, df$Store))
> tt

    A B C
  1 1 1 0
  2 1 0 1
  3 1 0 0
  4 1 1 1
> 
> # number of stores
> n <- with(df, length(unique(df$Store)))
> n
[1] 3
> 
> # all pairs of column numbers, to be selected from the table tt
> cols <- with(df, combn(n, 2))
> cols
     [,1] [,2] [,3]
[1,]    1    1    2
[2,]    2    3    3
> 
> # pairs of stores
> pair <- t(with(df, combn(unique(df$Store), 2)))
> pair
     [,1] [,2]
[1,] "A"  "B" 
[2,] "1"  "3" 
[3,] "2"  "3" 
EN

回答 3

Stack Overflow用户

回答已采纳

发布于 2014-01-27 13:54:07

另一种可能性是:

代码语言:javascript
复制
# Count, for every pair of stores, the customers who visited both stores.
# Expects a data frame `df` with the columns `Customer` and `Store`.

# customer x store contingency table: number of visits per customer and store
tt <- with(df, table(Customer, Store))
tt

# number of stores, taken from the table itself so that it always matches
# the columns that are indexed below
n <- ncol(tt)
n

# all pairs of column numbers, to be selected from the table tt
cols <- combn(n, 2)
cols

# pairs of stores, built from the column names of tt: they are character
# strings (never factor codes) and are in the same order as `cols`
pair <- t(combn(colnames(tt), 2))
pair

# select pairs of columns from tt
# count the customers with at least one visit to *both* stores of the pair
# combine the counts with names of stores from 'pair' to a data frame
ll <- lapply(seq_len(ncol(cols)), function(x) {
  # drop = FALSE keeps a two-column table even when there is one customer
  tt2 <- tt[, cols[, x], drop = FALSE]
  # `> 0` on each column: repeat visits to a single store do not count
  n_cust <- sum(tt2[, 1] > 0 & tt2[, 2] > 0)
  data.frame(store1 = pair[x, 1], store2 = pair[x, 2], n_cust = n_cust)
})
ll

# convert list to data frame
df2 <- do.call(rbind, ll)
df2

#   store1 store2 n_cust
# 1      A      B      2
# 2      A      C      2
# 3      B      C      1
票数 2
EN

Stack Overflow用户

发布于 2014-01-27 12:05:49

也许这不是最有效的方法,但它有效:

代码语言:javascript
复制
# Example data: one row per (customer, store) visit.
df <- data.frame(
  Customer = c(1, 1, 2, 2, 3, 4, 4, 4),
  Store = c("A", "B", "A", "C", "A", "A", "B", "C")
)

# Every unordered pair of distinct stores, one pair per row.
cmb <- t(combn(unique(as.character(df$Store)), m = 2))

# Stores visited by each customer, in order of first appearance.
visited <- lapply(
  unique(df$Customer),
  function(id) df$Store[df$Customer == id]
)

# For each pair, count the customers whose visited stores contain both
# members of the pair.
count <- vapply(
  seq_len(nrow(cmb)),
  function(k) {
    both <- vapply(
      visited,
      function(stores) all(cmb[k, ] %in% stores),
      logical(1)
    )
    as.numeric(sum(both))
  },
  numeric(1)
)

res <- data.frame(Store1 = cmb[, 1], Store2 = cmb[, 2], Nb_customer = count)

      Store1 Store2 Nb_customer
1      A      B           2
2      A      C           2
3      B      C           1

编辑:

使用关联规则,您可以这样做:

代码语言:javascript
复制
# load the arules package (association rule mining)
library(arules)

# original data frame: one row per (customer, store) visit
df <- data.frame(
  Customer = c(1, 1, 2, 2, 3, 4, 4, 4),
  Store = c("A", "B", "A", "C", "A", "A", "B", "C")
)

# create list: one element per customer holding the stores that customer
# visited; items are plain, de-duplicated strings so that a factor column or
# repeat visits cannot distort the transactions
a_list <- lapply(
  unique(df$Customer),
  function(x) unique(as.character(df$Store[df$Customer == x]))
)

## set transaction names
names(a_list) <- paste0("Tr", unique(df$Customer))
a_list

## coerce into transactions
trans <- as(a_list, "transactions")

# create association rules made of exactly two items (minlen = maxlen = 2)
# NOTE(review): only rules that pass apriori()'s support/confidence
# thresholds are returned (defaults 0.1 / 0.8 per the arules docs - confirm),
# so a pair such as B/C (one shared customer, rule confidence 0.5) is not
# listed in the result.
rules <- apriori(
  trans,
  parameter = list(minlen = 2, maxlen = 2, ext = TRUE, originalSupport = FALSE)
)

# calculate frequency of pairs of stores:
# absolute count = relative support * number of transactions
quality(rules)$abs_support <- quality(rules)$support * length(trans)
inspect(rules)


   lhs    rhs support confidence lhs.support lift abs_support
1 {B} => {A}     0.5          1         0.5    1           2
2 {C} => {A}     0.5          1         0.5    1           2

abs_support 即两家商店共同出现的次数,也就是同时光顾过这两家商店的顾客数。

票数 1
EN

Stack Overflow用户

发布于 2014-01-27 12:07:19

像这样吗?

代码语言:javascript
复制
# Example data: v1 = customer id, v2 = store visited.
d <- data.frame(
  v1 = c(1, 1, 2, 2, 3, 4, 4, 4),
  v2 = c("A", "B", "A", "C", "A", "A", "B", "C")
)

# Customer x store table as a data frame: one row per customer, one column
# per store, each cell holding the number of visits.
df <- as.data.frame.matrix(table(d))

# Every unordered pair of stores, labelled "A-B", "A-C", ...
store_pairs <- combn(names(df), 2, simplify = FALSE)
names(store_pairs) <- vapply(store_pairs, paste, character(1), collapse = "-")

# For each pair, the row positions of df (rownames(df) holds the matching
# customer ids) with at least one visit to both stores. `> 0` rather than
# `== 1` so that customers with repeat visits are still found.
shared <- lapply(
  store_pairs,
  function(p) which(df[[p[1]]] > 0 & df[[p[2]]] > 0)
)
shared

# Number of customers per pair of stores.
lengths(shared)
票数 0
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/21379992

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档