我有以下数据:
inputs <- c("Master", "Bachelor", "School")我希望每个单词的前1-2个字母都有所有可能的排列。
first_letter <- sapply(inputs, substr, start = 1, stop = 1)
"M" "B" "S"
second_letter <- sapply(inputs, substr, start = 1, stop = 2)
"Ma" "Ba" "Sc" 期望输出:
所有前几个字母的排列顺序,请参阅变量"all_order“的列(参见”我尝试了什么“一节)。同样在这两个变体中,要么取"first_letter“的第一个值,要么取第一个值"second_letter”,但不是同时取第一个值。
MBaS,MBS,MBSc,MBaSc MaBaS,MaBS,MaBSc,MaBaSc
SBaM,SBaMa,SBaM ScBM,ScBaM,ScBaMa,ScBaM
BSMa,BScM,BScMa BaSM,BaSMa,BaScM,BaScMa
.
(如果解释得够好,请告诉我。)
我尝试了什么:
combs <- combn(rep(seq(inputs), 2), 3)
keep <- !colSums(apply(combs, 2, duplicated))
all_order <- combs[, keep]
all_order
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] 1 1 1 1 2 2 3 1
[2,] 2 2 3 2 3 1 1 2
[3,] 3 3 2 3 1 3 2 3发布于 2021-12-30 20:32:24
我们会用gtools::permutations来计算.inputs的排列,然后使用expand.grid显示其中的所有组合。
首先,我们可以在输入的一个顺序上轻松地完成它:
expand.grid(c("M","Ma"), c("B","Ba"), c("S","Sc"))
# Var1 Var2 Var3
# 1 M B S
# 2 Ma B S
# 3 M Ba S
# 4 Ma Ba S
# 5 M B Sc
# 6 Ma B Sc
# 7 M Ba Sc
# 8 Ma Ba Sc
do.call(paste, c(expand.grid(c("M","Ma"), c("B","Ba"), c("S","Sc")), sep = ""))
# [1] "MBS" "MaBS" "MBaS" "MaBaS" "MBSc" "MaBSc" "MBaSc" "MaBaSc"这是一个订单(M < B < S),现在我们需要重新排列它们。我们可以手动调用所有订单,也可以使用gtools::permutations帮助。
inputlist <- lapply(inputs, substring, 1, 1:2)
str(inputlist)
# List of 3
# $ : chr [1:2] "M" "Ma"
# $ : chr [1:2] "B" "Ba"
# $ : chr [1:2] "S" "Sc"
perms <- gtools::permutations(3, 3)
perms
# [,1] [,2] [,3]
# [1,] 1 2 3
# [2,] 1 3 2
# [3,] 2 1 3
# [4,] 2 3 1
# [5,] 3 1 2
# [6,] 3 2 1
inputlist[perms[2,]]
# [[1]]
# [1] "M" "Ma"
# [[2]]
# [1] "S" "Sc"
# [[3]]
# [1] "B" "Ba"
inputlist[perms[3,]]
# [[1]]
# [1] "B" "Ba"
# [[2]]
# [1] "M" "Ma"
# [[3]]
# [1] "S" "Sc"导致
allperms <- do.call(rbind,
apply(gtools::permutations(3, 3), 1,
function(ind) do.call(expand.grid, inputlist[ind]))
)
head(allperms); tail(allperms)
# Var1 Var2 Var3
# 1 M B S
# 2 Ma B S
# 3 M Ba S
# 4 Ma Ba S
# 5 M B Sc
# 6 Ma B Sc
# Var1 Var2 Var3
# 43 S Ba M
# 44 Sc Ba M
# 45 S B Ma
# 46 Sc B Ma
# 47 S Ba Ma
# 48 Sc Ba Ma
do.call(paste, c(allperms, list(sep = "")))
# [1] "MBS" "MaBS" "MBaS" "MaBaS" "MBSc" "MaBSc" "MBaSc" "MaBaSc" "MSB" "MaSB" "MScB"
# [12] "MaScB" "MSBa" "MaSBa" "MScBa" "MaScBa" "BMS" "BaMS" "BMaS" "BaMaS" "BMSc" "BaMSc"
# [23] "BMaSc" "BaMaSc" "BSM" "BaSM" "BScM" "BaScM" "BSMa" "BaSMa" "BScMa" "BaScMa" "SMB"
# [34] "ScMB" "SMaB" "ScMaB" "SMBa" "ScMBa" "SMaBa" "ScMaBa" "SBM" "ScBM" "SBaM" "ScBaM"
# [45] "SBMa" "ScBMa" "SBaMa" "ScBaMa"发布于 2021-12-30 20:37:06
也许我们可以试试下面的代码
d <- do.call(
rbind,
combn(
c(first_letter, second_letter),
3,
pracma::perms,
simplify = FALSE
)
)
res <- do.call(
paste0,
data.frame(d)
)[apply(
`dim<-`(match(substr(d, 1, 1), first_letter), dim(d)),
1,
function(x) all(!duplicated(x))
)]这给
> res
[1] "SBM" "SMB" "BSM" "BMS" "MBS" "MSB" "ScBM" "ScMB"
[9] "BScM" "BMSc" "MBSc" "MScB" "BaSM" "BaMS" "SBaM" "SMBa"
[17] "MSBa" "MBaS" "ScBaM" "ScMBa" "BaScM" "BaMSc" "MBaSc" "MScBa"
[25] "MaSB" "MaBS" "SMaB" "SBMa" "BSMa" "BMaS" "ScMaB" "ScBMa"
[33] "MaScB" "MaBSc" "BMaSc" "BScMa" "BaMaS" "BaSMa" "MaBaS" "MaSBa"
[41] "SMaBa" "SBaMa" "ScBaMa" "ScMaBa" "BaScMa" "BaMaSc" "MaBaSc" "MaScBa"发布于 2021-12-30 20:37:30
您可以使用来自e1071包的函数e1071(概率论组统计部门的Misc函数(以前为: E1071))。
library(e1071)
res <- c(substr(inputs,1,1), substr(inputs,1,2))
res
[1] "M" "B" "S" "Ma" "Ba" "Sc"
perm <- unique(matrix(e1071::permutations(6), ncol=3))
# to exclude repetitions find 1,4 2,5 and 3,6
apply(matrix(res[perm], ncol=3), 1, paste, collapse="")[
rowSums(cbind(rowSums(perm == 1 | perm == 4)==2,
rowSums(perm == 2 | perm == 5)==2,
rowSums(perm == 3 | perm == 6)==2))==0]
[1] "MSBa" "SMBa" "SMaBa" "MaSBa" "SMaB" "BMaS" "SMB" "SBM"
[9] "MBS" "BMS" "BSM" "MSB" "MaBS" "MaSB" "BSMa" "SBMa"
[17] "SBaM" "MBaS" "MaBaS" "SBaMa" "BaSMa" "BaSM" "BaMS" "BaMaS"
[25] "BMaSc" "MBSc" "BMSc" "MaBSc" "MBaSc" "MaBaSc" "BaMSc" "BaMaSc"
[33] "MaScB" "BScM" "MScB" "BScMa" "BaScM" "BaScMa" "MScBa" "MaScBa"
[41] "ScBMa" "ScMB" "ScBM" "ScMaB" "ScMBa" "ScMaBa" "ScBaM" "ScBaMa"或者,您也可以用碱基R构造置换
res <- c(substr(inputs,1,1), substr(inputs,1,2))
res
[1] "M" "B" "S" "Ma" "Ba" "Sc"
perm <- as.matrix(expand.grid(1:6,1:6,1:6))
perm <- perm[colSums(apply(perm, 1, duplicated))==0,]
# to exclude repetitions find 1,4 2,5 and 3,6
apply(matrix(res[perm], ncol=3), 1, paste, collapse="")[
rowSums(cbind(rowSums(perm == 1 | perm == 4)==2,
rowSums(perm == 2 | perm == 5)==2,
rowSums(perm == 3 | perm == 6)==2))==0]
[1] "SBM" "ScBM" "BSM" "BaSM" "SBaM" "ScBaM" "BScM" "BaScM"
[9] "SMB" "ScMB" "MSB" "MaSB" "SMaB" "ScMaB" "MScB" "MaScB"
[17] "BMS" "BaMS" "MBS" "MaBS" "BMaS" "BaMaS" "MBaS" "MaBaS"
[25] "SBMa" "ScBMa" "BSMa" "BaSMa" "SBaMa" "ScBaMa" "BScMa" "BaScMa"
[33] "SMBa" "ScMBa" "MSBa" "MaSBa" "SMaBa" "ScMaBa" "MScBa" "MaScBa"
[41] "BMSc" "BaMSc" "MBSc" "MaBSc" "BMaSc" "BaMaSc" "MBaSc" "MaBaSc"https://stackoverflow.com/questions/70536196
复制相似问题