df <- data.frame(row.names = c('1s.u1','1s.u2','2s.u1','2s.u2','6s.u1'),fjri_deu_klcea= c('0','0','0','15','23'),hfue_klcea=c('2','2','0','156','45'),dji_dhi_ghcea_jk=c('456','0','0','15','15'),jdi_jdi_ghcea=c('1','2','3','4','100'),gz7_jfu_dcea_jdi=c('5','6','3','7','56'))
df
fjri_deu_klcea hfue_klcea dji_dhi_ghcea_jk jdi_jdi_ghcea gz7_jfu_dcea_jdi
1s.u1 0 2 456 1 5
1s.u2 0 2 0 2 6
2s.u1 0 0 0 3 3
2s.u2 15 156 15 4 7
6s.u1 23 45 15 100 56我想根据列名的cea部分来总结df。因此,具有相同cea部分的所有行都应该求和。df应该如下所示
klcea ghcea dcea
1s.u1 2 457 5
1s.u2 2 2 6
2s.u1 0 3 3
2s.u2 171 19 7
6s.u1 68 115 56我考虑首先获得一个名为cea的cea列,然后根据row.names和相应的cea将其汇总为类似with(df, ave(cea, row.names(df), FUN = sum))的内容
我不知道如何根据字符串中的模式生成新列。我猜grepl是有用的,但我想不出什么办法,我试过df$cea <- df[grepl(colnames(df),'cea'),],这是错的……
发布于 2020-10-28 19:43:43
使用基数R,您可以从名称中提取"cea“部分,并在split.default中使用它将数据帧拆分为多个列,然后我们可以使用rowSums对每个单独的数据帧求和。
sapply(split.default(df, sub('.*_(.*cea).*', '\\1', names(df))), rowSums)
# dcea ghcea klcea
#1s.u1 5 457 2
#1s.u2 6 2 2
#2s.u1 3 3 0
#2s.u2 7 19 171
#6s.u1 56 115 68其中sub部件返回:
sub('.*_(.*cea).*', '\\1', names(df))
#[1] "klcea" "klcea" "ghcea" "ghcea" "dcea" 发布于 2020-10-28 19:48:48
使用dplyr:
> df %>% rowwise() %>% mutate(klcea = sum(c_across(ends_with('klcea'))),
+ ghcea = sum(c_across(contains('ghcea'))),
+ dcea = sum(c_across(contains('dcea')))) %>%
+ select(klcea, ghcea, dcea)
# A tibble: 5 x 3
# Rowwise:
klcea ghcea dcea
<dbl> <dbl> <dbl>
1 2 457 5
2 2 2 6
3 0 3 3
4 171 19 7
5 68 115 56如果希望保留行名:
> df %>% rownames_to_column('rn') %>% rowwise() %>% mutate(klcea = sum(c_across(ends_with('klcea'))),
+ ghcea = sum(c_across(contains('ghcea'))),
+ dcea = sum(c_across(contains('dcea')))) %>%
+ select(klcea, ghcea, dcea, rn) %>% column_to_rownames('rn')
klcea ghcea dcea
1s.u1 2 457 5
1s.u2 2 2 6
2s.u1 0 3 3
2s.u2 171 19 7
6s.u1 68 115 56
> https://stackoverflow.com/questions/64571832
复制相似问题