## id, name, sex , Case.Details , type
## 1 A3 , M, 020319 entry, 030419 exit , second gen AB-1
## 2 B5 ,male, 040819 in and 050819 out , second gen AB-2
## 3 C8 , F ,081119 in , AB-1/2, (second gen)

我需要统计R中Case.Details字段里格式为"ddmmyy"的日期出现的次数。下面代码中str_detect正则表达式所在的那一行我写不对,请帮帮我。
library(tibble)
library(tidytext)
library(dplyr)
library(stringr)
# Keep only the tokens of Case.Details that look like ddmmyy dates and attach
# to every remaining row the number of rows sharing that date token.
date_counts <- df %>%
  # Drop rows whose Case.Details text exactly repeats an earlier row.
  distinct(Case.Details, .keep_all = TRUE) %>%
  # One row per token; drop = FALSE keeps the original text next to it.
  unnest_tokens(word, Case.Details, drop = FALSE) %>%
  # Count a given token at most once per id.
  distinct(id, word, .keep_all = TRUE) %>%
  anti_join(stop_words, by = "word") %>%
  # A date token is exactly six digits. The earlier pattern "[^\\d]" matched
  # any token containing a non-digit, so it discarded the dates themselves.
  filter(str_detect(word, "^\\d{6}$")) %>%
  group_by(word) %>%
  mutate(word_total = n()) %>%
  ungroup()
# Frequency table of the date tokens, most frequent first.
# (The leading "> " markers were removed: they are not valid R syntax.)
word_counts <- date_counts %>%
  count(word, sort = TRUE)
> View(date_counts)

发布于 2020-02-27 10:22:42
如果要统计case.details列的每个元素中出现了多少个6位数字,可以这样做(用sapply配合stringr的str_match_all):
library(stringr)
# str_match_all() returns one match matrix per element with one row per match,
# so nrow() is the number of standalone six-digit tokens in that element.
# vapply() guarantees an integer vector even when df has zero rows.
df$count <- vapply(
  str_match_all(df$case.details, "\\b\\d{6}\\b"), nrow, integer(1)
)
# Example:
# Example data: three cases whose case.details hold 2, 2 and 1 ddmmyy dates.
df <- structure(
  list(
    name = c("A3 ", "B5 ", "C8 "),
    sex = c(" M", " male", " F "),
    case.details = c(
      " 020319 entry, 030419 exit ",
      " 040819 in and 050819 out ",
      " 081119 in "
    ),
    type = c(" second gen AB-1", " second gen AB-2", " AB-1/2, (second gen)")
  ),
  class = "data.frame",
  # Compact row-name form: c(NA, -n) stands for automatic row names 1..n.
  # The original call was cut off after "c(NA," and did not parse.
  row.names = c(NA, -3L)
)
# One match-matrix row per standalone six-digit token; vapply() keeps the
# result an integer vector regardless of input length.
df$count <- vapply(
  str_match_all(df$case.details, "\\b\\d{6}\\b"), nrow, integer(1)
)
# Result:
> df
name sex case.details type count
1 A3 M 020319 entry, 030419 exit second gen AB-1 2
2 B5 male 040819 in and 050819 out second gen AB-2 2
3 C8 F 081119 in AB-1/2, (second gen) 1

发布于 2020-02-27 10:46:06
您可以使用stringr中的str_count来计算模式在字符串中出现的次数。
使用@R. Schifini的数据
# Word boundaries keep a longer digit run (e.g. an 8-digit id) from being
# counted as a six-digit date.
stringr::str_count(df$case.details, "\\b\\d{6}\\b")
#[1] 2 2 1

https://stackoverflow.com/questions/60425035
复制相似问题