我已经有几个月没用R了,所以这个问题可能正是因此而起。
我在互联网上找到了这个数据集。我对它做了一些处理,因此在这里用dput()给出处理后的数据;它最初来自https://ourworldindata.org/terrorism。
> dput(ter)
structure(list(region = c("Afghanistan", "Albania", "Algeria",
"Angola", "Argentina", "Australasia & Oceania", "Australia",
"Austria", "Azerbaijan", "Bahrain", "Bangladesh", "Belgium",
"Brazil", "Burkina Faso", "Burundi", "Cameroon", "Canada", "Central African Republic",
"Central America & Caribbean", "Central Asia", "Chad", "Chile",
"China", "Colombia", "Cote d'Ivoire", "Czech Republic", "Democratic Republic of the Congo",
"Djibouti", "Dominican Republic", "East Asia", "Eastern Europe",
"Ecuador", "Egypt", "Ethiopia", "Finland", "France", "Gabon",
"Georgia", "Germany", "Greece", "Honduras", "India", "Indonesia",
"Iran", "Iraq", "Ireland", "Israel", "Italy", "Jamaica", "Jordan",
"Kenya", "Kosovo", "Kyrgyzstan", "Laos", "Latvia", "Lebanon",
"Liberia", "Libya", "Malawi", "Malaysia", "Maldives", "Mali",
"Malta", "Mexico", "Middle East & North Africa", "Mozambique",
"Myanmar", "Nepal", "Netherlands", "Niger", "Nigeria", "North America",
"Macedonia", "Norway", "Pakistan", "Palestine", "Papua New Guinea",
"Paraguay", "Peru", "Philippines", "Poland", "Russia", "Rwanda",
"Saudi Arabia", "Serbia", "Sierra Leone", "Somalia", "South Africa",
"South America", "South Asia", "South Sudan", "Southeast Asia",
"Spain", "Sri Lanka", "Sub-Saharan Africa", "Sudan", "Sweden",
"Syria", "Taiwan", "Tajikistan", "Tanzania", "Thailand", "Tunisia",
"Turkey", "Uganda", "Ukraine", "UK", "USA", "Venezuela", "Vietnam",
"Western Europe", "World", "Yemen", "Zambia", "Zimbabwe"), Code = c("AFG",
"ALB", "DZA", "AGO", "ARG", NA, "AUS", "AUT", "AZE", "BHR", "BGD",
"BEL", "BRA", "BFA", "BDI", "CMR", "CAN", "CAF", NA, NA, "TCD",
"CHL", "CHN", "COL", "CIV", "CZE", "COD", "DJI", "DOM", NA, NA,
"ECU", "EGY", "ETH", "FIN", "FRA", "GAB", "GEO", "DEU", "GRC",
"HND", "IND", "IDN", "IRN", "IRQ", "IRL", "ISR", "ITA", "JAM",
"JOR", "KEN", "OWID_KOS", "KGZ", "LAO", "LVA", "LBN", "LBR",
"LBY", "MWI", "MYS", "MDV", "MLI", "MLT", "MEX", NA, "MOZ", "MMR",
"NPL", "NLD", "NER", "NGA", NA, "MKD", "NOR", "PAK", "PSE", "PNG",
"PRY", "PER", "PHL", "POL", "RUS", "RWA", "SAU", "SRB", "SLE",
"SOM", "ZAF", NA, NA, "SSD", NA, "ESP", "LKA", NA, "SDN", "SWE",
"SYR", "TWN", "TJK", "TZA", "THA", "TUN", "TUR", "UGA", "UKR",
"GBR", "USA", "VEN", "VNM", NA, "OWID_WRL", "YEM", "ZMB", "ZWE"
), Year = c(2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,
2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,
2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,
2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,
2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,
2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,
2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,
2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,
2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,
2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,
2017, 2017, 2017, 2017, 2017, 2017, 2017), `Terrorism fatalities (GTD, 2018)` = c(6092,
0, 12, 7, 0, 4, 4, 2, 5, 6, 25, 2, 0, 53, 20, 228, 6, 601, 4,
6, 62, 0, 16, 84, 3, 0, 596, 0, 2, 16, 101, 0, 877, 67, 2, 7,
0, 0, 1, 0, 2, 465, 20, 39, 6476, 0, 3, 0, 0, 4, 126, 0, 0, 1,
0, 17, 0, 289, 0, 4, 1, 361, 1, 23, 10819, 22, 218, 4, 0, 148,
1805, 124, 0, 0, 1076, 50, 0, 4, 8, 496, 0, 61, 2, 31, 0, 0,
1912, 21, 101, 7664, 581, 811, 21, 1, 6712, 82, 5, 2026, 0, 1,
8, 72, 5, 222, 7, 40, 42, 95, 5, 0, 83, 26445, 762, 0, 0)), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -115L), spec = structure(list(
cols = list(Entity = structure(list(), class = c("collector_character",
"collector")), Code = structure(list(), class = c("collector_character",
"collector")), Year = structure(list(), class = c("collector_double",
"collector")), `Terrorism fatalities (GTD, 2018)` = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1), class = "col_spec"))我尝试了很多东西,总是缩小重要的代码。最后,我使用了来自vignette("forcats")的示例代码(我加载了tidyverse ),并在不同的数据集上进行了尝试:
# Example from vignette("forcats"): lump skin_color with fct_lump(n = 5),
# then count rows per resulting level, most frequent first.
starwars %>%
mutate(skin_color = fct_lump(skin_color, n = 5)) %>%
count(skin_color, sort = TRUE)
# Same pattern applied to the `ter` data on the `region` column. The new column
# is named `hair` (presumably left over from the vignette example).
ter %>%
mutate(hair = fct_lump(region, n = 5)) %>%
count(hair, sort = TRUE)
gss_cat %>%
mutate(relig = fct_lump(relig, n = 5)) %>%
count(relig, sort = TRUE)
它在starwars和gss_cat上都能正常工作,但在ter(我的数据)上却不行:
> ter %>%
+ mutate(hair = fct_lump(region, n = 5)) %>%
+ count(hair, sort = TRUE)
# A tibble: 115 x 2
hair n
<fct> <int>
1 Afghanistan 1
2 Albania 1
3 Algeria 1
4 Angola 1
5 Argentina 1
6 Australasia & Oceania 1
7 Australia 1
8 Austria 1
9 Azerbaijan 1
10 Bahrain 1
# … with 105 more rows
为什么会发生这种情况?为什么fct_lump()在这里不起作用?
发布于 2020-12-11 23:03:58
看起来你希望将死亡人数少于5人的地区归入“其他”类别。这在base R中很简单:
# Relabel every region with fewer than 5 recorded fatalities as "Other".
# The column is coerced to character first so the new label can be assigned
# freely; which() turns the comparison into row positions, so rows where the
# comparison is NA are left unchanged.
ter$region <- replace(
  as.character(ter$region),
  which(ter$`Terrorism fatalities (GTD, 2018)` < 5),
  "Other"
)
ter$region <- factor(ter$region)
如果您愿意,还可以使用forcats按死亡人数对因子水平重新排序:
# Order the region levels by total fatalities so the x-axis is sorted by bar
# height. `.fun = sum` matters for "Other": that level spans many rows and
# geom_col() stacks them into a single bar, whereas the default summary
# (median) would position it by one typical small row instead of its total.
ter$region <- fct_reorder(
  ter$region,
  ter$`Terrorism fatalities (GTD, 2018)`,
  .fun = sum
)

# One bar per region; x labels are rotated 90 degrees so the many region
# names do not overlap.
ggplot(ter, aes(region, `Terrorism fatalities (GTD, 2018)`)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

或者,如果您运行上面的代码,但把死亡人数少于500的所有地区归并在一起,您会得到:

https://stackoverflow.com/questions/65253329
复制相似问题