Cox.csueastbay.edu



%+%Historical baby nameslibrary(pacman)p_load(tidyverse, babynames, mdsr, Hmisc)This dataset was created in 2014.BabynamesDist <- make_babynames_dist()## Warning: `mutate_()` is deprecated as of dplyr 0.7.0.## Please use `mutate()` instead.## See vignette('programming') for more help## This warning is displayed once every 8 hours.## Call `lifecycle::last_warnings()` to see where this warning was generated.## Warning: `filter_()` is deprecated as of dplyr 0.7.0.## Please use `filter()` instead.## See vignette('programming') for more help## This warning is displayed once every 8 hours.## Call `lifecycle::last_warnings()` to see where this warning was generated.## Warning: `select_()` is deprecated as of dplyr 0.7.0.## Please use `select()` instead.## This warning is displayed once every 8 hours.## Call `lifecycle::last_warnings()` to see where this warning was generated.## Warning: `rename_()` is deprecated as of dplyr 0.7.0.## Please use `rename()` instead.## This warning is displayed once every 8 hours.## Call `lifecycle::last_warnings()` to see where this warning was generated.head(BabynamesDist)## # A tibble: 6 x 9## year sex name n prop alive_prob count_thousands age_today## <dbl> <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl>## 1 1900 F Mary 16706 0.0526 0 16.7 114## 2 1900 F Helen 6343 0.0200 0 6.34 114## 3 1900 F Anna 6114 0.0192 0 6.11 114## 4 1900 F Marg… 5304 0.0167 0 5.30 114## 5 1900 F Ruth 4765 0.0150 0 4.76 114## 6 1900 F Eliz… 4096 0.0129 0 4.10 114## # … with 1 more variable: est_alive_today <dbl>BabynamesDist %>% filter(name == "Benjamin")## # A tibble: 205 x 9## year sex name n prop alive_prob count_thousands age_today## <dbl> <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl>## 1 1900 M Benj… 450 0.00278 0 0.45 114## 2 1901 M Benj… 343 0.00297 0.000025 0.343 113## 3 1902 M Benj… 374 0.00282 0.00005 0.374 112## 4 1903 M Benj… 324 0.00251 0.000075 0.324 111## 5 1904 M Benj… 358 0.00258 0.0001 0.358 110## 6 1905 M Benj… 379 0.00265 0.000125 0.379 109## 7 1906 M Benj… 352 0.00244 0.000150 0.352 108## 8 1907 M Benj… 460 0.00290 0.000175 0.46 107## 9 1908 M Benj… 480 0.00289 0.0002 0.48 106## 10 1909 M Benj… 489 0.00276 0.000225 0.489 105## # … with 195 more rows, and 1 more variable: est_alive_today <dbl>joseph <- BabynamesDist %>% filter(name == "Joseph" & sex == "M")name_plot <- joseph %>% ggplot(aes(x = year))name_plotname_plot <- name_plot + geom_bar(stat = "identity", aes(y = count_thousands*alive_prob), fill = "#b2d7e9", color = "white")name_plotname_plot <- name_plot + geom_line(aes(y = count_thousands), size=2)name_plotname_plot <- name_plot + ylab("Number of People (thousands)") + xlab(NULL)name_plotsummary(name_plot)## data: year, sex, name, n, prop, alive_prob, count_thousands, age_today,## est_alive_today [111x9]## mapping: x = ~year## faceting: <ggproto object: Class FacetNull, Facet, gg>## compute_layout: function## draw_back: function## draw_front: function## draw_labels: function## draw_panels: function## finish_data: function## init_scales: function## map_data: function## params: list## setup_data: function## setup_params: function## shrink: TRUE## train_scales: function## vars: function## super: <ggproto object: Class FacetNull, Facet, gg>## -----------------------------------## mapping: y = ~count_thousands * alive_prob ## geom_bar: width = NULL, na.rm = FALSE## stat_identity: na.rm = FALSE## position_stack ## ## mapping: y = ~count_thousands ## geom_line: na.rm = FALSE## stat_identity: na.rm = FALSE## position_identity?wtd.quantilemedian_yob <- with(joseph, wtd.quantile(year, weights=est_alive_today, prob = 0.5)) # The pipe %>% does not work.median_yob ## 50% ## 1975name_plot <- name_plot + geom_bar(stat = "identity", color = "white", fill = "#008fd5", aes(y = ifelse(year == median_yob, est_alive_today / 1000, 0)))name_plotname_plot + ggtitle("Age Distribution of American Boys Named Joseph") + geom_text(x = 1935, y = 40, label = "Number of Josephs\nborn each year") + geom_text(x = 1915, y = 13, label = "Number of Josephs\nborn each year\nestimated to be alive\non 1/1/214", color = "#b2d7e9") + geom_text(x = 2003, y = 40, label = "The median\nliving Joseph\nis 37 years old", color = "darkgrey") + geom_curve(x = 1995, xend = 1974, y = 40, yend = 24, arrow = arrow(length = unit(0.3, "cm")), curvature = 0.5) + ylim (0, 42)Now replace the data in the ggplot and then make the new plot.name_plot %+% filter(BabynamesDist, name == "Josephine" & sex == "F")FacetingTry the name Jessiename_plot <- name_plot + facet_wrap(~sex)name_plot %+% filter(BabynamesDist, name == "Jessie")Try it with your own name.name_plot <- name_plot + facet_wrap(~sex)name_plot %+% filter(BabynamesDist, name == "Eric")many_names_plot <- name_plot + facet_grid(name ~sex)mnp <- many_names_plot %+% filter(BabynamesDist, name %in% c("Jessie", "Marion", "Jackie"))mnpmnp + facet_grid(sex ~ name)Most common women’s namesThank you Hannah Kim!library(babynames)com_fem <- BabynamesDist %>% filter(sex == "F") %>% group_by(name) %>% summarise( N = n(), est_num_alive = sum(est_alive_today) ) %>% arrange(desc(est_num_alive)) %>% head(25) %>% select(name) %>% left_join(., BabynamesDist, by = "name") %>% group_by(name) %>% summarise( N = n(), est_num_alive = sum(est_alive_today), q1_age = wtd.quantile(age_today, weight = est_alive_today, probs = 0.25), median_age = wtd.quantile(age_today, weight = est_alive_today, probs = 0.5), q3_age = wtd.quantile(age_today, weight = est_alive_today, probs = 0.75) )## `summarise()` ungrouping output (override with `.groups` argument)## `summarise()` ungrouping output (override with `.groups` argument)w_plot <- ggplot(data = com_fem, aes(x = reorder(name, -median_age), y = median_age)) + xlab(NULL) + ylab("Age (in years)") + ggtitle("Median ages for females with the 25 most common names")w_plot <- w_plot + geom_linerange(aes(ymin = q1_age, ymax = q3_age), color = "#f3d478", size = 10, alpha = 0.8)w_plot <- w_plot + geom_point(fill = "#ed3324", colour = "white", size = 4, shape = 21)w_plot + geom_point(aes(y = 55, x = 24), fill = "#ed3324", colour = "white", size = 4, shape = 21) + geom_text(aes(y = 58, x = 24, label = " median")) + geom_text(aes(y = 26, x = 16, label = " 25th")) + geom_text(aes(y = 51, x = 16, label = "75th percentile ")) + geom_point(aes(y = 24, x = 16), shape = 17) + geom_point(aes(y = 56, x = 16), shape = 17) + coord_flip() ................
................

In order to avoid copyright disputes, this page is only a partial summary.

Google Online Preview   Download