Stat. 651 ggplot2



Stat. 651 ggplot2Prof.?Eric A. Suessggplot2 exampleslibrary(tidyverse)library(mdsr)CIACountiesMake the base plot g and then add different layers on to it.head(CIACountries)## country pop area oil_prod gdp educ roadways net_users## 1 Afghanistan 32564342 652230 0 1900 NA 0.06462444 >5%## 2 Albania 3029278 28748 20510 11900 3.3 0.62613051 >35%## 3 Algeria 39542166 2381741 1420000 14500 4.3 0.04771929 >15%## 4 American Samoa 54343 199 0 13000 NA 1.21105528 <NA>## 5 Andorra 85580 468 NA 37200 NA 0.68376068 >60%## 6 Angola 19625353 1246700 1742000 7300 3.5 0.04125211 >15%# base plot gg <- CIACountries %>% ggplot(aes(y = gdp, x = educ)) g + geom_point()## Warning: Removed 64 rows containing missing values (geom_point).g + geom_point(size = 3)## Warning: Removed 64 rows containing missing values (geom_point).g + geom_point(aes(color = net_users), size = 3)## Warning: Removed 64 rows containing missing values (geom_point).# no geom_point used for the next pictureg + geom_text( aes(label = country, color = net_users), size = 3 )## Warning: Removed 64 rows containing missing values (geom_text).g + geom_point( aes(color = net_users, size = roadways) )## Warning: Removed 66 rows containing missing values (geom_point).Change the scalesg + geom_point(aes(color = net_users, size = roadways)) + coord_trans( y = "log10")## Warning: Removed 66 rows containing missing values (geom_point).g + geom_point(aes(color = net_users, size = roadways)) + scale_y_continuous(name = "Gross Domestic Product", trans = "log10")## Warning: Removed 66 rows containing missing values (geom_point).Facetingg + geom_point(alpha = 0.9, aes(size = roadways)) + coord_trans(y = "log10") + facet_wrap( ~ net_users, nrow = 1) + theme(legend.position = "top")## Warning: Removed 66 rows containing missing values (geom_point).g + geom_point(alpha = 0.9, aes(size = roadways)) + coord_trans(y = "log10") + scale_y_continuous(name = "Gross Domestic Product", trans = "log10") + facet_wrap( ~ net_users, nrow = 1) + theme(legend.position = "top")## Warning: Removed 66 rows containing missing values (geom_point).Export the data and try in Tableaugetwd()## [1] "/home/esuess/classes/2020-2021/01 - Fall 2020/Stat651/Presentations/02_ggplot2"write_csv(CIACountries, "CIACountries.csv")MedicareChargesCheck out the MEPS website for more real data.# head(MedicareCharges) # This now causes an error, remove the grouping.? MedicareChargesMedicareCharges <- ungroup(MedicareCharges)head(MedicareCharges)## # A tibble: 6 x 4## drg stateProvider num_charges mean_charge## <chr> <fct> <int> <dbl>## 1 039 AK 1 34805.## 2 039 AL 23 32044.## 3 039 AR 16 27463.## 4 039 AZ 24 33443.## 5 039 CA 67 56095.## 6 039 CO 10 35252.NJCharges <- MedicareCharges %>% filter(stateProvider == "NJ")NJCharges## # A tibble: 100 x 4## drg stateProvider num_charges mean_charge## <chr> <fct> <int> <dbl>## 1 039 NJ 31 35104.## 2 057 NJ 55 45692.## 3 064 NJ 55 87042.## 4 065 NJ 59 59576.## 5 066 NJ 56 45819.## 6 069 NJ 61 41917.## 7 074 NJ 41 42993.## 8 101 NJ 58 42314.## 9 149 NJ 50 34916.## 10 176 NJ 36 58941.## # … with 90 more rowsp <- NJCharges %>% ggplot(aes(y = mean_charge, x = reorder(drg, mean_charge))) + geom_bar(fill = "grey", stat = "identity")pp <- p + ylab("Statewide Average Charges ($)") + xlab("Medical Procedure (DRG)")pp <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1))pNow add the overall data to the plot to compare with NJ.p <- p + geom_point(data = MedicareCharges, size = 1, alpha = 0.3)pSATHere is the link to the College Board SAT website.g <- SAT_2010 %>% ggplot(aes(x = math))g + geom_histogram()## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.g + geom_histogram(binwidth = 10)g + geom_density()ggplot( data = head(SAT_2010, 10), aes( y = math, x = reorder(state, math) ) ) + geom_bar(stat = "identity")Scatterplot with tend linesg <- SAT_2010 %>% ggplot(aes(x = expenditure, y = math)) + geom_point()gg <- g + geom_smooth(method="lm", se = 0) + xlab("Average expenditure per student ($100)") + ylab("Average score on math SAT")gAdd the trend line within groups representing rate of taking the test.SAT_2010 <- SAT_2010 %>% mutate(SAT_rate = cut(sat_pct, breaks = c(0, 30, 60, 100), labels = c("low", "medium", "high") ))g <- g %+% SAT_2010gg + aes(color = SAT_rate)g +facet_wrap( ~ SAT_rate)HELPPrctHere is the link to the NSDUH website.HELPrct %>% ggplot(aes(x = homeless)) + geom_bar(aes(fill = substance), position = "fill") + coord_flip()NHANESHere is the link to the NHANES website.library(NHANES)head(NHANES)## # A tibble: 6 x 76## ID SurveyYr Gender Age AgeDecade AgeMonths Race1 Race3 Education## <int> <fct> <fct> <int> <fct> <int> <fct> <fct> <fct> ## 1 51624 2009_10 male 34 " 30-39" 409 White <NA> High Sch…## 2 51624 2009_10 male 34 " 30-39" 409 White <NA> High Sch…## 3 51624 2009_10 male 34 " 30-39" 409 White <NA> High Sch…## 4 51625 2009_10 male 4 " 0-9" 49 Other <NA> <NA> ## 5 51630 2009_10 female 49 " 40-49" 596 White <NA> Some Col…## 6 51638 2009_10 male 9 " 0-9" 115 White <NA> <NA> ## # … with 67 more variables: MaritalStatus <fct>, HHIncome <fct>,## # HHIncomeMid <int>, Poverty <dbl>, HomeRooms <int>, HomeOwn <fct>,## # Work <fct>, Weight <dbl>, Length <dbl>, HeadCirc <dbl>, Height <dbl>,## # BMI <dbl>, BMICatUnder20yrs <fct>, BMI_WHO <fct>, Pulse <int>,## # BPSysAve <int>, BPDiaAve <int>, BPSys1 <int>, BPDia1 <int>, BPSys2 <int>,## # BPDia2 <int>, BPSys3 <int>, BPDia3 <int>, Testosterone <dbl>,## # DirectChol <dbl>, TotChol <dbl>, UrineVol1 <int>, UrineFlow1 <dbl>,## # UrineVol2 <int>, UrineFlow2 <dbl>, Diabetes <fct>, DiabetesAge <int>,## # HealthGen <fct>, DaysPhysHlthBad <int>, DaysMentHlthBad <int>,## # LittleInterest <fct>, Depressed <fct>, nPregnancies <int>, nBabies <int>,## # Age1stBaby <int>, SleepHrsNight <int>, SleepTrouble <fct>,## # PhysActive <fct>, PhysActiveDays <int>, TVHrsDay <fct>, CompHrsDay <fct>,## # TVHrsDayChild <int>, CompHrsDayChild <int>, Alcohol12PlusYr <fct>,## # AlcoholDay <int>, AlcoholYear <int>, SmokeNow <fct>, Smoke100 <fct>,## # Smoke100n <fct>, SmokeAge <int>, Marijuana <fct>, AgeFirstMarij <int>,## # RegularMarij <fct>, AgeRegMarij <int>, HardDrugs <fct>, SexEver <fct>,## # SexAge <int>, SexNumPartnLife <int>, SexNumPartYear <int>, SameSex <fct>,## # SexOrientation <fct>, PregnantNow <fct>Take a sample first and then make the plot.sample_n(NHANES, size = 1000) %>% ggplot(aes(x = Age, y = Height, color = Gender)) + geom_point() + geom_smooth() + xlab("Age (years)") + ylab("Height (cm)")## `geom_smooth()` using method = 'loess' and formula 'y ~ x'## Warning: Removed 39 rows containing non-finite values (stat_smooth).## Warning: Removed 39 rows containing missing values (geom_point).Here is an alternative plot using all the data. This is hexbin plot.NHANES %>% ggplot(aes(x = Age, y = Height, color = Gender)) + geom_hex() + geom_smooth() + xlab("Age (years)") + ylab("Height (cm)")## Warning: Removed 353 rows containing non-finite values (stat_binhex).## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'## Warning: Removed 353 rows containing non-finite values (stat_smooth).library(mosaic)head(NHANES)## # A tibble: 6 x 76## ID SurveyYr Gender Age AgeDecade AgeMonths Race1 Race3 Education## <int> <fct> <fct> <int> <fct> <int> <fct> <fct> <fct> ## 1 51624 2009_10 male 34 " 30-39" 409 White <NA> High Sch…## 2 51624 2009_10 male 34 " 30-39" 409 White <NA> High Sch…## 3 51624 2009_10 male 34 " 30-39" 409 White <NA> High Sch…## 4 51625 2009_10 male 4 " 0-9" 49 Other <NA> <NA> ## 5 51630 2009_10 female 49 " 40-49" 596 White <NA> Some Col…## 6 51638 2009_10 male 9 " 0-9" 115 White <NA> <NA> ## # … with 67 more variables: MaritalStatus <fct>, HHIncome <fct>,## # HHIncomeMid <int>, Poverty <dbl>, HomeRooms <int>, HomeOwn <fct>,## # Work <fct>, Weight <dbl>, Length <dbl>, HeadCirc <dbl>, Height <dbl>,## # BMI <dbl>, BMICatUnder20yrs <fct>, BMI_WHO <fct>, Pulse <int>,## # BPSysAve <int>, BPDiaAve <int>, BPSys1 <int>, BPDia1 <int>, BPSys2 <int>,## # BPDia2 <int>, BPSys3 <int>, BPDia3 <int>, Testosterone <dbl>,## # DirectChol <dbl>, TotChol <dbl>, UrineVol1 <int>, UrineFlow1 <dbl>,## # UrineVol2 <int>, UrineFlow2 <dbl>, Diabetes <fct>, DiabetesAge <int>,## # HealthGen <fct>, DaysPhysHlthBad <int>, DaysMentHlthBad <int>,## # LittleInterest <fct>, Depressed <fct>, nPregnancies <int>, nBabies <int>,## # Age1stBaby <int>, SleepHrsNight <int>, SleepTrouble <fct>,## # PhysActive <fct>, PhysActiveDays <int>, TVHrsDay <fct>, CompHrsDay <fct>,## # TVHrsDayChild <int>, CompHrsDayChild <int>, Alcohol12PlusYr <fct>,## # AlcoholDay <int>, AlcoholYear <int>, SmokeNow <fct>, Smoke100 <fct>,## # Smoke100n <fct>, SmokeAge <int>, Marijuana <fct>, AgeFirstMarij <int>,## # RegularMarij <fct>, AgeRegMarij <int>, HardDrugs <fct>, SexEver <fct>,## # SexAge <int>, SexNumPartnLife <int>, SexNumPartYear <int>, SameSex <fct>,## # SexOrientation <fct>, PregnantNow <fct>NHANES2 <- NHANES %>% select(AgeDecade, BMI_WHO) head(NHANES2)## # A tibble: 6 x 2## AgeDecade BMI_WHO ## <fct> <fct> ## 1 " 30-39" 30.0_plus## 2 " 30-39" 30.0_plus## 3 " 30-39" 30.0_plus## 4 " 0-9" 12.0_18.5## 5 " 40-49" 30.0_plus## 6 " 0-9" 12.0_18.5NHANES2_table <- table(NHANES2)NHANES2_table## BMI_WHO## AgeDecade 12.0_18.5 18.5_to_24.9 25.0_to_29.9 30.0_plus## 0-9 873 193 28 7## 10-19 280 664 244 172## 20-29 49 526 349 418## 30-39 10 394 433 495## 40-49 26 371 475 506## 50-59 15 314 487 477## 60-69 8 199 321 373## 70+ 6 142 207 218mosaicplot(NHANES2_table, color = TRUE)Weatherlibrary(macleish)## Loading required package: etlhead(whately_2015)## # A tibble: 6 x 8## when temperature wind_speed wind_dir rel_humidity pressure## <dttm> <dbl> <dbl> <dbl> <dbl> <int>## 1 2015-01-01 00:00:00 -9.32 1.40 225. 54.6 985## 2 2015-01-01 00:10:00 -9.46 1.51 248. 55.4 985## 3 2015-01-01 00:20:00 -9.44 1.62 258. 56.2 985## 4 2015-01-01 00:30:00 -9.3 1.14 244. 56.4 985## 5 2015-01-01 00:40:00 -9.32 1.22 238. 56.9 984## 6 2015-01-01 00:50:00 -9.34 1.09 242. 57.2 984## # … with 2 more variables: solar_radiation <dbl>, rainfall <int>whately_2015 %>% ggplot(aes(x = when, y=temperature)) + geom_line(color = "darkgrey") + geom_smooth() + xlab(NULL) + ylab("Tempurature (degrees Fahrenheit)")## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'Here is the link to the choroplethr website.library(choroplethr)## Loading required package: acs## Loading required package: XML## ## Attaching package: 'acs'## The following object is masked from 'package:dplyr':## ## combine## The following object is masked from 'package:base':## ## applylibrary(choroplethrMaps)library(rUnemploymentData)animated_state_unemployment_choropleth()## [1] "All files will be written to the current working directory: /home/esuess/classes/2020-2021/01 - Fall 2020/Stat651/Presentations/02_ggplot2 . To change this use setwd()"## [1] "Now writing individual choropleth files there as 'choropleth_1.png', 'choropleth_2.png', etc."## Saving 5 x 4 in image## Saving 5 x 4 in image## Saving 5 x 4 in image## Saving 5 x 4 in image## Saving 5 x 4 in image## Saving 5 x 4 in image## Saving 5 x 4 in image## Saving 5 x 4 in image## Saving 5 x 4 in image## Saving 5 x 4 in image## Saving 5 x 4 in image## Saving 5 x 4 in image## Saving 5 x 4 in image## Saving 5 x 4 in image## [1] "Now writing code to animate all images in 'animated_choropleth.html'. Please open that file with a browser."# animated_county_unemployment_choropleth()NetworksCheck out ggnet2 or the newer ggnetworkExample 4.library(GGally)library(network)## network: Classes for Relational Data## Version 1.16.0 created on 2019-11-30.## copyright (c) 2005, Carter T. Butts, University of California-Irvine## Mark S. Handcock, University of California -- Los Angeles## David R. Hunter, Penn State University## Martina Morris, University of Washington## Skye Bender-deMoll, University of Washington## For citation information, type citation("network").## Type help("network-package") to get started.library(sna)## Loading required package: mon## ## Attaching package: 'mon'## The following object is masked from 'package:base':## ## order## sna: Tools for Social Network Analysis## Version 2.5 created on 2019-12-09.## copyright (c) 2005, Carter T. Butts, University of California-Irvine## For citation information, type citation("sna").## Type help(package="sna") to get started.library(ggplot2)# root URLr = " read nodesv = read.csv(paste0(r, "inst/extdata/nodes.tsv"), sep = "\t")names(v)## [1] "Sexe" "Prénom" "Nom" ## [4] "Groupe" "Département.d.élection" "Num.circonscription" ## [7] "Commission.permanente" "Twitter"# read edgese = read.csv(paste0(r, "inst/extdata/network.tsv"), sep = "\t")names(e)## [1] "Source" "Target"# network objectnet = network(e, directed = TRUE)# party affiliationx = data.frame(Twitter = network.vertex.names(net))x = merge(x, v, by = "Twitter", sort = FALSE)$Groupenet %v% "party" = as.character(x)# color palettey = RColorBrewer::brewer.pal(9, "Set1")[ c(3, 1, 9, 6, 8, 5, 2) ]names(y) = levels(x)# network plotggnet2(net, color = "party", palette = y, alpha = 0.75, size = 4, edge.alpha = 0.5)Review Table 3.3 on page 47 for the different kinds of plots that can be made for different kinds of x, y variables.Continue with the Extended example: Historical baby names on page 48. ................
................

In order to avoid copyright disputes, this page is only a partial summary.

Google Online Preview   Download