R Code



R Code#Loading Datasetwd("~/R/Fun Stuff")library(readr)lon <- read_csv("london_is_changing.2019-01-15.1547566599.csv")#Loading packageslibrary(tm)library(RColorBrewer)library(wordcloud)library(udpipe)library(ggplot2)library(plyr)library(stringr)library(gridExtra)#Cleaninglon$`Why relocate` <- tolower(lon$`Why relocate`)lon$`Why relocate` <- removeNumbers(lon$`Why relocate`)lon <- subset(lon, `Why relocate` != "NA")lon <- subset(lon, `Where to` != "NA")lon <- subset(lon, `Where from` != "NA")lon <- subset(lon, `Individual household size` != "NA")lon <- subset(lon, `Relocating by choice` != "NA")#Creating language modelmodel <- udpipe_download_model(language = "english")udmodel_english <- udpipe_load_model(file = 'english-ewt-ud-2.3-181115.udpipe')#Questions#Where are people moving to and where are they coming from?where <- subset(lon, select=c(`Where to`,`Where from`))where <- data.frame(count(where))where.plot <- ggplot(data=where, aes(x=Where.to, y=freq, fill=Where.from)) + geom_bar(stat="identity", colour="black") + labs(title="By Origin", y="Number", x="Destination", fill="Origin") + theme(plot.title = element_text(hjust = 0.5,size=11), axis.title = element_text(size=10, face="bold"), legend.position="bottom", legend.direction="vertical", legend.margin=margin(t = 0, r = 0, b = 45, l = 0, unit = "pt")) + scale_fill_brewer(palette="Set1") + scale_x_discrete(labels = function(y) str_wrap(y, width = 10))#Where are people moving to by household sizehouse <- subset(lon, select=c(`Where to`,`Individual household size`))house <- data.frame(count(house))house.plot <- ggplot(data=house, aes(x=Where.to, y=freq, fill=Individual.household.size)) + geom_bar(stat="identity", colour="black") + labs(title="By Household size", y="Number", x="Destination", fill="Household Size") + theme(plot.title = element_text(hjust = 0.5,size=11), axis.title = element_text(size=10, face="bold"), legend.position="bottom", legend.direction="vertical") + scale_fill_brewer(palette="Set2") + scale_x_discrete(labels = function(y) str_wrap(y, width = 10))#Where are people moving to by choicechoice <- subset(lon, select=c(`Where to`,`Relocating by choice`))choice <- data.frame(count(choice))choice.plot <- ggplot(data=choice, aes(x=Where.to, y=freq, fill=Relocating.by.choice)) + geom_bar(stat="identity", colour="black") + labs(title="By Choice", y="Number", x="Destination", fill="Relocating by Choice") + theme(plot.title = element_text(hjust = 0.5,size=11), axis.title = element_text(size=10, face="bold"), legend.position="bottom", legend.direction="vertical", legend.margin=margin(t = 0, r = 0, b = 80, l = 0, unit = "pt")) + scale_fill_brewer(palette="Set3") + scale_x_discrete(labels = function(y) str_wrap(y, width = 10))#Comparisongrid.arrange(where.plot, choice.plot, house.plot, ncol=3, top = "Destinations of Respondents")#Why are people moving to London #Subsettingto.lon <- subset(lon, `Where to`=="Within London" & `Where from`!="Within London")#Why?why <- udpipe_annotate(udmodel_english, to.lon$`Why relocate`)why <- data.frame(why)why <- subset(why, upos != "PRON")why$phrase_tag <- as_phrasemachine(why$upos, type = "upos")stats <- keywords_phrases(why$phrase_tag, term = why$token, pattern = "(A|N)*N(P+D*(A|N)*N)*", is_regex = TRUE, ngram_max = 4, detailed = FALSE)stats <- subset(stats, ngram > 1 & freq >= 5)head(stats)## keyword ngram freq## 9 job opportunities 2 46## 17 new job 2 27## 27 work opportunities 2 19## 38 better job 2 13## 48 job prospects 2 11## 59 more opportunities 2 10#Wordcloudwordcloud(words = stats$keyword, freq = stats$freq, max.words = 350, scale=c(3,.6), random.color = TRUE,random.order=FALSE, colors=brewer.pal(8, "Dark2"), rot.per=.2) #Top 6 obvsto6 <- subset(stats, freq >= 10)to6$keyword <- factor(to6$keyword, levels = to6$keyword[order(to6$freq)])plot.to <- ggplot(data=to6, aes(x=keyword, y=freq)) + geom_bar(stat="identity", fill="#CFCFCE", colour="black") + labs(y="Frequency", x="Phrases",title="To London") + theme(plot.title = element_text(hjust = 0.5,size=11), axis.title = element_text(size=9, face="bold")) + scale_x_discrete(labels = function(y) str_wrap(y, width = 15)) + coord_flip()#Why are people relocating away from London by choice?#Subsettingaway.lon <- subset(lon, `Where from`=="Within London" & `Where to`!="Within London")#Phrase findingwhy <- udpipe_annotate(udmodel_english, away.lon$`Why relocate`)why <- data.frame(why)why <- subset(why, upos != "PRON")why$phrase_tag <- as_phrasemachine(why$upos, type = "upos")stats <- keywords_phrases(why$phrase_tag, term = why$token, pattern = "(A|N)*N(P+D*(A|N)*N)*", is_regex = TRUE, ngram_max = 4, detailed = FALSE)stats <- subset(stats, ngram > 1 & freq >= 5)head(stats)## keyword ngram freq## 14 quality of life 3 211## 28 cost of living 3 85## 33 better quality 2 77## 41 better quality of life 4 71## 58 house prices 2 46## 85 more space 2 34#Wordcloudwordcloud(words = stats$keyword, freq = stats$freq, max.words = 200, scale=c(3,.6), random.color = TRUE,random.order=FALSE, colors=brewer.pal(8, "Dark2"), rot.per=.2) #Top 6 obvsaway6 <- subset(stats, freq >= 34)away6$keyword <- factor(away6$keyword, levels = away6$keyword[order(away6$freq)])plot.away <- ggplot(data=away6, aes(x=keyword, y=freq)) + geom_bar(stat="identity", fill="#F38F18", colour="black") + labs(y="Frequency", x="Phrases", title="Away from London") + theme(plot.title = element_text(hjust = 0.5,size=11), axis.title = element_text(size=9, face="bold")) + scale_x_discrete(labels = function(y) str_wrap(y, width = 11)) + coord_flip()#Comparisongrid.arrange(plot.to, plot.away, nrow=2, top = "6 Most Common Phrases Given as Reasons for Moving") ................
................

In order to avoid copyright disputes, this page is only a partial summary.

Google Online Preview   Download