R Code

R Code#Loading Datasetwd("~/R/Fun Stuff")library(readr)lon <- read_csv("london_is_changing.2019-01-15.1547566599.csv")#Loading packageslibrary(tm)library(RColorBrewer)library(wordcloud)library(udpipe)library(ggplot2)library(plyr)library(stringr)library(gridExtra)#Cleaninglon$`Why relocate` <- tolower(lon$`Why relocate`)lon$`Why relocate` <- removeNumbers(lon$`Why relocate`)lon <- subset(lon, `Why relocate` != "NA")lon <- subset(lon, `Where to` != "NA")lon <- subset(lon, `Where from` != "NA")lon <- subset(lon, `Individual household size` != "NA")lon <- subset(lon, `Relocating by choice` != "NA")#Creating language modelmodel <- udpipe_download_model(language = "english")udmodel_english <- udpipe_load_model(file = 'english-ewt-ud-2.3-181115.udpipe')#Questions#Where are people moving to and where are they coming from?where <- subset(lon, select=c(`Where to`,`Where from`))where <- data.frame(count(where))where.plot <- ggplot(data=where, aes(x=Where.to, y=freq, fill=Where.from)) + geom_bar(stat="identity", colour="black") + labs(title="By Origin", y="Number", x="Destination", fill="Origin") + theme(plot.title = element_text(hjust = 0.5,size=11), axis.title = element_text(size=10, face="bold"), legend.position="bottom", legend.direction="vertical", legend.margin=margin(t = 0, r = 0, b = 45, l = 0, unit = "pt")) + scale_fill_brewer(palette="Set1") + scale_x_discrete(labels = function(y) str_wrap(y, width = 10))#Where are people moving to by household sizehouse <- subset(lon, select=c(`Where to`,`Individual household size`))house <- data.frame(count(house))house.plot <- ggplot(data=house, aes(x=Where.to, y=freq, fill=Individual.household.size)) + geom_bar(stat="identity", colour="black") + labs(title="By Household size", y="Number", x="Destination", fill="Household Size") + theme(plot.title = element_text(hjust = 0.5,size=11), axis.title = element_text(size=10, face="bold"), legend.position="bottom", legend.direction="vertical") + scale_fill_brewer(palette="Set2") + scale_x_discrete(labels = function(y) str_wrap(y, width = 10))#Where are people moving to by choicechoice <- subset(lon, select=c(`Where to`,`Relocating by choice`))choice <- data.frame(count(choice))choice.plot <- ggplot(data=choice, aes(x=Where.to, y=freq, fill=Relocating.by.choice)) + geom_bar(stat="identity", colour="black") + labs(title="By Choice", y="Number", x="Destination", fill="Relocating by Choice") + theme(plot.title = element_text(hjust = 0.5,size=11), axis.title = element_text(size=10, face="bold"), legend.position="bottom", legend.direction="vertical", legend.margin=margin(t = 0, r = 0, b = 80, l = 0, unit = "pt")) + scale_fill_brewer(palette="Set3") + scale_x_discrete(labels = function(y) str_wrap(y, width = 10))#Comparisongrid.arrange(where.plot, choice.plot, house.plot, ncol=3, top = "Destinations of Respondents")#Why are people moving to London #Subsettingto.lon <- subset(lon, `Where to`=="Within London" & `Where from`!="Within London")#Why?why <- udpipe_annotate(udmodel_english, to.lon$`Why relocate`)why <- data.frame(why)why <- subset(why, upos != "PRON")why$phrase_tag <- as_phrasemachine(why$upos, type = "upos")stats <- keywords_phrases(why$phrase_tag, term = why$token, pattern = "(A|N)*N(P+D*(A|N)*N)*", is_regex = TRUE, ngram_max = 4, detailed = FALSE)stats <- subset(stats, ngram > 1 & freq >= 5)head(stats)## keyword ngram freq## 9 job opportunities 2 46## 17 new job 2 27## 27 work opportunities 2 19## 38 better job 2 13## 48 job prospects 2 11## 59 more opportunities 2 10#Wordcloudwordcloud(words = stats$keyword, freq = stats$freq, max.words = 350, scale=c(3,.6), random.color = TRUE,random.order=FALSE, colors=brewer.pal(8, "Dark2"), rot.per=.2) #Top 6 obvsto6 <- subset(stats, freq >= 10)to6$keyword <- factor(to6$keyword, levels = to6$keyword[order(to6$freq)])plot.to <- ggplot(data=to6, aes(x=keyword, y=freq)) + geom_bar(stat="identity", fill="#CFCFCE", colour="black") + labs(y="Frequency", x="Phrases",title="To London") + theme(plot.title = element_text(hjust = 0.5,size=11), axis.title = element_text(size=9, face="bold")) + scale_x_discrete(labels = function(y) str_wrap(y, width = 15)) + coord_flip()#Why are people relocating away from London by choice?#Subsettingaway.lon <- subset(lon, `Where from`=="Within London" & `Where to`!="Within London")#Phrase findingwhy <- udpipe_annotate(udmodel_english, away.lon$`Why relocate`)why <- data.frame(why)why <- subset(why, upos != "PRON")why$phrase_tag <- as_phrasemachine(why$upos, type = "upos")stats <- keywords_phrases(why$phrase_tag, term = why$token, pattern = "(A|N)*N(P+D*(A|N)*N)*", is_regex = TRUE, ngram_max = 4, detailed = FALSE)stats <- subset(stats, ngram > 1 & freq >= 5)head(stats)## keyword ngram freq## 14 quality of life 3 211## 28 cost of living 3 85## 33 better quality 2 77## 41 better quality of life 4 71## 58 house prices 2 46## 85 more space 2 34#Wordcloudwordcloud(words = stats$keyword, freq = stats$freq, max.words = 200, scale=c(3,.6), random.color = TRUE,random.order=FALSE, colors=brewer.pal(8, "Dark2"), rot.per=.2) #Top 6 obvsaway6 <- subset(stats, freq >= 34)away6$keyword <- factor(away6$keyword, levels = away6$keyword[order(away6$freq)])plot.away <- ggplot(data=away6, aes(x=keyword, y=freq)) + geom_bar(stat="identity", fill="#F38F18", colour="black") + labs(y="Frequency", x="Phrases", title="Away from London") + theme(plot.title = element_text(hjust = 0.5,size=11), axis.title = element_text(size=9, face="bold")) + scale_x_discrete(labels = function(y) str_wrap(y, width = 11)) + coord_flip()#Comparisongrid.arrange(plot.to, plot.away, nrow=2, top = "6 Most Common Phrases Given as Reasons for Moving") ................
................

In order to avoid copyright disputes, this page is only a partial summary.

To fulfill the demand for quickly locating and searching documents.

It is intelligent file search solution for home and business.

Literature Lottery

To fulfill the demand for quickly locating and searching documents.

Related download

Related searches