Network Science - California State University, East Bay



Network ScienceChapter 16This is some of the code from Chapter 16. I was able to get the PageRank code to work.library(pacman)p_load(tidyverse)library(mdsr) ## Loading required package: lattice## Loading required package: ggformula## Loading required package: ggstance## ## Attaching package: 'ggstance'## The following objects are masked from 'package:ggplot2':## ## geom_errorbarh, GeomErrorbarh## ## New to ggformula? Try the tutorials: ## learnr::run_tutorial("introduction", package = "ggformula")## learnr::run_tutorial("refining", package = "ggformula")## Loading required package: mosaicData## Loading required package: Matrix## ## Attaching package: 'Matrix'## The following objects are masked from 'package:tidyr':## ## expand, pack, unpack## ## The 'mosaic' package masks several functions from core packages in order to add ## additional features. The original behavior of these functions should not be affected by this.## ## Note: If you use the Matrix package, be sure to load it BEFORE loading mosaic.## ## In accordance with CRAN policy, the 'mdsr' package ## no longer attaches## the 'tidyverse' package automatically.## You may need to 'library(tidyverse)' in order to ## use certain functions.library(igraph) ## ## Attaching package: 'igraph'## The following objects are masked from 'package:dplyr':## ## as_data_frame, groups, union## The following objects are masked from 'package:purrr':## ## compose, simplify## The following object is masked from 'package:tidyr':## ## crossing## The following object is masked from 'package:tibble':## ## as_data_frame## The following objects are masked from 'package:stats':## ## decompose, spectrum## The following object is masked from 'package:base':## ## unionn <- 100 p_star <- log(n)/nplot_er <- function(n, p, ...) { g <- erdos.renyi.game(n, p) plot(g, main = paste("p =", round(p, 4)), vertex.frame.color = "white", vertex.size = 3, vertex.label = NA, ...) }plot_er(n, p = 0.8 * p_star)plot_er(n, p = 1.2 * p_star)n <- 1000 p_star <- log(n)/n ps <- rep(seq(from = 0, to = 2 * p_star, by = 0.001), each = 100) er_connected <- function(n, p, ...) { c(n = n, p = p, connected = is.connected(erdos.renyi.game(n, p)))}sims <- as.data.frame(t(sapply(ps, er_connected, n = n))) ggplot(data = sims, aes(x = p, y = connected)) + geom_vline(xintercept = p_star, color = "darkgray") + geom_text(x = p_star, y = 0.9, label = "Threshold value", hjust="right") + labs(x = "Probability of edge existing", y = "Probability that random graph is connected") + geom_count() + geom_smooth()## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'g1 <- erdos.renyi.game(n, p = log(n)/n) g2 <- barabasi.game(n, m = 3, directed = FALSE) summary(g1)## IGRAPH 655d47b U--- 1000 3530 -- Erdos renyi (gnp) graph## + attr: name (g/c), type (g/c), loops (g/l), p (g/n)summary(g2)## IGRAPH 3e3e88e U--- 1000 2994 -- Barabasi graph## + attr: name (g/c), power (g/n), m (g/n), zero.appeal (g/n),## | algorithm (g/c)d <- data.frame(type = rep(c("Erdos-Renyi", "Barabasi-Albert"), each = n), degree = c(degree(g1), degree(g2)))ggplot(data = d, aes(x = degree, color = type)) + geom_density(size = 2) + scale_x_continuous(limits = c(0, 25))## Warning: Removed 17 rows containing non-finite values (stat_density).Extended example: 1996 men’s college backetballTo get the example to run I downloaded the data from the kaggle website for the March Machine Learning Mania 2015. You need to have an account and your needs to log in to download the data. Note that the data file that is needed to run the example has a different name than the name in the book.library(mdsr) teams <- readr::read_csv("data/all/teams.csv") ## Parsed with column specification:## cols(## team_id = col_double(),## team_name = col_character()## )games <- readr::read_csv("data/all/regular_season_compact_results.csv") %>% filter(season == 1996) ## Parsed with column specification:## cols(## season = col_double(),## daynum = col_double(),## wteam = col_double(),## wscore = col_double(),## lteam = col_double(),## lscore = col_double(),## wloc = col_character(),## numot = col_double()## )dim(games)## [1] 4122 8E <- games %>% mutate(score_ratio = wscore/lscore) %>% select(lteam, wteam, score_ratio)V <- teams %>% filter(team_id %in% unique(c(E$lteam, E$wteam)))library(igraph) g <- graph_from_data_frame(E, directed = TRUE, vertices = V) summary(g)## IGRAPH 3515c29 DN-- 305 4122 -- ## + attr: name (v/c), team_name (v/c), score_ratio (e/n)g <- set_vertex_attr(g, "pagerank", value = page_rank(g)$vector) as_data_frame(g, what = "vertices") %>% arrange(desc(pagerank)) %>% head(20) ## name team_name pagerank## 1 1203 G Washington 0.021863566## 2 1269 Massachusetts 0.020504852## 3 1207 Georgetown 0.016415082## 4 1234 Iowa 0.014343587## 5 1163 Connecticut 0.014084209## 6 1437 Villanova 0.013089525## 7 1246 Kentucky 0.012735610## 8 1345 Purdue 0.011456116## 9 1280 Mississippi St 0.011368140## 10 1210 Georgia Tech 0.010578926## 11 1112 Arizona 0.010255324## 12 1448 Wake Forest 0.010081101## 13 1242 Kansas 0.009917330## 14 1336 Penn St 0.009752762## 15 1185 E Michigan 0.009707138## 16 1393 Syracuse 0.009563255## 17 1266 Marquette 0.009441184## 18 1314 North Carolina 0.009415234## 19 1153 Cincinnati 0.009396704## 20 1396 Temple 0.008595410wins <- E %>% group_by(wteam) %>% summarise(N = n())losses <- E %>% group_by(lteam) %>% summarise(N = n())wins %>% full_join(losses, by = c("wteam" = "lteam")) %>% left_join(teams, by = c("wteam" = "team_id")) %>% rename(wins = N.x, losses = N.y) %>% mutate(win_pct = wins / (wins + losses)) %>% arrange(desc(win_pct)) %>% head(20)## # A tibble: 20 x 5## wteam wins losses team_name win_pct## <dbl> <int> <int> <chr> <dbl>## 1 1269 31 1 Massachusetts 0.969## 2 1403 28 1 Texas Tech 0.966## 3 1163 30 2 Connecticut 0.938## 4 1246 28 2 Kentucky 0.933## 5 1180 25 3 Drexel 0.893## 6 1453 24 3 WI Green Bay 0.889## 7 1158 22 3 Col Charleston 0.88 ## 8 1307 26 4 New Mexico 0.867## 9 1153 25 4 Cincinnati 0.862## 10 1242 25 4 Kansas 0.862## 11 1172 22 4 Davidson 0.846## 12 1345 25 5 Purdue 0.833## 13 1448 23 5 Wake Forest 0.821## 14 1185 22 5 E Michigan 0.815## 15 1439 22 5 Virginia Tech 0.815## 16 1437 25 6 Villanova 0.806## 17 1112 24 6 Arizona 0.8 ## 18 1428 23 6 Utah 0.793## 19 1265 22 6 Marist 0.786## 20 1114 21 6 Ark Little Rock 0.778E %>% filter(wteam == 1269 & lteam == 1246)## # A tibble: 1 x 3## lteam wteam score_ratio## <dbl> <dbl> <dbl>## 1 1246 1269 1.12E %>% filter(lteam %in% c(1203, 1269) & wteam %in% c(1203, 1269))## # A tibble: 2 x 3## lteam wteam score_ratio## <dbl> <dbl> <dbl>## 1 1269 1203 1.13## 2 1203 1269 1.14A_10 <- c("Massachusetts", "Temple", "G Washington", "Rhode Island", "St Bonaventure", "St Joseph's PA", "Virginia Tech", "Xavier", "Dayton", "Duquesne", "La Salle", "Fordhham")a10 <- V(g)[ team_name %in% A_10 ] a <- induced_subgraph(g, vids = a10) a <- set_vertex_attr(a, "pagerank", value = page_rank(a)$vector) summary(a)## IGRAPH 52a800c DN-- 11 90 -- ## + attr: name (v/c), team_name (v/c), pagerank (v/n), score_ratio## | (e/n)library(ggnetwork) library(intergraph)a_df <- ggnetwork(a) ## Loading required package: sna## Loading required package: mon## ## Attaching package: 'mon'## The following object is masked from 'package:base':## ## order## Loading required package: network## network: Classes for Relational Data## Version 1.15 created on 2019-04-01.## copyright (c) 2005, Carter T. Butts, University of California-Irvine## Mark S. Handcock, University of California -- Los Angeles## David R. Hunter, Penn State University## Martina Morris, University of Washington## Skye Bender-deMoll, University of Washington## For citation information, type citation("network").## Type help("network-package") to get started.## ## Attaching package: 'network'## The following objects are masked from 'package:igraph':## ## %c%, %s%, add.edges, add.vertices, delete.edges,## delete.vertices, get.edge.attribute, get.edges,## get.vertex.attribute, is.bipartite, is.directed,## list.edge.attributes, list.vertex.attributes,## set.edge.attribute, set.vertex.attribute## sna: Tools for Social Network Analysis## Version 2.4 created on 2016-07-23.## copyright (c) 2005, Carter T. Butts, University of California-Irvine## For citation information, type citation("sna").## Type help(package="sna") to get started.## ## Attaching package: 'sna'## The following objects are masked from 'package:igraph':## ## betweenness, bonpow, closeness, components, degree,## dyad.census, evcent, hierarchy, is.connected, neighborhood,## triad.census## Warning in work(intergraph::asNetwork(model), ...): duplicated## edges detectedggplot(a_df, aes(x, y, xend = xend, yend = yend)) + geom_edges(aes(alpha = score_ratio), color = "lightgray", arrow = arrow(length = unit(0.2, "cm")), curvature = 0.2) + geom_nodes(aes(size = pagerank, color = pagerank), alpha = 0.6) + geom_nodetext(aes(label = team_name)) + scale_alpha_continuous(range = c(0.4, 1)) + scale_size_continuous(range = c(1, 10)) + guides(color = guide_legend("PageRank"), size=guide_legend("PageRank")) + theme_blank()P <- t(as_adjacency_matrix(a, sparse = FALSE, attr = "score_ratio"))P <- scale(P, center = FALSE, scale = colSums(P)) round(P, 2)## 1173 1182 1203 1247 1269 1348 1382 1386 1396 1439 1462## 1173 0.00 0.09 0.00 0.10 0 0.14 0.11 0.00 0.00 0.00 0.16## 1182 0.11 0.00 0.00 0.11 0 0.00 0.00 0.00 0.00 0.00 0.00## 1203 0.14 0.12 0.00 0.10 1 0.14 0.11 0.17 0.33 0.27 0.16## 1247 0.00 0.09 0.25 0.00 0 0.00 0.12 0.00 0.00 0.00 0.00## 1269 0.14 0.09 0.26 0.12 0 0.14 0.12 0.16 0.41 0.25 0.15## 1348 0.00 0.10 0.00 0.11 0 0.00 0.13 0.16 0.26 0.21 0.18## 1382 0.13 0.08 0.00 0.00 0 0.14 0.00 0.00 0.00 0.00 0.00## 1386 0.13 0.09 0.24 0.10 0 0.14 0.10 0.00 0.00 0.00 0.00## 1396 0.14 0.15 0.00 0.13 0 0.15 0.10 0.16 0.00 0.27 0.19## 1439 0.11 0.10 0.25 0.12 0 0.14 0.11 0.17 0.00 0.00 0.15## 1462 0.11 0.09 0.00 0.11 0 0.00 0.12 0.18 0.00 0.00 0.00## attr(,"scaled:scale")## 1173 1182 1203 1247 1269 1348 1382 ## 9.580291 12.193997 4.385632 10.705995 1.131579 7.620903 10.466131 ## 1386 1396 1439 1462 ## 6.573151 4.110908 5.112315 6.894009v0 <- rep(1, vcount(a)) / vcount(a) v0## [1] 0.09090909 0.09090909 0.09090909 0.09090909 0.09090909 0.09090909## [7] 0.09090909 0.09090909 0.09090909 0.09090909 0.09090909v <- v0 for (i in 1:20) { v <- P %*% v } as.vector(v)## [1] 0.02620462 0.01073061 0.28880610 0.07414360 0.18416968 0.07731754## [7] 0.01477331 0.09291498 0.08133820 0.11927412 0.03032723page_rank(a)$vect## 1173 1182 1203 1247 1269 1348 ## 0.03683789 0.02134844 0.25160534 0.07006017 0.18793707 0.07779895 ## 1382 1386 1396 1439 1462 ## 0.02541142 0.08807760 0.09042641 0.11095533 0.03954137page_rank(a, damping = 1)$vec## 1173 1182 1203 1247 1269 1348 ## 0.023832141 0.008035839 0.288239781 0.073484127 0.204591471 0.072857810 ## 1382 1386 1396 1439 1462 ## 0.011052515 0.091039461 0.084180455 0.115619832 0.027066568w <- v0 d <- 0.85 for (i in 1:20) { w <- d * P %*% w + (1 - d) * v0 }as.vector(w)## [1] 0.04033680 0.02382045 0.25181394 0.07127669 0.16836965 0.08332106## [7] 0.02933482 0.08934652 0.08765190 0.11231744 0.04241073page_rank(a, damping = 0.85)$vector## 1173 1182 1203 1247 1269 1348 ## 0.03683789 0.02134844 0.25160534 0.07006017 0.18793707 0.07779895 ## 1382 1386 1396 1439 1462 ## 0.02541142 0.08807760 0.09042641 0.11095533 0.03954137 ................
................

In order to avoid copyright disputes, this page is only a partial summary.

Google Online Preview   Download

To fulfill the demand for quickly locating and searching documents.

It is intelligent file search solution for home and business.

Literature Lottery

Related searches