WordPress.com

# Updated August 12, 2017# set working directorysetwd("put R working directory name here")# modify this to the path for your directory# Download kohonen package documentation from:# load packageslibrary(rmarkdown)library(knitr)library(dplyr)library(kohonen)library(dummies)library(ggplot2)library(sp)library(reshape2)library(RColorBrewer)library(magrittr)#read dataSR1 <- read.csv(file = "SR1-SOM-example.csv", head=TRUE, sep =",")#exploratory analysissummary(SR1)SR1_SOM <- SR1[, -c(1)] #remove first columnsummary(SR1_SOM)str(SR1_SOM)names(SR1_SOM)hist(SR1_SOM$EventDur)select(SR1_SOM, EventDur) %>% filter(EventDur <= 1000) -> d #select values less tha 1000hist(d$EventDur, breaks = 15)#create frequency tables of error codes by minuteEC1_table <- table(SR1_SOM$Time, SR1_SOM$EC1)margin.table(EC1_table, 1)margin.table(EC1_table, 2)plot(margin.table(EC1_table, 1), col = c("darkblue", "maroon"), xlab = "Time", ylab = "Counts") #summed over Timemargin.table(EC1_table, 1) #summed over EC1EC2_table <- table(SR1_SOM$Time, SR1_SOM$EC2)margin.table(EC2_table, 2)EC2_counts <- table(SR1_SOM$EC2, SR1_SOM$Time)barplot(EC2_counts, main="Distribution of EC2 by Minute", xlab="Time", col = rainbow(7), legend = rownames(EC2_counts), beside=TRUE)EC3_table <- table(SR1_SOM$Time, SR1_SOM$EC3)# Colour palette definitiondisplay.brewer.all()#blue color for large clustercolors <- brewer.pal(10, "Paired") #first 2 colors are bluepal <- colorRampPalette(colors)my_palette <- c(pal(10))# Palette defined by kohonen packagecoolBlueHotRed <- function(n, alpha = 1) { rainbow(n, end=4/6, alpha=alpha)[n:1]}# SOM Model ################################################## scale the data#center is TRUE then centering done by subtracting the column means (omitting NAs) of x from their #corresponding columns#scale is TRUE then scaling done by dividing the (centered) columns of x by their standard deviations#if center is TRUE, and the root mean square otherwise.SR1_SOM.sc <- scale(SR1_SOM, center = TRUE, scale = TRUE)summary(SR1_SOM.sc)set.seed(3)# you can experiment with other grid sizessom_grid <- somgrid(xdim = 15, ydim=10, topo="hexagonal")som_model <- som(SR1_SOM.sc, grid = som_grid, rlen = 100) #rlen is the number of times the complete data set will be presented to the network summary(som_model)print(som_model)# Changes by iteration (specified with rlen, try different values)plot(som_model, type = "changes", main = "SR1: SOM")#plot SOMs#counts per node - empty nodes shown in grayplot(som_model, type = "counts", main="SR1: Node Counts")#shows the sum of the distances to all immediate neighbours. #also known as a U-matrix plot.#Units near a class boundary likely to have higher average distances to their neighboursplot(som_model, type="dist.neighbours", main = "SR1: SOM neighbour distances", palette.name=grey.colors)#code spreadplot(som_model, type = "codes", main = "SR1: Codebook Vectors")#shows the mean distance of objects mapped to a unit to the codebook vector of that unit.#The smaller the distances, the better the objects are represented by the codebook vectorsplot(som_model, type = "quality", main="SR1: Node Quality/Distance")# Plot the original scale heatmap for all variables # (it will be from training set if a training dataset was created; # we did not do that, used all the variables for the model)var <- 1 #column number 1 is EventDurationplot(som_model, type = "property", property = as.data.frame(som_model$codes)[,var], main=names(som_model$SR1_SOM)[var],palette.name=coolBlueHotRed )# Plot the original scale heatmap for all variablespar(mfrow=c(2,3)) #6 plots per pagefor (i in 1:6) { var <- i #define the variable to plot var_unscaled <- aggregate(as.numeric(SR1_SOM[,var]), by=list(som_model$unit.classif), FUN=mean, simplify=TRUE)[,2] plot(som_model, type = "property", property=var_unscaled, main=names(SR1_SOM)[var], palette.name=coolBlueHotRed)#rm(var_unscaled, var)}par(mfrow=c(1,1))#Create plots by minuteSR1_SOM_4 <- filter(SR1_SOM, Time == 4)#Create SOM models by minute#Experiment with different grid size### ------------------ Clustering SOM results -------------------# Show the WCSS (within cluster sum of squares) metric for kmeans # for different clustering sizes.# Can be used as a "rough" indicator of the ideal number of clusters# --> have to convert som_model$codes from list to dataframe, else gives errormySR1_SOM <- as.data.frame((som_model$codes))wss <- ((nrow(mySR1_SOM))-1)*sum(apply(mySR1_SOM,2,var))for (i in 2:15) wss[i] <- sum(kmeans(mySR1_SOM, centers=i)$withinss)par(mar=c(5.1,4.1,4.1,2.1))plot(1:15, wss, type="b", xlab="Number of Clusters", ylab="Within groups sum of squares", main="Within cluster sum of squares (WCSS)")# Form clusters on grid## use hierarchical clustering to cluster the codebook vectors; ## last parameter is number of clusterssom_cluster <- cutree(hclust(dist(as.data.frame((som_model$codes)))), 8)som_cluster# Show the map with different colours for every cluster plot(som_model, type="mapping", #pch = ".", labels = as.integer(SR1_SOM$Time), col = as.integer(SR1_SOM$Time), bgcol = my_palette[som_cluster], main = "SR1 - 8 Clusters")add.cluster.boundaries(som_model, som_cluster, lwd = 5, col = "maroon")#identify.kohonen#show the same plot with the codes instead of colours and pointsplot(som_model, type="codes", codeRendering = "segments", bgcol = my_palette[som_cluster], main = "SR1 - 8 Clusters")add.cluster.boundaries(som_model, som_cluster, lwd = 3, col = "brown")identify(som_model, som_cluster)####################################################################### ................
................

In order to avoid copyright disputes, this page is only a partial summary.

To fulfill the demand for quickly locating and searching documents.

It is intelligent file search solution for home and business.

Literature Lottery

To fulfill the demand for quickly locating and searching documents.

Related download

Related searches