Plotting texts as graphs with R and igraph

On August 5, 2010, in data, by cornelius

I’ve plotted several word association graphs for this New York Times article (1st paragraph) using R and the igraph library.

#1, random method

text-igraph-random

#2, circle method

text-igraph-circle

#3, sphere method

text-igraph-sphere

#4, spring method

text-igraph-spring

#5, fruchterman-reingold method

text-igraph-fruchterman-reingold

#6, kamada-kawai method

text-igraph-kamada-kawai

#7, graphopt method

text-igraph-graphopt

The red vertices mark the members of the largest cliques. Here’s the (rough) R code for plotting such graphs:

# Setup and interactive input for the text-as-graph script.
# The workspace is deliberately NOT cleared here: wiping every object in the
# caller's environment is a destructive side effect a script should not have.

library("igraph");
library("Cairo");

# Print `prompt` and read a single answer from the console.
#
# The answer is read as a whole line (sep = "\n"), so values containing
# spaces -- e.g. a folder such as "My Documents" -- stay in one piece instead
# of being split into several tokens. As before, console input is finished
# with an empty line.
#
# Returns a length-one, non-empty character string; stops with an error
# otherwise so that a bad answer fails here rather than further down.
read_answer <- function(prompt) {
  print(prompt)
  answer <- scan(
    file = "", what = "character", sep = "\n",
    strip.white = TRUE, quiet = TRUE
  )
  if (length(answer) != 1L || !nzchar(answer)) {
    stop("Expected exactly one non-empty value for: ", prompt, call. = FALSE)
  }
  answer
}

# read parameters
print("Text-as-Graph for R 0.1")
print("------------------------------------")

datafolder <- read_answer("Path (no trailing slash): ")
datafile <- read_answer("Text file: ")

# Read the text line by line; each element of `txt` is one line of the file.
txt_path <- file.path(datafolder, datafile)
if (!file.exists(txt_path)) {
  stop("Text file not found: ", txt_path, call. = FALSE)
}
txt <- readLines(txt_path, encoding = "UTF-8", warn = FALSE)

# Parse "<width>x<height>" once and validate it, so a typo is reported now
# instead of surfacing as NA dimensions when the first PNG device is opened.
res <- read_answer("Width/Height (e.g. 1024x768): ")
dims <- suppressWarnings(
  as.numeric(strsplit(tolower(res), "x", fixed = TRUE)[[1L]])
)
if (length(dims) != 2L || anyNA(dims) || any(dims <= 0)) {
  stop(
    "Resolution must look like 1024x768, got: ", res,
    call. = FALSE
  )
}
rwidth <- dims[1L]
rheight <- dims[2L]

# Tokenise the text: lower-case it, turn punctuation into spaces and split on
# runs of whitespace. All lines are flattened into one word sequence, so the
# last word of a line is paired with the first word of the next line.
# NOTE(review): [[:punct:]] also removes apostrophes, so "don't" becomes the
# two tokens "don" and "t" -- adjust the pattern if that matters.
words <- unlist(
  strsplit(gsub("[[:punct:]]", " ", tolower(txt)), "[[:space:]]+")
)

# A line that starts with punctuation or whitespace yields an empty first
# token; drop those so the graph does not get a vertex named "".
words <- words[nzchar(words)]

# With fewer than two words there is no pair to form (and a 1:0 style loop
# would index past the end), so fail with a clear message instead.
if (length(words) < 2L) {
  stop("Need at least two words to build word associations.", call. = FALSE)
}

n_pairs <- length(words) - 1L

# Two-column character matrix: row i is (word i, word i + 1).
assocs <- cbind(words[-length(words)], words[-1L])

# Progress output, one line per association.
for (i in seq_len(n_pairs)) {
  print(paste0(
    "Pass #", i, " of ", n_pairs, ". ",
    "Node word is ", toupper(words[i]), "."
  ))
}

# Build an undirected graph whose edges are the word pairs in `assocs`
# (column 1 -> column 2); each distinct word becomes one vertex.
print("Build graph from data frame...")
g.assocs <- graph.data.frame(assocs, directed = FALSE)

# Show every vertex under its own word.
print("Label vertices...")
V(g.assocs)$label <- V(g.assocs)$name

# Default colour for all vertices.
print("Associate colors...")
V(g.assocs)$color <- "Gray"

# Highlight every vertex that belongs to one of the largest cliques. A vertex
# can sit in several of them, hence unique().
print("Find cliques...")
clique_members <- unique(unlist(largest.cliques(g.assocs)))
V(g.assocs)[clique_members]$color <- "Red"

# Render `g.assocs` once per layout algorithm, each into its own PNG inside
# `datafolder`, named text-igraph-<layout>.png.

# Accept the dimensions either as numbers or as the strings typed by the user.
png_width <- as.numeric(rwidth)
png_height <- as.numeric(rheight)

# Layouts to draw, in plotting order; the name is used both in the progress
# message and in the output file name.
# NOTE(review): the dotted layout names follow the igraph API this script was
# written against -- confirm they all exist in the installed igraph version.
layouts <- list(
  "random" = layout.random,
  "circle" = layout.circle,
  "sphere" = layout.sphere,
  "spring" = layout.spring,
  "fruchterman-reingold" = layout.fruchterman.reingold,
  "kamada-kawai" = layout.kamada.kawai,
  # Disabled in the original script:
  # "reingold-tilford" = layout.reingold.tilford,
  "graphopt" = layout.graphopt
)

# Plot `graph` with `layout_fun` into <folder>/text-igraph-<layout_name>.png.
# The device is closed via on.exit(), so a failing plot() does not leave a
# PNG device open.
plot_layout <- function(graph, layout_name, layout_fun, folder, width, height) {
  png_path <- file.path(folder, paste0("text-igraph-", layout_name, ".png"))
  CairoPNG(png_path, width = width, height = height)
  on.exit(dev.off(), add = TRUE)
  plot(graph, layout = layout_fun, vertex.size = 4, vertex.label.dist = 0)
  invisible(png_path)
}

for (layout_name in names(layouts)) {
  print(paste0("Plotting ", layout_name, " graph..."))
  plot_layout(
    g.assocs, layout_name, layouts[[layout_name]],
    folder = datafolder, width = png_width, height = png_height
  )
}

print("Done!")

Tagged with:  

2 Responses to Plotting texts as graphs with R and igraph

  1. Richie Cotton says:

    Very interesting. Is there a standard method of deciding when to use which algorithm? Or do you just draw them all and see which one looks best?

    Couple of minor things:
    1. readline is arguably cleaner than scan for getting user input.
    2. [:punct:] includes apostrophes, so a more thorough analysis might require an update to the regex that splits your text.

  2. Tyler Rinker says:

    I love the idea of plotting text with R and igraph. The code you posted is more skeletal. Do you have this code located anywhere in its complete entirety? Thanks for sharing!

Leave a Reply

Your email address will not be published. Required fields are marked *

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>