iGraphでTwitterのネットワークを表してみる

今年の経営工学基礎演習ではRを使ったネットワークの可視化と分析をやっているということを聞きつけました。なんだか面白そうなので、僕もやってみることにしました。ただ、データを集めるのにみんな苦労しているようなので、僕はお手軽にTwitterのfollowersのつながりを見てみることにしました。Twitterのデータ200人分くらいをぶっこぬいてきたのですが、たくさんありすぎてよく分からなかったので50人分くらいにしときましたwww。

見づらかったり間違ってるっぽいところはあるんですが、今日はこんな感じで。plotの代わりにtkplot(Tk)を使うと、見づらいところの頂点をマウスで動かしたりすることもできます。

コード

コードは以下のとおり。0と1からなる隣接行列を作って、そこからグラフを生成しています。

# This first line was generated with Perl beforehand.
# `users` lists the account names that become the rows/columns of the
# adjacency matrix; each name is also used below as the base name of a
# "<name>.txt" input file.
users <- c("nipotan","uta","lurker_","tacke","kokorosha","drry","VoQn","Masaca","catlover","undo","higeorange","ot_inc","morihiro","sidus","Marubon","miyaga50","Misase","Psychs","arigamin","todesking","hirony","ueblog","ululun","yumeka","daisuke","makimoto","unjyoukairou","laddertothemoon","msng","twitter","nemotatsu","youchan","ogijun","Atsushi_Yoshida","hirose30","tomozo","wildwell","ma_shimaro","satoship","sigewo","masakiishitani","akiyan","yoshiori","nakano_h","lalha","takuchan","aerith","kaien","nacun","ono_matope","Sinonome","takesako","takimo","akio0911","onoue50","amachang","syou6162_bot","monado","kentaro","sugamasao","tabebot","tokuriki","Hamachiya2","makopin","fowler","mokana","akky","Seacolor","nirvash","mrkn","swirhen","PoohKid","skrb","yto","jazzanova","kengo","spiegel_2007","westerndog","Gonbuto","y_benjo","f_iryo1","ch1248","kotoriko","AzureStone","Horiuchi_H","woopsdez","musi06co","tomisima","uryan","Azusan","kokogiko","a_dach","yumizou","Caloriemate","nyaxt","allgreen","asane","hyuki","masuidrive","midnblue","jt_noSke","masaka","odz","kazuhide","Mizuhin","nonoriri","hamashun","otsune","kagawa","_tad_","32nm","Futaro","TERRAZI","lovecall","muzie","Lian","mobcov","Dr_Frunk","taguchi","sasakitoshinao","sesejun","kmizu","marco11","nkoz","yon3210","renakka","shinosun","yokochie","babie","infoshako","masui","sota_k","aikawa","buzztter","at_aka","hayamizu","oishi","ONa","syou6162","yomoyomo","aratakojima","pinkmac","denimu","siowulf","whitebell","redribabarn","maybowjing","yanbe","nyaho","ryo_grid","nakajo_k","katoyuu","pha","mashori","extramegane","suVene","dritoshi","mayuki","opera","tokkyo","satou30","jmworks","delphinus","kiririmode","oquno","cress_cc","isshoku","itachimaru","junm","hrnb","negaton","allegrovivace","onk","reima","foxicalty","Echos","monta_cliche","Nautilus","pal9999","himanainu_kawai","tomomoon","kkk6","kohei","nicovideojp","sakito","yoichiro","yuiseki","pianocktail","kanose","hejihogu","daftbeats","hirondelle")

# Switch to the directory holding the per-user "<name>.txt" files; the reads
# and the image output further down all use paths relative to it.
setwd("/home/yasuhisa/svn/public/perl/friends")
getwd()

# Interactive sanity checks: peek at the first few file names, show the first
# file's name without its extension, and print how many users are listed.
my.files <- list.files()
my.files[1:3]
strsplit(my.files[1], ".", fixed = TRUE)[[1]][1]
length(users)

# Read every user's name list from "<user>.txt" in the working directory and
# keep it in a variable called "my.<user>". Users whose file is missing or
# unreadable get NULL, so the later adjacency step simply finds no matches.
for (i in seq_along(users)) {
  path <- paste0(users[i], ".txt")
  listed <- tryCatch(
    # colClasses = "character" stops read.table from type-guessing, which
    # would otherwise reformat account names that look like numbers or
    # logicals. t() + as.vector() flattens the table row by row into a single
    # character vector.
    as.vector(t(read.table(path, colClasses = "character"))),
    error = function(e) {
      # Best effort: report the problem and carry on with the next user.
      message("Skipping ", path, ": ", conditionMessage(e))
      NULL
    }
  )
  # assign() creates the variable directly from its name, so no R code has to
  # be assembled from the (externally generated) account names and parsed.
  assign(paste0("my.", users[i]), listed)
}

# Square all-zero adjacency matrix with one row and one column per user,
# labelled with the account names on both axes.
twitter <- matrix(
  0,
  nrow = length(users),
  ncol = length(users),
  dimnames = list(users, users)
)

# Show the column labels as a quick check.
colnames(twitter)

# Print any warnings collected so far.
warnings()

# Fill the adjacency matrix: row i gets a 1 in every column whose account name
# appears in the list stored in "my.<users[i]>".
for (i in seq_along(users)) {
  # Look the per-user variable up by name instead of parsing generated code.
  listed <- get(paste0("my.", users[i]))
  # %in% marks all matching columns at once and is all FALSE for a NULL or
  # empty list, so users without data leave their row untouched.
  twitter[i, colnames(twitter) %in% listed] <- 1
}

library(igraph)

# Number of users (taken from the front of `users`) to include in the plots;
# the full network was too crowded to read.
n <- 50

# Quick on-screen look at the sub-network with a random layout.
plot(graph.adjacency(twitter[seq_len(n), seq_len(n)]), layout = layout.random)

# Directed graph built from the top-left n x n corner of the adjacency matrix.
tmp <- graph.adjacency(twitter[seq_len(n), seq_len(n)])
tmp
# Label the vertices with the account names (uses n, not a hard-coded 50).
V(tmp)$name <- users[seq_len(n)]

# `tmp` already contains every vertex, edge and label, so it is used as the
# final graph directly. Rebuilding it edge by edge from hand-computed 0-based
# vertex ids only worked with igraph releases that numbered vertices from 0.
g <- tmp

# The output file is named .jpg, so open a JPEG device to make the file
# contents match its extension.
jpeg("graph.jpg")
plot(g, layout = layout.random, vertex.label = V(g)$name)
dev.off()

反省

  • データの加工に時間がかかりすぎ
    • 場数の踏みかたがやっぱりまだまだ足りない
  • リファレンス見るのがへたすぎ
    • 調べるのにも時間かかりすぎ

あとでbenjoとかito君にもっと詳しいことを聞いてみよう!!