今年の経営工学基礎演習ではRを使ったネットワークの可視化と分析をやっているということを聞きつけました。なんだか面白そうなので、僕もやってみることにしました。ただ、データを集めるのにみんな苦労しているようなので、僕はお手軽にTwitterのfollowersのつながりを見てみることにしました。Twitterのデータ200人分くらいをぶっこぬいてきたのですが、たくさんありすぎてよく分からなかったので50人分くらいにしときましたwww。
見づらかったり間違ってるっぽいところはあるんですが、今日はこんな感じで。plotにTkとかを使うと、見づらいところを動かしたりすることもできます。
コード
コードは以下のとおり。0と1からなる隣接行列を作って、そこからグラフを生成しています。
# Screen names of the Twitter users to analyse.
# This vector literal was generated by a Perl script rather than typed by hand.
# Each name is later used both as a row/column label of the adjacency matrix
# and as the stem of the per-user data file "<name>.txt".
users <- c(
  "nipotan", "uta", "lurker_", "tacke", "kokorosha", "drry", "VoQn",
  "Masaca", "catlover", "undo", "higeorange", "ot_inc", "morihiro", "sidus",
  "Marubon", "miyaga50", "Misase", "Psychs", "arigamin", "todesking",
  "hirony", "ueblog", "ululun", "yumeka", "daisuke", "makimoto",
  "unjyoukairou", "laddertothemoon", "msng", "twitter", "nemotatsu",
  "youchan", "ogijun", "Atsushi_Yoshida", "hirose30", "tomozo", "wildwell",
  "ma_shimaro", "satoship", "sigewo", "masakiishitani", "akiyan", "yoshiori",
  "nakano_h", "lalha", "takuchan", "aerith", "kaien", "nacun", "ono_matope",
  "Sinonome", "takesako", "takimo", "akio0911", "onoue50", "amachang",
  "syou6162_bot", "monado", "kentaro", "sugamasao", "tabebot", "tokuriki",
  "Hamachiya2", "makopin", "fowler", "mokana", "akky", "Seacolor", "nirvash",
  "mrkn", "swirhen", "PoohKid", "skrb", "yto", "jazzanova", "kengo",
  "spiegel_2007", "westerndog", "Gonbuto", "y_benjo", "f_iryo1", "ch1248",
  "kotoriko", "AzureStone", "Horiuchi_H", "woopsdez", "musi06co", "tomisima",
  "uryan", "Azusan", "kokogiko", "a_dach", "yumizou", "Caloriemate", "nyaxt",
  "allgreen", "asane", "hyuki", "masuidrive", "midnblue", "jt_noSke",
  "masaka", "odz", "kazuhide", "Mizuhin", "nonoriri", "hamashun", "otsune",
  "kagawa", "_tad_", "32nm", "Futaro", "TERRAZI", "lovecall", "muzie",
  "Lian", "mobcov", "Dr_Frunk", "taguchi", "sasakitoshinao", "sesejun",
  "kmizu", "marco11", "nkoz", "yon3210", "renakka", "shinosun", "yokochie",
  "babie", "infoshako", "masui", "sota_k", "aikawa", "buzztter", "at_aka",
  "hayamizu", "oishi", "ONa", "syou6162", "yomoyomo", "aratakojima",
  "pinkmac", "denimu", "siowulf", "whitebell", "redribabarn", "maybowjing",
  "yanbe", "nyaho", "ryo_grid", "nakajo_k", "katoyuu", "pha", "mashori",
  "extramegane", "suVene", "dritoshi", "mayuki", "opera", "tokkyo",
  "satou30", "jmworks", "delphinus", "kiririmode", "oquno", "cress_cc",
  "isshoku", "itachimaru", "junm", "hrnb", "negaton", "allegrovivace", "onk",
  "reima", "foxicalty", "Echos", "monta_cliche", "Nautilus", "pal9999",
  "himanainu_kawai", "tomomoon", "kkk6", "kohei", "nicovideojp", "sakito",
  "yoichiro", "yuiseki", "pianocktail", "kanose", "hejihogu", "daftbeats",
  "hirondelle"
)
# Build a 0/1 adjacency matrix of connections between the accounts in `users`
# and draw the sub-network of the first `n` accounts with igraph.
#
# Requires `users` (character vector of screen names) to be defined already.
# For every user, "<name>.txt" inside `data_dir` is read; each name found in
# that file which is also present in `users` becomes an edge
# (row = file owner, column = name listed in the file).
# NOTE(review): whether a file lists the accounts a user follows or the user's
# followers is not visible from this script -- confirm against the exporter.

library(igraph)

# Directory containing the per-user "<name>.txt" files. Paths are built with
# file.path() so the working directory of the R session is left untouched.
data_dir <- "/home/yasuhisa/svn/public/perl/friends"

# Read the names listed in one user's file as a plain vector.
# Returns NULL when the file is missing, empty or otherwise unreadable, so
# such a user simply contributes no edges (deliberate best-effort behaviour).
read_connections <- function(user) {
  path <- file.path(data_dir, paste0(user, ".txt"))
  tryCatch(
    as.vector(t(read.table(path))),
    error = function(e) NULL
  )
}

# One list entry per user instead of one generated global variable per user.
connections <- lapply(users, read_connections)
names(connections) <- users

# twitter[i, j] == 1 means users[j] is listed in the file of users[i].
twitter <- matrix(
  0, length(users), length(users),
  dimnames = list(users, users)
)
for (i in seq_along(users)) {
  # `%in%` against NULL is all FALSE, so unreadable files leave the row at 0.
  twitter[i, users %in% connections[[i]]] <- 1
}

# The full network is too dense to read, so only the first `n` users are
# drawn. Clamped so a short `users` vector cannot index out of bounds.
n <- min(50, length(users))
shown <- seq_len(n)

# Build the graph straight from the adjacency matrix. Vertex ids are never
# computed by hand, so this works regardless of whether the installed igraph
# numbers vertices from 0 (< 0.6) or from 1 (>= 0.6).
# graph.adjacency / layout.random are the names this script has always used;
# recent igraph releases call them graph_from_adjacency_matrix /
# layout_randomly.
g <- graph.adjacency(twitter[shown, shown, drop = FALSE])
V(g)$name <- users[shown]

# Quick look on the current graphics device.
plot(g, layout = layout.random)

# Save a labelled version next to the data files. The png() device writes PNG
# data, so the file carries a matching ".png" extension.
png(file.path(data_dir, "graph.png"))
plot(g, layout = layout.random, vertex.label = V(g)$name)
dev.off()
反省
- データの加工に時間がかかりすぎ
- 場数の踏みかたがやっぱりまだまだ足りない
- リファレンス見るのがへたすぎ
- 調べるのにも時間かかりすぎ
あとでbenjoとかito君にもっと詳しいことを聞いてみよう!!