読者です 読者をやめる 読者になる 読者になる

prediction.rb

Ruby 1000speakers

確率を予測したい人たちの説明変数を集めるために、必要な情報を収集するプログラム。基本的にはmain.rbの使い回し。

#!/usr/bin/ruby -Ke
$KCODE='e'
require 'rubygems'
require 'mechanize'
require 'classifier'
require 'stemmer'

load 'friend.rb'

# Create the Mechanize session and log in to Twitter through the first form
# on the top page, using the credentials stored in twitter.yaml.
agent = WWW::Mechanize.new
agent.max_history = 1
top_page = agent.get('http://twitter.com/')
credentials = YAML.load_file("twitter.yaml")
login_form = top_page.forms.first
# The form field names are the same as the keys used in twitter.yaml.
%w[username_or_email password].each do |field|
  login_form[field] = credentials[field]
end
agent.submit(login_form)

# Load the two training corpora for the "program" classifier.
# File.read opens, reads and closes the file in a single call, so no file
# handle is left open (the earlier File.open calls were never closed).
program_file = "program_bio.txt"
program = File.read(program_file)

not_program_file = "not_program_bio.txt"
not_program = File.read(not_program_file)

# Classifier setup: two categories, 'program' and 'others'.
bayes_program = Classifier::Bayes.new('program','others')
# MeCab tagger using the "wakati" output format; its parse result is what
# gets fed to the classifier below.
wakati = MeCab::Tagger.new('-O wakati')

# Train the classifier with the tokenized corpora.
bayes_program.train('program', wakati.parse(program))
bayes_program.train('others', wakati.parse(not_program))

# Load the two training corpora for the "otaku" classifier.
# File.read opens, reads and closes the file in a single call, so no file
# handle is left open (the earlier File.open calls were never closed).
otaku_file = "otaku_bio.txt"
otaku = File.read(otaku_file)

not_otaku_file = "not_otaku_bio.txt"
not_otaku = File.read(not_otaku_file)

# Classifier setup: two categories, 'otaku' and 'others'.
bayes_otaku = Classifier::Bayes.new('otaku','others')
# Reuse the tagger if one already exists instead of building a second,
# identically configured instance; create it only when it is missing.
wakati ||= MeCab::Tagger.new('-O wakati')

# Train the classifier with the tokenized corpora.
bayes_otaku.train('otaku', wakati.parse(otaku))
bayes_otaku.train('others', wakati.parse(not_otaku))


# Screen names of the users to run the classifiers on.
prediction = %w[issm ranha ina_ani daisuke_m pgf2]

get_friends_for_prediction(prediction)

# For each user: build a Friend, store the three classifier results on it and
# write it out. A failure for one user is printed and the loop moves on to the
# next user.
prediction.each do |name|
  begin
    person = Friend.new({
      :name    => name,
      :rewrite => 0,
      :agent   => agent,
      :flag    => 0
    })

    person.is_program_by_bayes =
      is_program_by_bayes(bayes_program, wakati.parse(person.bio))
    person.is_otaku_in_bio_by_bayes =
      is_otaku_by_bayes(bayes_otaku, wakati.parse(person.bio))
    person.is_otaku_in_entry_by_bayes =
      is_otaku_by_bayes(bayes_otaku, wakati.parse(person.entry))

    person.write_friends_info
  rescue => error
    puts error.message
  end
end