確率を予測したい人たちの説明変数を集めるために必要な情報を収集するプログラム。基本的には main.rb の使い回し。
#!/usr/bin/ruby -Ke
# Collects information about the accounts listed in `prediction`:
#   1. logs in to Twitter through Mechanize using credentials from twitter.yaml,
#   2. trains two Bayes classifiers ("program" and "otaku") from local bio
#      sample files, tokenised with MeCab in wakati mode,
#   3. scores each account's bio/entry with those classifiers and asks the
#      Friend object to write the result out.
# Targets the Ruby 1.8 era API ($KCODE, WWW::Mechanize), so only syntax that
# is valid there is used.
$KCODE = 'e'

require 'rubygems'
require 'yaml' # YAML.load_file is used below; do not rely on another gem loading it
require 'mechanize'
require 'classifier'
require 'stemmer'
load 'friend.rb'

# --- Log in -----------------------------------------------------------------
agent = WWW::Mechanize.new
agent.max_history = 1
page = agent.get('http://twitter.com/')
config = YAML.load_file("twitter.yaml")
login_form = page.forms.first
login_form['username_or_email'] = config['username_or_email']
login_form['password'] = config['password']
agent.submit(login_form)

# --- Classifier setup and training -------------------------------------------
# A single tagger is shared by training and scoring; both use '-O wakati'.
# NOTE(review): MeCab is not required in this file - presumably friend.rb
# loads it; confirm.
wakati = MeCab::Tagger.new('-O wakati')

# Builds a two-category Bayes classifier (`category` vs 'others') and trains it
# on the tokenised contents of the two sample files. File.read is used so no
# file handle is left open.
build_classifier = lambda do |category, sample_file, counter_sample_file|
  bayes = Classifier::Bayes.new(category, 'others')
  bayes.train(category, wakati.parse(File.read(sample_file)))
  bayes.train('others', wakati.parse(File.read(counter_sample_file)))
  bayes
end

bayes_program = build_classifier.call('program', "program_bio.txt", "not_program_bio.txt")
bayes_otaku = build_classifier.call('otaku', "otaku_bio.txt", "not_otaku_bio.txt")

# --- Score and record each target account -------------------------------------
prediction = ["issm","ranha","ina_ani","daisuke_m","pgf2"]

# NOTE(review): get_friends_for_prediction, is_program_by_bayes and
# is_otaku_by_bayes are not defined here - presumably they come from friend.rb.
get_friends_for_prediction(prediction)

prediction.each do |name|
  begin
    # NOTE(review): the meaning of :rewrite and :flag is defined by Friend;
    # the values are kept exactly as they were.
    friend = Friend.new({
      :name => name,
      :rewrite => 0,
      :agent => agent,
      :flag => 0
    })
    friend.is_program_by_bayes = is_program_by_bayes(bayes_program, wakati.parse(friend.bio))
    friend.is_otaku_in_bio_by_bayes = is_otaku_by_bayes(bayes_otaku, wakati.parse(friend.bio))
    friend.is_otaku_in_entry_by_bayes = is_otaku_by_bayes(bayes_otaku, wakati.parse(friend.entry))
    friend.write_friends_info
  rescue => ex
    # Best effort: report the failure and carry on with the next account.
    puts ex.message
  end
end