2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#!/usr/bin/env ruby
							 | 
						
					
						
							
								
									
										
										
										
											2014-05-07 16:45:17 +10:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# encoding: utf-8
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								require 'twitter_ebooks'
							 | 
						
					
						
							
								
									
										
										
										
											2014-05-27 19:14:19 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								require 'csv'
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-18 02:59:15 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								$debug = true
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								module Ebooks
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  APP_PATH = Dir.pwd # XXX do some recursive thing instead
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-16 07:02:05 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  def self.new(reponame)
							 | 
						
					
						
							
								
									
										
										
										
											2014-04-18 22:51:33 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    usage = <<STR
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Usage: ebooks new <reponame>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Creates a new skeleton repository defining a template bot in
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								the current working directory specified by <reponame>.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								STR
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-16 07:02:05 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if reponame.nil?
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      log usage
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      exit
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-27 07:05:27 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    path = "./#{reponame}"
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-27 07:05:27 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if File.exists?(path)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      log "#{path} already exists. Please remove if you want to recreate."
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      exit
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-27 07:05:27 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    FileUtils.cp_r(SKELETON_PATH, path)
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-27 07:05:27 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    File.open(File.join(path, 'bots.rb'), 'w') do |f|
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      template = File.read(File.join(SKELETON_PATH, 'bots.rb'))
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      f.write(template.gsub("{{BOT_NAME}}", reponame))
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-16 07:02:05 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    log "New twitter_ebooks app created at #{reponame}"
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  def self.consume(pathes)
							 | 
						
					
						
							
								
									
										
										
										
											2014-04-18 22:51:33 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    usage = <<STR
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Usage: ebooks consume <corpus_path> [corpus_path2] [...]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Processes some number of text files or json tweet corpuses
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								into usable models. These will be output at model/<name>.model
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								STR
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if pathes.empty?
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      log usage
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      exit
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    pathes.each do |path|
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      filename = File.basename(path)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      shortname = filename.split('.')[0..-2].join('.')
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      outpath = File.join(APP_PATH, 'model', "#{shortname}.model")
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      Model.consume(path).save(outpath)
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-27 06:55:14 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								      log "Corpus consumed to #{outpath}"
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  def self.gen(model_path, input)
							 | 
						
					
						
							
								
									
										
										
										
											2014-04-18 22:51:33 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    usage = <<STR
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Usage: ebooks gen <model_path> [input]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Make a test tweet from the processed model at <model_path>.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Will respond to input if provided.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								STR
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if model_path.nil?
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      log usage
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      exit
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    model = Model.load(model_path)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if input && !input.empty?
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-14 07:44:05 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								      puts "@cmd " + model.make_response(input, 135)
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    else
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-14 07:44:05 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								      puts model.make_statement
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  def self.score(model_path, input)
							 | 
						
					
						
							
								
									
										
										
										
											2014-04-18 22:51:33 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    usage = <<STR
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Usage: ebooks score <model_path> <input>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Scores "interest" in some text input according to how
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								well unique keywords match the model.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								STR
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if model_path.nil? || input.nil?
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      log usage
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      exit
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    model = Model.load(model_path)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    model.score_interest(input)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  def self.archive(username, outpath)
							 | 
						
					
						
							
								
									
										
										
										
											2014-04-18 22:51:33 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    usage = <<STR
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Usage: ebooks archive <username> <outpath>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Downloads a json corpus of the <username>'s tweets to <outpath>.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Due to API limitations, this can only receive up to ~3000 tweets
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								into the past.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								STR
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if username.nil? || outpath.nil?
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      log usage
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      exit
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-24 13:16:34 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    Archive.new(username, outpath).sync
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2014-04-18 23:37:25 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  def self.tweet(modelpath, botname)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    usage = <<STR
							 | 
						
					
						
							
								
									
										
										
										
											2014-05-11 11:31:45 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Usage: ebooks tweet <model_path> <botname>
							 | 
						
					
						
							
								
									
										
										
										
											2014-04-18 23:37:25 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2014-05-11 11:31:45 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Sends a public tweet from the specified bot using text
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								from the processed model at <model_path>.
							 | 
						
					
						
							
								
									
										
										
										
											2014-04-18 23:37:25 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								STR
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if modelpath.nil? || botname.nil?
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      log usage
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      exit
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    load File.join(APP_PATH, 'bots.rb')
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    model = Model.load(modelpath)
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-14 07:44:05 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    statement = model.make_statement
							 | 
						
					
						
							
								
									
										
										
										
											2014-04-18 23:37:25 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    log "@#{botname}: #{statement}"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    bot = Bot.get(botname)
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    bot.configure
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    bot.tweet(statement)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-27 05:12:54 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  def self.jsonify(paths)
							 | 
						
					
						
							
								
									
										
										
										
											2014-04-18 22:51:33 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    usage = <<STR
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Usage: ebooks jsonify <old_corpus_path> [old_corpus_path2] [...]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Takes an old-style corpus of plain tweet text and converts it to json.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								STR
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if paths.empty?
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      log usage
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      exit
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-27 05:12:54 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    paths.each do |path|
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      name = File.basename(path).split('.')[0]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      new_path = name + ".json"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      tweets = []
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      id = nil
							 | 
						
					
						
							
								
									
										
										
										
											2014-05-27 19:14:19 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								      if path.split('.')[-1] == "csv" #from twitter archive
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        csv_archive = CSV.read(path, :headers=>:first_row)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        tweets = csv_archive.map do |tweet|
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          { text: tweet['text'], id: tweet['tweet_id'] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      else
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        File.read(path).split("\n").each do |l|
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          if l.start_with?('# ')
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            id = l.split('# ')[-1]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          else
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            tweet = { text: l }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if id
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								              tweet[:id] = id
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								              id = nil
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            tweets << tweet
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-27 05:12:54 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								          end
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-24 13:16:34 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-27 05:12:54 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								      File.open(new_path, 'w') do |f|
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        log "Writing #{tweets.length} tweets to #{new_path}"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        f.write(JSON.pretty_generate(tweets))
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      end
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-24 13:16:34 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  def self.command(args)
							 | 
						
					
						
							
								
									
										
										
										
											2014-04-18 23:37:25 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    usage = <<STR
							 | 
						
					
						
							
								
									
										
										
										
											2014-05-06 22:17:54 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Usage:
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     ebooks new <reponame>
							 | 
						
					
						
							
								
									
										
										
										
											2014-04-18 22:51:33 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								     ebooks consume <corpus_path> [corpus_path2] [...]
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     ebooks gen <model_path> [input]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     ebooks score <model_path> <input>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     ebooks archive <@user> <outpath>
							 | 
						
					
						
							
								
									
										
										
										
											2014-04-18 23:37:25 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								     ebooks tweet <model_path> <botname>
							 | 
						
					
						
							
								
									
										
										
										
											2014-04-18 22:51:33 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								     ebooks jsonify <old_corpus_path> [old_corpus_path2] [...]
							 | 
						
					
						
							
								
									
										
										
										
											2014-04-18 23:37:25 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								STR
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if args.length == 0
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      log usage
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      exit
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    case args[0]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    when "new" then new(args[1])
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    when "consume" then consume(args[1..-1])
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    when "gen" then gen(args[1], args[2..-1].join(' '))
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    when "score" then score(args[1], args[2..-1].join(' '))
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    when "archive" then archive(args[1], args[2])
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    when "tweet" then tweet(args[1], args[2])
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-27 05:12:54 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    when "jsonify" then jsonify(args[1..-1])
							 | 
						
					
						
							
								
									
										
										
										
											2014-05-06 22:17:57 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    else
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      log usage
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      exit 1
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-08 06:02:05 +11:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Ebooks.command(ARGV)
							 |