#!/usr/bin/env ruby
# encoding: utf-8
require 'csv'
require 'fileutils'
require 'json'

require 'twitter_ebooks'
# Global debug flag, set before the Ebooks module below is defined.
# NOTE(review): presumably read by the twitter_ebooks library — confirm there.
$debug = true
# Command-line front end: each subcommand of the `ebooks` executable is a
# singleton method on this module, and Ebooks.command dispatches to them.
#
# Every subcommand logs its usage text and exits when its required
# arguments are missing.
module Ebooks
  APP_PATH = Dir.pwd # XXX do some recursive thing instead

  # Creates a new bot repository at ./<reponame> by copying SKELETON_PATH and
  # rewriting the copied bots.rb with {{BOT_NAME}} replaced by reponame.
  #
  # @param reponame [String, nil] directory name, also used as the bot name
  def self.new(reponame)
    usage = <<STR
Usage: ebooks new <reponame>
Creates a new skeleton repository defining a template bot in
the current working directory specified by <reponame>.
STR

    if reponame.nil?
      log usage
      exit
    end

    path = "./#{reponame}"

    # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
    if File.exist?(path)
      log "#{path} already exists. Please remove if you want to recreate."
      exit
    end

    FileUtils.cp_r(SKELETON_PATH, path)

    template = File.read(File.join(SKELETON_PATH, 'bots.rb'))
    File.open(File.join(path, 'bots.rb'), 'w') do |f|
      f.write(template.gsub("{{BOT_NAME}}", reponame))
    end

    log "New twitter_ebooks app created at #{reponame}"
  end

  # Builds a model from each corpus file and saves it to
  # APP_PATH/model/<name>.model, where <name> is the file's basename with its
  # last extension removed.
  #
  # @param pathes [Array<String>] corpus file paths
  def self.consume(pathes)
    usage = <<STR
Usage: ebooks consume <corpus_path> [corpus_path2] [...]
Processes some number of text files or json tweet corpuses
into usable models. These will be output at model/<name>.model
STR

    if pathes.empty?
      log usage
      exit
    end

    # Make sure the output directory exists before any model is saved.
    model_dir = File.join(APP_PATH, 'model')
    FileUtils.mkdir_p(model_dir)

    pathes.each do |path|
      # Strips only the last extension ("a.b.json" -> "a.b") and keeps the
      # full name when there is no extension at all ("corpus" -> "corpus").
      shortname = File.basename(path, '.*')
      outpath = File.join(model_dir, "#{shortname}.model")
      Model.consume(path).save(outpath)
      log "Corpus consumed to #{outpath}"
    end
  end

  # Prints a test tweet generated from the model at model_path. When input is
  # non-empty, prints a response to it (prefixed with "@cmd ") instead.
  #
  # @param model_path [String, nil] path to a processed model
  # @param input [String, nil] optional text to respond to
  def self.gen(model_path, input)
    usage = <<STR
Usage: ebooks gen <model_path> [input]
Make a test tweet from the processed model at <model_path>.
Will respond to input if provided.
STR

    if model_path.nil?
      log usage
      exit
    end

    model = Model.load(model_path)
    if input && !input.empty?
      puts "@cmd " + model.make_response(input, 135)
    else
      puts model.make_statement
    end
  end

  # Scores the given input against the model at model_path.
  #
  # @param model_path [String, nil] path to a processed model
  # @param input [String, nil] text to score
  # @return whatever Model#score_interest returns
  def self.score(model_path, input)
    usage = <<STR
Usage: ebooks score <model_path> <input>
Scores "interest" in some text input according to how
well unique keywords match the model.
STR

    # Ebooks.command always passes a String, so an omitted input arrives as
    # "" rather than nil; treat both as missing.
    if model_path.nil? || input.nil? || input.empty?
      log usage
      exit
    end

    model = Model.load(model_path)
    # NOTE(review): the score is returned but not printed here — confirm
    # whether Model#score_interest does its own output.
    model.score_interest(input)
  end

  # Syncs an Archive for username to the file at outpath.
  #
  # @param username [String, nil] account whose tweets are archived
  # @param outpath [String, nil] destination of the json corpus
  def self.archive(username, outpath)
    usage = <<STR
Usage: ebooks archive <username> <outpath>
Downloads a json corpus of the <username>'s tweets to <outpath>.
Due to API limitations, this can only receive up to ~3000 tweets
into the past.
STR

    if username.nil? || outpath.nil?
      log usage
      exit
    end

    Archive.new(username, outpath).sync
  end

  # Generates a statement from the model at modelpath and tweets it as the
  # bot named botname, after loading the bot definitions from APP_PATH/bots.rb.
  #
  # @param modelpath [String, nil] path to a processed model
  # @param botname [String, nil] name used to look the bot up via Bot.get
  def self.tweet(modelpath, botname)
    usage = <<STR
Usage: ebooks tweet <model_path> <botname>
Sends a public tweet from the specified bot using text
from the processed model at <model_path>.
STR

    if modelpath.nil? || botname.nil?
      log usage
      exit
    end

    load File.join(APP_PATH, 'bots.rb')
    model = Model.load(modelpath)
    statement = model.make_statement
    log "@#{botname}: #{statement}"
    bot = Bot.get(botname)
    bot.configure
    bot.tweet(statement)
  end

  # Converts each old-style corpus into a json file named after the part of
  # the input's basename before its first dot, written to the current
  # working directory.
  #
  # @param paths [Array<String>] corpus paths (.csv archives or plain text)
  def self.jsonify(paths)
    usage = <<STR
Usage: ebooks jsonify <old_corpus_path> [old_corpus_path2] [...]
Takes an old-style corpus of plain tweet text and converts it to json.
STR

    if paths.empty?
      log usage
      exit
    end

    paths.each do |path|
      name = File.basename(path).split('.')[0]
      new_path = "#{name}.json"

      tweets =
        if File.extname(path) == '.csv' # from twitter archive
          tweets_from_csv(path)
        else
          tweets_from_text(path)
        end

      File.open(new_path, 'w') do |f|
        log "Writing #{tweets.length} tweets to #{new_path}"
        f.write(JSON.pretty_generate(tweets))
      end
    end
  end

  # Reads a CSV file with a header row, taking the 'text' and 'tweet_id'
  # columns of every row.
  def self.tweets_from_csv(path)
    CSV.read(path, headers: :first_row).map do |row|
      { text: row['text'], id: row['tweet_id'] }
    end
  end
  private_class_method :tweets_from_csv

  # Reads a plain-text corpus with one tweet per line. A line starting with
  # "# " is not a tweet: it carries the id for the next tweet line.
  def self.tweets_from_text(path)
    tweets = []
    pending_id = nil

    File.read(path).split("\n").each do |line|
      if line.start_with?('# ')
        pending_id = line.split('# ')[-1]
      else
        tweet = { text: line }
        if pending_id
          tweet[:id] = pending_id
          pending_id = nil # an id applies to a single tweet only
        end
        tweets << tweet
      end
    end

    tweets
  end
  private_class_method :tweets_from_text

  # Dispatches on args[0] to the matching subcommand. Logs the overall usage
  # and exits when no arguments are given; for an unknown subcommand, logs
  # usage and exits with status 1.
  #
  # @param args [Array<String>] command-line arguments, e.g. ARGV
  def self.command(args)
    usage = <<STR
Usage:
ebooks new <reponame>
ebooks consume <corpus_path> [corpus_path2] [...]
ebooks gen <model_path> [input]
ebooks score <model_path> <input>
ebooks archive <@user> <outpath>
ebooks tweet <model_path> <botname>
ebooks jsonify <old_corpus_path> [old_corpus_path2] [...]
STR

    if args.length == 0
      log usage
      exit
    end

    # Array#drop always returns an array, whereas args[2..-1] is nil when only
    # the subcommand was given and would make the join below raise.
    case args[0]
    when "new" then new(args[1])
    when "consume" then consume(args.drop(1))
    when "gen" then gen(args[1], args.drop(2).join(' '))
    when "score" then score(args[1], args.drop(2).join(' '))
    when "archive" then archive(args[1], args[2])
    when "tweet" then tweet(args[1], args[2])
    when "jsonify" then jsonify(args.drop(1))
    else
      log usage
      exit 1
    end
  end
end
# Entry point: hand the command-line arguments to the subcommand dispatcher.
Ebooks.command(ARGV)