twitter-ebooks/bin/ebooks

#!/usr/bin/env ruby
# encoding: utf-8

require 'twitter_ebooks'
require 'csv'

# Global debug flag, switched on for every CLI invocation. Nothing in this
# script reads it; presumably the twitter_ebooks library consults it to
# decide how verbose to be -- TODO confirm against the library.
$debug = true

# Command-line front end for twitter_ebooks.
#
# Every subcommand is a module-level method. Each one logs its own usage text
# and exits the process when a required argument is missing; Ebooks.command
# maps the raw argument list onto those methods.
module Ebooks
  APP_PATH = Dir.pwd # XXX do some recursive thing instead

  # Creates a new bot project by copying the skeleton directory.
  #
  # @param reponame [String, nil] directory to create under the current
  #   working directory; also substituted for {{BOT_NAME}} in bots.rb
  # @return [void] exits the process (after logging) when reponame is nil or
  #   the target path already exists
  def self.new(reponame)
    usage = <<STR
Usage: ebooks new <reponame>

Creates a new skeleton repository defining a template bot in
the current working directory specified by <reponame>.
STR

    if reponame.nil?
      log usage
      exit
    end

    path = "./#{reponame}"

    # File.exists? was deprecated and then removed in Ruby 3.2; File.exist?
    # is the spelling that works on every Ruby version.
    if File.exist?(path)
      log "#{path} already exists. Please remove if you want to recreate."
      exit
    end

    FileUtils.cp_r(SKELETON_PATH, path)

    # Overwrite the copied bots.rb with the template's placeholder filled in.
    File.open(File.join(path, 'bots.rb'), 'w') do |f|
      template = File.read(File.join(SKELETON_PATH, 'bots.rb'))
      f.write(template.gsub("{{BOT_NAME}}", reponame))
    end

    log "New twitter_ebooks app created at #{reponame}"
  end

  # Builds a model from each corpus file and saves it under
  # APP_PATH/model/<name>.model, where <name> is the corpus file name without
  # its final extension.
  #
  # @param pathes [Array<String>] corpus file paths
  # @return [void] exits the process after logging usage when pathes is empty
  def self.consume(pathes)
    usage = <<STR
Usage: ebooks consume <corpus_path> [corpus_path2] [...]

Processes some number of text files or json tweet corpuses
into usable models. These will be output at model/<name>.model
STR

    if pathes.empty?
      log usage
      exit
    end

    pathes.each do |path|
      # Strip only the last extension. Unlike splitting on '.', this keeps
      # the full name for a corpus file that has no extension at all
      # (previously such a file produced an empty name, i.e. "model/.model").
      shortname = File.basename(path, '.*')

      outpath = File.join(APP_PATH, 'model', "#{shortname}.model")
      Model.consume(path).save(outpath)
      log "Corpus consumed to #{outpath}"
    end
  end

  # Prints a test tweet generated from a saved model.
  #
  # @param model_path [String, nil] path handed to Model.load
  # @param input [String, nil] optional text to respond to; when nil or empty
  #   a free-standing statement is generated instead
  # @return [void] exits the process after logging usage when model_path is nil
  def self.gen(model_path, input)
    usage = <<STR
Usage: ebooks gen <model_path> [input]

Make a test tweet from the processed model at <model_path>.
Will respond to input if provided.
STR
    if model_path.nil?
      log usage
      exit
    end

    model = Model.load(model_path)
    if input && !input.empty?
      puts "@cmd " + model.make_response(input, 135)
    else
      puts model.make_statement
    end
  end

  # Scores some input text against a saved model.
  #
  # @param model_path [String, nil] path handed to Model.load
  # @param input [String, nil] text to score; required
  # @return [Object] whatever Model#score_interest returns
  def self.score(model_path, input)
    usage = <<STR
Usage: ebooks score <model_path> <input>

Scores "interest" in some text input according to how
well unique keywords match the model.
STR
    # The dispatcher joins the remaining arguments into a string, so missing
    # input arrives as "" rather than nil; treat both as "not provided".
    if model_path.nil? || input.nil? || input.empty?
      log usage
      exit
    end

    model = Model.load(model_path)
    # NOTE(review): the score is returned but not printed here -- confirm
    # whether Model#score_interest reports it on its own.
    model.score_interest(input)
  end

  # Downloads a user's tweets into a json corpus file.
  #
  # @param username [String, nil] account to archive
  # @param outpath [String, nil] destination passed to Archive.new
  # @return [Object] the result of Archive#sync; exits the process after
  #   logging usage when either argument is nil
  def self.archive(username, outpath)
    usage = <<STR
Usage: ebooks archive <username> <outpath>

Downloads a json corpus of the <username>'s tweets to <outpath>.
Due to API limitations, this can only receive up to ~3000 tweets
into the past.
STR

    if username.nil? || outpath.nil?
      log usage
      exit
    end

    Archive.new(username, outpath).sync
  end

  # Generates a statement from a model and tweets it from the named bot.
  #
  # Loads APP_PATH/bots.rb first so that the bot looked up with Bot.get is
  # defined.
  #
  # @param modelpath [String, nil] path handed to Model.load
  # @param botname [String, nil] name passed to Bot.get
  # @return [Object] the result of Bot#tweet; exits the process after logging
  #   usage when either argument is nil
  def self.tweet(modelpath, botname)
    usage = <<STR
Usage: ebooks tweet <model_path> <botname>

Sends a public tweet from the specified bot using text
from the processed model at <model_path>.
STR

    if modelpath.nil? || botname.nil?
      log usage
      exit
    end

    load File.join(APP_PATH, 'bots.rb')
    model = Model.load(modelpath)
    statement = model.make_statement
    log "@#{botname}: #{statement}"
    bot = Bot.get(botname)
    bot.configure
    bot.tweet(statement)
  end

  # Converts old-style plain-text corpora (or a tweets.csv archive) to json.
  #
  # Output is written to "<name>.json" in the current working directory,
  # where <name> is the part of the input file name before its first dot.
  #
  # Plain-text format: one tweet per line; a line starting with "# " carries
  # the id that is attached to the next tweet line.
  #
  # @param paths [Array<String>] input corpus paths
  # @return [void] exits the process after logging usage when paths is empty
  def self.jsonify(paths)
    usage = <<STR
Usage: ebooks jsonify <old_corpus_path> [old_corpus_path2] [...]

Takes an old-style corpus of plain tweet text and converts it to json.
STR

    if paths.empty?
      log usage
      exit
    end

    paths.each do |path|
      name = File.basename(path).split('.')[0]
      new_path = name + ".json"

      tweets = []
      id = nil
      # Decide by real file extension (case-insensitively) so that a file
      # merely named "csv" is not parsed as CSV and "tweets.CSV" is.
      if File.extname(path).downcase == ".csv" # from twitter archive
        csv_archive = CSV.read(path, :headers=>:first_row)
        tweets = csv_archive.map do |tweet|
          { text: tweet['text'], id: tweet['tweet_id'] }
        end
      else
        File.read(path).split("\n").each do |l|
          if l.start_with?('# ')
            # Remember the id; it belongs to the next tweet line.
            id = l.split('# ')[-1]
          else
            tweet = { text: l }
            if id
              tweet[:id] = id
              id = nil
            end
            tweets << tweet
          end
        end
      end

      File.open(new_path, 'w') do |f|
        log "Writing #{tweets.length} tweets to #{new_path}"
        f.write(JSON.pretty_generate(tweets))
      end
    end
  end

  # Dispatches a raw argument list to the matching subcommand.
  #
  # @param args [Array<String>] command-line arguments, subcommand first
  # @return [Object] the subcommand's result; exits with status 0 after
  #   logging usage when args is empty, and with status 1 for an unknown
  #   subcommand
  def self.command(args)
    usage = <<STR
Usage:
     ebooks new <reponame>
     ebooks consume <corpus_path> [corpus_path2] [...]
     ebooks gen <model_path> [input]
     ebooks score <model_path> <input>
     ebooks archive <@user> <outpath>
     ebooks tweet <model_path> <botname>
     ebooks jsonify <old_corpus_path> [old_corpus_path2] [...]
STR

    if args.empty?
      log usage
      exit
    end

    # Array#drop returns [] when there are too few elements, whereas
    # args[2..-1] is nil for a one-element list and would crash on .join
    # before the subcommand could print its usage.
    case args[0]
    when "new" then new(args[1])
    when "consume" then consume(args.drop(1))
    when "gen" then gen(args[1], args.drop(2).join(' '))
    when "score" then score(args[1], args.drop(2).join(' '))
    when "archive" then archive(args[1], args[2])
    when "tweet" then tweet(args[1], args[2])
    when "jsonify" then jsonify(args.drop(1))
    else
      log usage
      exit 1
    end
  end
end

# Script entry point: hand the command-line arguments to the dispatcher.
Ebooks.command(ARGV)
Github time! 2013-11-08 06:02:05 +11:00			`#!/usr/bin/env ruby`
2.2.5 - encoding: utf-8 2014-05-07 16:45:17 +10:00			`# encoding: utf-8`
Github time! 2013-11-08 06:02:05 +11:00
			`require 'twitter_ebooks'`
jsonify: Support tweets.csv Allow official archive tweets.csv as input to create Ebooks::Archive compatible JSON archive 2014-05-27 19:14:19 -04:00			`require 'csv'`
Github time! 2013-11-08 06:02:05 +11:00
Retry limit and mention separation 2013-11-18 02:59:15 -08:00			`$debug = true`

Github time! 2013-11-08 06:02:05 +11:00			`module Ebooks`
			`APP_PATH = Dir.pwd # XXX do some recursive thing instead`

2.1.0 - Fix skeleton init 2013-11-16 07:02:05 -08:00			`def self.new(reponame)`
Add expanded usage information to commands 2014-04-18 22:51:33 -07:00			`usage = <<STR`
			`Usage: ebooks new <reponame>`

			`Creates a new skeleton repository defining a template bot in`
			`the current working directory specified by <reponame>.`
			`STR`
Github time! 2013-11-08 06:02:05 +11:00
2.1.0 - Fix skeleton init 2013-11-16 07:02:05 -08:00			`if reponame.nil?`
Github time! 2013-11-08 06:02:05 +11:00			`log usage`
			`exit`
			`end`

Use reponame instead of path in bots.rb 2013-11-27 07:05:27 -08:00			`path = "./#{reponame}"`
Github time! 2013-11-08 06:02:05 +11:00
Use reponame instead of path in bots.rb 2013-11-27 07:05:27 -08:00			`if File.exists?(path)`
			`log "#{path} already exists. Please remove if you want to recreate."`
Github time! 2013-11-08 06:02:05 +11:00			`exit`
			`end`

Use reponame instead of path in bots.rb 2013-11-27 07:05:27 -08:00			`FileUtils.cp_r(SKELETON_PATH, path)`
Github time! 2013-11-08 06:02:05 +11:00
Use reponame instead of path in bots.rb 2013-11-27 07:05:27 -08:00			`File.open(File.join(path, 'bots.rb'), 'w') do \|f\|`
Github time! 2013-11-08 06:02:05 +11:00			`template = File.read(File.join(SKELETON_PATH, 'bots.rb'))`
			`f.write(template.gsub("{{BOT_NAME}}", reponame))`
			`end`

2.1.0 - Fix skeleton init 2013-11-16 07:02:05 -08:00			`log "New twitter_ebooks app created at #{reponame}"`
Github time! 2013-11-08 06:02:05 +11:00			`end`

			`def self.consume(pathes)`
Add expanded usage information to commands 2014-04-18 22:51:33 -07:00			`usage = <<STR`
			`Usage: ebooks consume <corpus_path> [corpus_path2] [...]`

			`Processes some number of text files or json tweet corpuses`
			`into usable models. These will be output at model/<name>.model`
			`STR`

			`if pathes.empty?`
			`log usage`
			`exit`
			`end`

Github time! 2013-11-08 06:02:05 +11:00			`pathes.each do \|path\|`
			`filename = File.basename(path)`
			`shortname = filename.split('.')[0..-2].join('.')`

			`outpath = File.join(APP_PATH, 'model', "#{shortname}.model")`
			`Model.consume(path).save(outpath)`
Lengthy README 2013-11-27 06:55:14 -08:00			`log "Corpus consumed to #{outpath}"`
Github time! 2013-11-08 06:02:05 +11:00			`end`
			`end`

			`def self.gen(model_path, input)`
Add expanded usage information to commands 2014-04-18 22:51:33 -07:00			`usage = <<STR`
			`Usage: ebooks gen <model_path> [input]`

			`Make a test tweet from the processed model at <model_path>.`
			`Will respond to input if provided.`
			`STR`
			`if model_path.nil?`
			`log usage`
			`exit`
			`end`

Github time! 2013-11-08 06:02:05 +11:00			`model = Model.load(model_path)`
			`if input && !input.empty?`
2.0.8 -- different generation algorithm 2013-11-14 07:44:05 -08:00			`puts "@cmd " + model.make_response(input, 135)`
Github time! 2013-11-08 06:02:05 +11:00			`else`
2.0.8 -- different generation algorithm 2013-11-14 07:44:05 -08:00			`puts model.make_statement`
Github time! 2013-11-08 06:02:05 +11:00			`end`
			`end`

			`def self.score(model_path, input)`
Add expanded usage information to commands 2014-04-18 22:51:33 -07:00			`usage = <<STR`
			`Usage: ebooks score <model_path> <input>`

			`Scores "interest" in some text input according to how`
			`well unique keywords match the model.`
			`STR`
			`if model_path.nil? \|\| input.nil?`
			`log usage`
			`exit`
			`end`

Github time! 2013-11-08 06:02:05 +11:00			`model = Model.load(model_path)`
			`model.score_interest(input)`
			`end`

			`def self.archive(username, outpath)`
Add expanded usage information to commands 2014-04-18 22:51:33 -07:00			`usage = <<STR`
			`Usage: ebooks archive <username> <outpath>`

			`Downloads a json corpus of the <username>'s tweets to <outpath>.`
			`Due to API limitations, this can only receive up to ~3000 tweets`
			`into the past.`
			`STR`

			`if username.nil? \|\| outpath.nil?`
			`log usage`
			`exit`
			`end`

2.1.3 - better archiver 2013-11-24 13:16:34 -08:00			`Archive.new(username, outpath).sync`
Github time! 2013-11-08 06:02:05 +11:00			`end`

And self.tweet 2014-04-18 23:37:25 -07:00			`def self.tweet(modelpath, botname)`
			`usage = <<STR`
Corrected usage information for `ebooks tweet` Usage information for `ebooks tweet` was incorrect, replaced with relevant explanation. Fixes mispy/twitter_ebooks/issues/16 2014-05-11 11:31:45 +01:00			`Usage: ebooks tweet <model_path> <botname>`
And self.tweet 2014-04-18 23:37:25 -07:00
Corrected usage information for `ebooks tweet` Usage information for `ebooks tweet` was incorrect, replaced with relevant explanation. Fixes mispy/twitter_ebooks/issues/16 2014-05-11 11:31:45 +01:00			`Sends a public tweet from the specified bot using text`
			`from the processed model at <model_path>.`
And self.tweet 2014-04-18 23:37:25 -07:00			`STR`

			`if modelpath.nil? \|\| botname.nil?`
			`log usage`
			`exit`
			`end`

Github time! 2013-11-08 06:02:05 +11:00			`load File.join(APP_PATH, 'bots.rb')`
			`model = Model.load(modelpath)`
2.0.8 -- different generation algorithm 2013-11-14 07:44:05 -08:00			`statement = model.make_statement`
And self.tweet 2014-04-18 23:37:25 -07:00			`log "@#{botname}: #{statement}"`
			`bot = Bot.get(botname)`
Github time! 2013-11-08 06:02:05 +11:00			`bot.configure`
			`bot.tweet(statement)`
			`end`

Allow consumption of json archives 2013-11-27 05:12:54 -08:00			`def self.jsonify(paths)`
Add expanded usage information to commands 2014-04-18 22:51:33 -07:00			`usage = <<STR`
			`Usage: ebooks jsonify <old_corpus_path> [old_corpus_path2] [...]`

			`Takes an old-style corpus of plain tweet text and converts it to json.`
			`STR`

			`if paths.empty?`
			`log usage`
			`exit`
			`end`

Allow consumption of json archives 2013-11-27 05:12:54 -08:00			`paths.each do \|path\|`
			`name = File.basename(path).split('.')[0]`
			`new_path = name + ".json"`

			`tweets = []`
			`id = nil`
jsonify: Support tweets.csv Allow official archive tweets.csv as input to create Ebooks::Archive compatible JSON archive 2014-05-27 19:14:19 -04:00			`if path.split('.')[-1] == "csv" #from twitter archive`
			`csv_archive = CSV.read(path, :headers=>:first_row)`
			`tweets = csv_archive.map do \|tweet\|`
			`{ text: tweet['text'], id: tweet['tweet_id'] }`
			`end`
			`else`
			`File.read(path).split("\n").each do \|l\|`
			`if l.start_with?('# ')`
			`id = l.split('# ')[-1]`
			`else`
			`tweet = { text: l }`
			`if id`
			`tweet[:id] = id`
			`id = nil`
			`end`
			`tweets << tweet`
Allow consumption of json archives 2013-11-27 05:12:54 -08:00			`end`
2.1.3 - better archiver 2013-11-24 13:16:34 -08:00			`end`
			`end`

Allow consumption of json archives 2013-11-27 05:12:54 -08:00			`File.open(new_path, 'w') do \|f\|`
			`log "Writing #{tweets.length} tweets to #{new_path}"`
			`f.write(JSON.pretty_generate(tweets))`
			`end`
2.1.3 - better archiver 2013-11-24 13:16:34 -08:00			`end`
			`end`

Github time! 2013-11-08 06:02:05 +11:00			`def self.command(args)`
And self.tweet 2014-04-18 23:37:25 -07:00			`usage = <<STR`
Whitespace 2014-05-06 22:17:54 -07:00			`Usage:`
Github time! 2013-11-08 06:02:05 +11:00			`ebooks new <reponame>`
Add expanded usage information to commands 2014-04-18 22:51:33 -07:00			`ebooks consume <corpus_path> [corpus_path2] [...]`
Github time! 2013-11-08 06:02:05 +11:00			`ebooks gen <model_path> [input]`
			`ebooks score <model_path> <input>`
			`ebooks archive <@user> <outpath>`
And self.tweet 2014-04-18 23:37:25 -07:00			`ebooks tweet <model_path> <botname>`
Add expanded usage information to commands 2014-04-18 22:51:33 -07:00			`ebooks jsonify <old_corpus_path> [old_corpus_path2] [...]`
And self.tweet 2014-04-18 23:37:25 -07:00			`STR`
Github time! 2013-11-08 06:02:05 +11:00
			`if args.length == 0`
			`log usage`
			`exit`
			`end`

			`case args[0]`
			`when "new" then new(args[1])`
			`when "consume" then consume(args[1..-1])`
			`when "gen" then gen(args[1], args[2..-1].join(' '))`
			`when "score" then score(args[1], args[2..-1].join(' '))`
			`when "archive" then archive(args[1], args[2])`
			`when "tweet" then tweet(args[1], args[2])`
Allow consumption of json archives 2013-11-27 05:12:54 -08:00			`when "jsonify" then jsonify(args[1..-1])`
Barf on invalid commands 2014-05-06 22:17:57 -07:00			`else`
			`log usage`
			`exit 1`
Github time! 2013-11-08 06:02:05 +11:00			`end`
			`end`
			`end`

			`Ebooks.command(ARGV)`