diff --git a/bin/ebooks b/bin/ebooks index e1dbbad..a8cdbba 100755 --- a/bin/ebooks +++ b/bin/ebooks @@ -62,6 +62,32 @@ STR end end + def self.consume_all(name, paths) + usage = < [corpus_path2] [...] + +Processes some number of text files or json tweet corpuses +into one usable model. It will be output at model/.model +STR + + if paths.empty? + log usage + exit + end + + outpath = File.join(APP_PATH, 'model', "#{name}.model") + #pathes.each do |path| + # filename = File.basename(path) + # shortname = filename.split('.')[0..-2].join('.') + # + # outpath = File.join(APP_PATH, 'model', "#{shortname}.model") + # Model.consume(path).save(outpath) + # log "Corpus consumed to #{outpath}" + #end + Model.consume_all(paths).save(outpath) + log "Corpuses consumed to #{outpath}" + end + def self.gen(model_path, input) usage = < [input] @@ -187,6 +213,7 @@ STR Usage: ebooks new ebooks consume [corpus_path2] [...] + ebooks consume-all [corpus_path2] [...] ebooks gen [input] ebooks score ebooks archive <@user> @@ -202,6 +229,7 @@ STR case args[0] when "new" then new(args[1]) when "consume" then consume(args[1..-1]) + when "consume-all" then consume_all(args[1], args[2..-1]) when "gen" then gen(args[1], args[2..-1].join(' ')) when "score" then score(args[1], args[2..-1].join(' ')) when "archive" then archive(args[1], args[2]) diff --git a/lib/twitter_ebooks/archive.rb b/lib/twitter_ebooks/archive.rb index 87e06dc..566d9d8 100644 --- a/lib/twitter_ebooks/archive.rb +++ b/lib/twitter_ebooks/archive.rb @@ -31,14 +31,12 @@ module Ebooks end end - Twitter.configure do |config| + Twitter::REST::Client.new do |config| config.consumer_key = @config[:consumer_key] config.consumer_secret = @config[:consumer_secret] config.oauth_token = @config[:oauth_token] config.oauth_token_secret = @config[:oauth_token_secret] end - - Twitter::Client.new end def initialize(username, path, client=nil) diff --git a/lib/twitter_ebooks/bot.rb b/lib/twitter_ebooks/bot.rb index aa8456e..6dcea64 100755 --- a/lib/twitter_ebooks/bot.rb +++ b/lib/twitter_ebooks/bot.rb @@ -43,15 +43,13 @@ module Ebooks config.oauth_token_secret = @oauth_token_secret end - Twitter.configure do |config| + @twitter = Twitter::REST::Client.new do |config| config.consumer_key = @consumer_key config.consumer_secret = @consumer_secret config.oauth_token = @oauth_token config.oauth_token_secret = @oauth_token_secret end - @twitter = Twitter::Client.new - needs_stream = [@on_follow, @on_message, @on_mention, @on_timeline].any? {|e| !e.nil?} @stream = TweetStream::Client.new if needs_stream @@ -90,19 +88,19 @@ module Ebooks end @stream.userstream do |ev| - next unless ev[:text] # If it's not a text-containing tweet, ignore it - next if ev[:user][:screen_name] == @username # Ignore our own tweets + next unless ev.text # If it's not a text-containing tweet, ignore it + next if ev.user.screen_name == @username # Ignore our own tweets meta = {} mentions = ev.attrs[:entities][:user_mentions].map { |x| x[:screen_name] } reply_mentions = mentions.reject { |m| m.downcase == @username.downcase } - reply_mentions = [ev[:user][:screen_name]] + reply_mentions + reply_mentions = [ev.user.screen_name] + reply_mentions meta[:reply_prefix] = reply_mentions.uniq.map { |m| '@'+m }.join(' ') + ' ' meta[:limit] = 140 - meta[:reply_prefix].length - mless = ev[:text] + mless = ev.text begin ev.attrs[:entities][:user_mentions].reverse.each do |entity| last = mless[entity[:indices][1]..-1]||'' @@ -119,8 +117,8 @@ module Ebooks # - The tweet mentions list contains our username # - The tweet is not being retweeted by somebody else # - Or soft-retweeted by somebody else - if mentions.map(&:downcase).include?(@username.downcase) && !ev[:retweeted_status] && !ev[:text].start_with?('RT ') - log "Mention from @#{ev[:user][:screen_name]}: #{ev[:text]}" + if mentions.map(&:downcase).include?(@username.downcase) && !ev.retweeted_status? && !ev.text.start_with?('RT ') + log "Mention from @#{ev.user.screen_name}: #{ev.text}" @on_mention.call(ev, meta) if @on_mention else @on_timeline.call(ev, meta) if @on_timeline @@ -144,8 +142,8 @@ module Ebooks log "Sending DM to @#{ev[:sender][:screen_name]}: #{text}" @twitter.direct_message_create(ev[:sender][:screen_name], text, opts) elsif ev.is_a? Twitter::Tweet - log "Replying to @#{ev[:user][:screen_name]} with: #{text}" - @twitter.update(text, in_reply_to_status_id: ev[:id]) + log "Replying to @#{ev.user.screen_name} with: #{text}" + @twitter.update(text, in_reply_to_status_id: ev.id) else raise Exception("Don't know how to reply to a #{ev.class}") end diff --git a/lib/twitter_ebooks/model.rb b/lib/twitter_ebooks/model.rb index f3139e0..0f1bbad 100644 --- a/lib/twitter_ebooks/model.rb +++ b/lib/twitter_ebooks/model.rb @@ -14,6 +14,10 @@ module Ebooks Model.new.consume(txtpath) end + def self.consume_all(paths) + Model.new.consume_all(paths) + end + def self.load(path) model = Model.new model.instance_eval do @@ -87,6 +91,10 @@ module Ebooks lines = content.split("\n") end + consume_lines(lines) + end + + def consume_lines(lines) log "Removing commented lines and sorting mentions" statements = [] @@ -118,6 +126,36 @@ module Ebooks self end + def consume_all(paths) + lines = [] + paths.each do |path| + content = File.read(path, :encoding => 'utf-8') + @hash = Digest::MD5.hexdigest(content) + + if path.split('.')[-1] == "json" + log "Reading json corpus from #{path}" + l = JSON.parse(content).map do |tweet| + tweet['text'] + end + lines.concat(l) + elsif path.split('.')[-1] == "csv" + log "Reading CSV corpus from #{path}" + content = CSV.parse(content) + header = content.shift + text_col = header.index('text') + l = content.map do |tweet| + tweet[text_col] + end + lines.concat(l) + else + log "Reading plaintext corpus from #{path}" + l = content.split("\n") + lines.concat(l) + end + end + consume_lines(lines) + end + def fix(tweet) # This seems to require an external api call #begin diff --git a/twitter_ebooks.gemspec b/twitter_ebooks.gemspec index 8f3fe83..0fec762 100644 --- a/twitter_ebooks.gemspec +++ b/twitter_ebooks.gemspec @@ -19,7 +19,7 @@ Gem::Specification.new do |gem| gem.add_development_dependency 'memory_profiler' gem.add_development_dependency 'pry-byebug' - gem.add_runtime_dependency 'twitter', '~> 4.0' + gem.add_runtime_dependency 'twitter', '~> 5.0' gem.add_runtime_dependency 'simple_oauth', '~> 0.2.0' gem.add_runtime_dependency 'tweetstream' gem.add_runtime_dependency 'rufus-scheduler'