# Convert plaintext corpus files into JSON tweet archives.
#
# Each input file is split into lines. A line starting with "# " is treated
# as an id marker: the text after the marker is attached as :id to the next
# non-marker line. Every non-marker line becomes one tweet hash
# ({ text: ... }, plus :id when a marker preceded it). The resulting array is
# pretty-printed to "<input basename without its last extension>.json",
# a bare filename and therefore relative to the current working directory.
#
# @param paths [Array<String>] plaintext corpus files to convert
# @return [Array<String>] the +paths+ argument (the result of #each)
def self.jsonify(paths)
  paths.each do |path|
    # Strip only the final extension. Cutting at the first dot would map
    # "user.2013.txt" and "user.2014.txt" onto the same "user.json", so the
    # second file would silently overwrite the first.
    new_path = "#{File.basename(path, '.*')}.json"

    tweets = []
    id = nil
    # Accept both LF and CRLF line endings so no stray "\r" ends up in the
    # tweet text or in the ids.
    File.read(path).split(/\r?\n/).each do |line|
      if line.start_with?('# ')
        # Drop the leading marker only; a later "# " inside the comment
        # must not change what is taken as the id.
        marker = line[2..-1].strip
        id = marker.empty? ? nil : marker
      else
        tweet = { text: line }
        if id
          tweet[:id] = id
          id = nil # an id applies to exactly one tweet
        end
        tweets << tweet
      end
    end

    log "Writing #{tweets.length} tweets to #{new_path}"
    File.write(new_path, JSON.pretty_generate(tweets))
  end
end
""" if args.length == 0 @@ -121,7 +123,7 @@ module Ebooks when "score" then score(args[1], args[2..-1].join(' ')) when "archive" then archive(args[1], args[2]) when "tweet" then tweet(args[1], args[2]) - when "jsonify" then jsonify(args[1], args[2]) + when "jsonify" then jsonify(args[1..-1]) end end end diff --git a/lib/twitter_ebooks/model.rb b/lib/twitter_ebooks/model.rb index 0742197..581682e 100755 --- a/lib/twitter_ebooks/model.rb +++ b/lib/twitter_ebooks/model.rb @@ -17,14 +17,22 @@ module Ebooks Marshal.load(File.read(path)) end - def consume(txtpath) - # Record hash of source file so we know to update later - @hash = Digest::MD5.hexdigest(File.read(txtpath)) + def consume(path) + content = File.read(path) + @hash = Digest::MD5.hexdigest(content) + + if path.split('.')[-1] == "json" + log "Reading json corpus from #{path}" + lines = JSON.parse(content, symbolize_names: true).map do |tweet| + tweet[:text] + end + else + log "Reading plaintext corpus from #{path}" + lines = content.split("\n") + end - text = File.read(txtpath) log "Removing commented lines and sorting mentions" - lines = text.split("\n") keeping = [] mentions = [] lines.each do |l|