# Help text for the `append` command, shown by `help :append`.
HELP.append = <<-STR
    Usage: ebooks append <model_name> <corpus_path>

    Process then append the provided corpus to the model
    instead of overwriting.
STR

# Consume a corpus and append the result to an existing model file
# instead of overwriting it.
#
# Prints the command help and exits with status 1 when either argument
# is missing. Also exits with status 1 when the model could not be
# appended to, so the success message is only logged after a real append.
#
# @param name [String] model name; resolved to model/<name>.model under APP_PATH
# @param path [String] path of the corpus to consume
def self.append(name, path)
  unless name && path
    help :append
    exit 1
  end

  model_path = File.join(APP_PATH, 'model', "#{name}.model")

  # Model#append logs the reason and returns nil when there is no existing
  # model file at model_path; do not report success in that case.
  appended = Ebooks::Model.consume(path).append(model_path)
  exit 1 unless appended

  log "Corpus appended to #{name}.model"
end
# Append this model's data to an existing model file instead of
# overwriting it. The file is rewritten with this model's tokens,
# sentences, mentions and keywords followed by the ones already stored.
#
# The receiver's own arrays are extended in place with the stored data,
# so after a successful call the in-memory model matches the file.
#
# NOTE(review): if sentences/mentions hold positions into the tokens
# array, plain concatenation leaves the stored entries pointing at the
# wrong tokens -- confirm against how consume builds them.
#
# @param path [String] path of the existing model file
# @return [self, nil] self on success; nil when no file exists at path
def append(path)
  unless File.file?(path)
    log "No existing model found at #{path}"
    return
  end

  # Read in and deserialize the existing model.
  # NOTE(review): Marshal.load must only be given trusted files; this
  # presumably reads a model written by this tool -- confirm.
  props = Marshal.load(File.binread(path))

  # Merge and serialize BEFORE opening the file for writing: mode 'wb'
  # truncates on open, so a failure while merging or dumping would
  # otherwise destroy the existing model. A key missing from the stored
  # model is treated as empty.
  payload = Marshal.dump({
    tokens: @tokens.concat(props[:tokens] || []),
    sentences: @sentences.concat(props[:sentences] || []),
    mentions: @mentions.concat(props[:mentions] || []),
    keywords: @keywords.concat(props[:keywords] || [])
  })

  File.open(path, 'wb') { |f| f.write(payload) }
  self
end