Merge pull request #82 from negatendo/consume_append

append to model
This commit is contained in:
Jaiden Mispy 2015-06-13 18:07:20 +10:00
commit 42eee9f8e6
2 changed files with 49 additions and 0 deletions

View file

@ -25,6 +25,7 @@ Usage:
ebooks auth
ebooks consume <corpus_path> [corpus_path2] [...]
ebooks consume-all <model_name> <corpus_path> [corpus_path2] [...]
ebooks append <model_name> <corpus_path>
ebooks gen <model_path> [input]
ebooks archive <username> [path]
ebooks tweet <model_path> <botname>
@ -116,6 +117,24 @@ STR
log "Corpuses consumed to #{outpath}"
end
# Usage text for the `ebooks append` subcommand.
# NOTE(review): presumably this is what `help :append` prints when the
# command is invoked with a missing argument -- confirm against `help`.
HELP.append = <<-STR
Usage: ebooks append <model_name> <corpus_path>
Process then append the provided corpus to the model
instead of overwriting.
STR
# CLI entry point for `ebooks append`: consumes the corpus at +path+ and
# appends the result to the existing model file
# APP_PATH/model/<name>.model.
#
# Exits with status 1 when an argument is missing or when the target model
# file does not exist.
#
# @param name [String] model name (file name without the ".model" suffix)
# @param path [String] path of the corpus to consume
def self.append(name, path)
  if !name || !path
    help :append
    exit 1
  end

  model_path = File.join(APP_PATH, 'model', "#{name}.model")

  # Model#append only logs and returns without writing when the model file
  # is missing. Check up front so we neither consume the corpus for nothing
  # nor report a successful append that never happened.
  unless File.file?(model_path)
    log "No existing model found at #{model_path}"
    exit 1
  end

  Ebooks::Model.consume(path).append(model_path)
  log "Corpus appended to #{name}.model"
end
HELP.jsonify = <<-STR
Usage: ebooks jsonify <tweets.csv> [tweets.csv2] [...]
@ -380,6 +399,7 @@ STR
when "new" then new(args[1])
when "consume" then consume(args[1..-1])
when "consume-all" then consume_all(args[1], args[2..-1])
when "append" then append(args[1],args[2])
when "gen" then gen(args[1], args[2..-1].join(' '))
when "archive" then archive(args[1], args[2])
when "tweet" then tweet(args[1], args[2])

View file

@ -69,6 +69,35 @@ module Ebooks
self
end
# Append this model's data to an existing model file instead of overwriting it.
#
# The file at +path+ is deserialized, its :tokens, :sentences, :mentions and
# :keywords arrays are appended after this model's own arrays, and the merged
# result is written back to +path+. On success the in-memory arrays hold the
# merged data as well.
#
# The merged payload is fully built and serialized *before* the file is
# opened for writing, so a failure while merging (e.g. a stored model that
# lacks one of the expected keys) leaves the existing file untouched.
#
# NOTE(review): the arrays are concatenated as-is. If sentences/mentions
# store indices into the tokens array, entries from the old model would
# need re-indexing after the merge -- confirm against the tokenizing code.
#
# @param path [String] path of an existing serialized model file
# @return [self, nil] self after a successful append, nil when no file
#   exists at +path+ (nothing is written in that case)
# @raise [TypeError] if the stored model is missing one of the expected arrays
def append(path)
  unless File.file?(path)
    log "No existing model found at #{path}"
    return
  end

  # Marshal.load can instantiate arbitrary objects: only use on model files
  # you generated yourself, never on untrusted input.
  props = Marshal.load(File.binread(path))

  merged = {
    tokens: @tokens + props[:tokens],
    sentences: @sentences + props[:sentences],
    mentions: @mentions + props[:mentions],
    keywords: @keywords + props[:keywords]
  }
  payload = Marshal.dump(merged)

  # Only now touch the file: everything that can fail has already run.
  File.binwrite(path, payload)

  # Keep the in-memory model in sync with what was written (same array
  # objects, merged contents).
  @tokens.replace(merged[:tokens])
  @sentences.replace(merged[:sentences])
  @mentions.replace(merged[:mentions])
  @keywords.replace(merged[:keywords])

  self
end
def initialize
@tokens = []