On second thought, we can't use a cache system

Simply because the corpora are too darn big to keep around.
This commit is contained in:
Jaiden Mispy 2014-11-18 13:51:31 +11:00
parent 8135aaaabb
commit 2e336fb9be

View file

@ -4,33 +4,14 @@
require 'json'
require 'set'
require 'digest/md5'
require 'fileutils'
require 'csv'
module Ebooks
class Model
attr_accessor :hash, :tokens, :sentences, :mentions, :keywords
# Consume a corpus file to create a model
# @param corpus_path Path to a json, text or csv file to consume
# @param cache Optional path to a directory to store cached models
#
# NOTE(review): this is the pre-change (removed) version as captured by a
# diff view — the method's trailing `model` / `end` lines are not visible
# here, so the definition is truncated as shown.
def self.consume(corpus_path, cache: nil)
if cache
# Cache key is the MD5 of the corpus file's contents, so editing the
# corpus automatically invalidates the cached model.
FileUtils::mkdir_p cache
cache_path = File.join(cache, Digest::MD5.file(corpus_path).to_s)
# NOTE(review): File.exists? is deprecated in modern Ruby; File.exist?
# is the supported spelling.
if File.exists?(cache_path)
log "Reading model from cache at #{cache_path}"
return Model.load(cache_path)
end
end
# Cache miss (or caching disabled): build the model from scratch.
model = Model.new.consume(corpus_path)
if cache
log "Caching model at #{cache_path}"
model.save(cache_path)
end
# Build a fresh Model and have it consume the corpus at +txtpath+.
# Returns whatever Model#consume returns for that corpus.
def self.consume(txtpath)
model = Model.new
model.consume(txtpath)
end
def self.consume_all(paths)