diff --git a/lib/twitter_ebooks/model.rb b/lib/twitter_ebooks/model.rb index 0b45baf..cfb56a5 100644 --- a/lib/twitter_ebooks/model.rb +++ b/lib/twitter_ebooks/model.rb @@ -183,7 +183,7 @@ module Ebooks lines = nil; statements = nil; mentions = nil # Allow garbage collection - log "Tokenizing #{text.count('\n')} statements and #{mention_text.count('\n')} mentions" + log "Tokenizing #{text.count("\n")} statements and #{mention_text.count("\n")} mentions" @sentences = mass_tikify(text) @mentions = mass_tikify(mention_text) @@ -254,7 +254,7 @@ module Ebooks tweet = "" while (tikis = generator.generate(3, :bigrams)) do - log "Attempting to produce tweet try #{retries+1}/#{retry_limit}" + #log "Attempting to produce tweet try #{retries+1}/#{retry_limit}" break if (tikis.length > 3 || responding) && valid_tweet?(tikis, limit) retries += 1 @@ -262,7 +262,7 @@ module Ebooks end if verbatim?(tikis) && tikis.length > 3 # We made a verbatim tweet by accident - log "Attempting to produce unigram tweet try #{retries+1}/#{retry_limit}" + #log "Attempting to produce unigram tweet try #{retries+1}/#{retry_limit}" while (tikis = generator.generate(3, :unigrams)) do break if valid_tweet?(tikis, limit) && !verbatim?(tikis) diff --git a/lib/twitter_ebooks/suffix.rb b/lib/twitter_ebooks/suffix.rb index d44d88d..6ac30a4 100644 --- a/lib/twitter_ebooks/suffix.rb +++ b/lib/twitter_ebooks/suffix.rb @@ -14,7 +14,7 @@ module Ebooks end def initialize(sentences) - @sentences = sentences.reject { |s| s.length < 2 } + @sentences = sentences @unigrams = {} @bigrams = {} diff --git a/spec/model_spec.rb b/spec/model_spec.rb index 2d2d039..5ff9f39 100644 --- a/spec/model_spec.rb +++ b/spec/model_spec.rb @@ -70,5 +70,19 @@ describe Ebooks::Model do file.unlink end + + it 'handles strange unicode edge-cases' do + file = Tempfile.new('unicode') + file.write("šŸ’ž\nšŸ’ž") + file.close + + model = Ebooks::Model.consume(file.path) + expect(model.mentions.count).to eq 0 + expect(model.sentences.count).to eq 2 + + file.unlink + + p model.make_statement + end end end