From 8a5c4831adaa68dc155ec3abcf58ce809a3ba33b Mon Sep 17 00:00:00 2001 From: Mispy <^_^@mispy.me> Date: Wed, 7 May 2014 16:45:17 +1000 Subject: [PATCH] 2.2.5 - encoding: utf-8 --- bin/ebooks | 1 + lib/twitter_ebooks/bot.rb | 1 + lib/twitter_ebooks/suffix.rb | 6 ++++-- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/bin/ebooks b/bin/ebooks index a2309c3..e1950cc 100755 --- a/bin/ebooks +++ b/bin/ebooks @@ -1,4 +1,5 @@ #!/usr/bin/env ruby +# encoding: utf-8 require 'twitter_ebooks' diff --git a/lib/twitter_ebooks/bot.rb b/lib/twitter_ebooks/bot.rb index 7b08929..bace4be 100644 --- a/lib/twitter_ebooks/bot.rb +++ b/lib/twitter_ebooks/bot.rb @@ -1,4 +1,5 @@ #!/usr/bin/env ruby +# encoding: utf-8 require 'twitter' require 'tweetstream' require 'rufus/scheduler' diff --git a/lib/twitter_ebooks/suffix.rb b/lib/twitter_ebooks/suffix.rb index 9a4bd04..0b10f64 100644 --- a/lib/twitter_ebooks/suffix.rb +++ b/lib/twitter_ebooks/suffix.rb @@ -1,3 +1,5 @@ +# encoding: utf-8 + module Ebooks # This generator uses data identical to the markov model, but # instead of making a chain by looking up bigrams it uses the @@ -50,7 +52,7 @@ module Ebooks tokens.each_with_index do |token, i| next_token = tokens[i+1] break if next_token.nil? - + alternatives = (n == :unigrams) ? @unigrams[next_token] : @bigrams[token][next_token] # Filter out suffixes from previous sentences alternatives.reject! { |a| a[1] == INTERIM || used.include?(a[0]) } @@ -66,7 +68,7 @@ module Ebooks verbatim << @sentences[alt[0]] suffix = @sentences[alt[0]][alt[1]..-1] potential = tokens[0..start+1] + suffix - + # Ensure we're not just rebuilding some segment of another sentence unless verbatim.find { |v| NLP.subseq?(v, potential) || NLP.subseq?(potential, v) } used << alt[0]