2.2.5 - encoding: utf-8

This commit is contained in:
Mispy 2014-05-07 16:45:17 +10:00
parent 35a971e245
commit 8a5c4831ad
3 changed files with 6 additions and 2 deletions

View file

@@ -1,4 +1,5 @@
#!/usr/bin/env ruby #!/usr/bin/env ruby
# encoding: utf-8
require 'twitter_ebooks' require 'twitter_ebooks'

View file

@@ -1,4 +1,5 @@
#!/usr/bin/env ruby #!/usr/bin/env ruby
# encoding: utf-8
require 'twitter' require 'twitter'
require 'tweetstream' require 'tweetstream'
require 'rufus/scheduler' require 'rufus/scheduler'

View file

@@ -1,3 +1,5 @@
# encoding: utf-8
module Ebooks module Ebooks
# This generator uses data identical to the markov model, but # This generator uses data identical to the markov model, but
# instead of making a chain by looking up bigrams it uses the # instead of making a chain by looking up bigrams it uses the
@@ -50,7 +52,7 @@ module Ebooks
tokens.each_with_index do |token, i| tokens.each_with_index do |token, i|
next_token = tokens[i+1] next_token = tokens[i+1]
break if next_token.nil? break if next_token.nil?
alternatives = (n == :unigrams) ? @unigrams[next_token] : @bigrams[token][next_token] alternatives = (n == :unigrams) ? @unigrams[next_token] : @bigrams[token][next_token]
# Filter out suffixes from previous sentences # Filter out suffixes from previous sentences
alternatives.reject! { |a| a[1] == INTERIM || used.include?(a[0]) } alternatives.reject! { |a| a[1] == INTERIM || used.include?(a[0]) }
@@ -66,7 +68,7 @@ module Ebooks
verbatim << @sentences[alt[0]] verbatim << @sentences[alt[0]]
suffix = @sentences[alt[0]][alt[1]..-1] suffix = @sentences[alt[0]][alt[1]..-1]
potential = tokens[0..start+1] + suffix potential = tokens[0..start+1] + suffix
# Ensure we're not just rebuilding some segment of another sentence # Ensure we're not just rebuilding some segment of another sentence
unless verbatim.find { |v| NLP.subseq?(v, potential) || NLP.subseq?(potential, v) } unless verbatim.find { |v| NLP.subseq?(v, potential) || NLP.subseq?(potential, v) }
used << alt[0] used << alt[0]