Handle edge-case corpora with short sentences
This commit is contained in:
parent
dbae6d3499
commit
a272bd69ca
3 changed files with 18 additions and 4 deletions
|
@@ -183,7 +183,7 @@ module Ebooks
|
|||
|
||||
lines = nil; statements = nil; mentions = nil # Allow garbage collection
|
||||
|
||||
- log "Tokenizing #{text.count('\n')} statements and #{mention_text.count('\n')} mentions"
|
||||
+ log "Tokenizing #{text.count("\n")} statements and #{mention_text.count("\n")} mentions"
|
||||
|
||||
@sentences = mass_tikify(text)
|
||||
@mentions = mass_tikify(mention_text)
|
||||
|
@@ -254,7 +254,7 @@ module Ebooks
|
|||
tweet = ""
|
||||
|
||||
while (tikis = generator.generate(3, :bigrams)) do
|
||||
- log "Attempting to produce tweet try #{retries+1}/#{retry_limit}"
|
||||
+ #log "Attempting to produce tweet try #{retries+1}/#{retry_limit}"
|
||||
break if (tikis.length > 3 || responding) && valid_tweet?(tikis, limit)
|
||||
|
||||
retries += 1
|
||||
|
@@ -262,7 +262,7 @@ module Ebooks
|
|||
end
|
||||
|
||||
if verbatim?(tikis) && tikis.length > 3 # We made a verbatim tweet by accident
|
||||
- log "Attempting to produce unigram tweet try #{retries+1}/#{retry_limit}"
|
||||
+ #log "Attempting to produce unigram tweet try #{retries+1}/#{retry_limit}"
|
||||
while (tikis = generator.generate(3, :unigrams)) do
|
||||
break if valid_tweet?(tikis, limit) && !verbatim?(tikis)
|
||||
|
||||
|
|
|
@@ -14,7 +14,7 @@ module Ebooks
|
|||
end
|
||||
|
||||
def initialize(sentences)
|
||||
- @sentences = sentences.reject { |s| s.length < 2 }
|
||||
+ @sentences = sentences
|
||||
@unigrams = {}
|
||||
@bigrams = {}
|
||||
|
||||
|
|
|
@@ -70,5 +70,19 @@ describe Ebooks::Model do
|
|||
|
||||
file.unlink
|
||||
end
|
||||
|
||||
it 'handles strange unicode edge-cases' do
|
||||
file = Tempfile.new('unicode')
|
||||
file.write("💞\n💞")
|
||||
file.close
|
||||
|
||||
model = Ebooks::Model.consume(file.path)
|
||||
expect(model.mentions.count).to eq 0
|
||||
expect(model.sentences.count).to eq 2
|
||||
|
||||
file.unlink
|
||||
|
||||
p model.make_statement
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue