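Stuff I had to change to get the bot working: re-encode corpus text as UTF-8 (replacing invalid bytes), require htmlentities at load time, and add progress logging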
This commit is contained in:
parent
854f4351ee
commit
a885d5fe22
3 changed files with 16 additions and 4 deletions
|
@ -80,7 +80,13 @@ module Ebooks
|
||||||
# @param token [String]
|
# @param token [String]
|
||||||
# @return [Integer]
|
# @return [Integer]
|
||||||
def tikify(token)
|
def tikify(token)
|
||||||
@tikis[token] or (@tokens << token and @tikis[token] = @tokens.length-1)
|
if @tikis.has_key?(token) then
|
||||||
|
return @tikis[token]
|
||||||
|
else
|
||||||
|
(@tokens.length+1)%1000 == 0 and puts "#{@tokens.length+1} tokens"
|
||||||
|
@tokens << token
|
||||||
|
return @tikis[token] = @tokens.length-1
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Convert a body of text into arrays of tikis
|
# Convert a body of text into arrays of tikis
|
||||||
|
@ -143,8 +149,8 @@ module Ebooks
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
text = statements.join("\n")
|
text = statements.join("\n").encode('UTF-8', :invalid => :replace)
|
||||||
mention_text = mentions.join("\n")
|
mention_text = mentions.join("\n").encode('UTF-8', :invalid => :replace)
|
||||||
|
|
||||||
lines = nil; statements = nil; mentions = nil # Allow garbage collection
|
lines = nil; statements = nil; mentions = nil # Allow garbage collection
|
||||||
|
|
||||||
|
@ -155,6 +161,7 @@ module Ebooks
|
||||||
|
|
||||||
log "Ranking keywords"
|
log "Ranking keywords"
|
||||||
@keywords = NLP.keywords(text).top(200).map(&:to_s)
|
@keywords = NLP.keywords(text).top(200).map(&:to_s)
|
||||||
|
log "Top keywords: #{@keywords[0]} #{@keywords[1]} #{@keywords[2]}"
|
||||||
|
|
||||||
self
|
self
|
||||||
end
|
end
|
||||||
|
@ -218,6 +225,7 @@ module Ebooks
|
||||||
tweet = ""
|
tweet = ""
|
||||||
|
|
||||||
while (tikis = generator.generate(3, :bigrams)) do
|
while (tikis = generator.generate(3, :bigrams)) do
|
||||||
|
log "Attempting to produce tweet try #{retries+1}/#{retry_limit}"
|
||||||
next if tikis.length <= 3 && !responding
|
next if tikis.length <= 3 && !responding
|
||||||
break if valid_tweet?(tikis, limit)
|
break if valid_tweet?(tikis, limit)
|
||||||
|
|
||||||
|
@ -226,6 +234,7 @@ module Ebooks
|
||||||
end
|
end
|
||||||
|
|
||||||
if verbatim?(tikis) && tikis.length > 3 # We made a verbatim tweet by accident
|
if verbatim?(tikis) && tikis.length > 3 # We made a verbatim tweet by accident
|
||||||
|
log "Attempting to produce unigram tweet try #{retries+1}/#{retry_limit}"
|
||||||
while (tikis = generator.generate(3, :unigrams)) do
|
while (tikis = generator.generate(3, :unigrams)) do
|
||||||
break if valid_tweet?(tikis, limit) && !verbatim?(tikis)
|
break if valid_tweet?(tikis, limit) && !verbatim?(tikis)
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
require 'fast-stemmer'
|
require 'fast-stemmer'
|
||||||
require 'highscore'
|
require 'highscore'
|
||||||
|
require 'htmlentities'
|
||||||
|
|
||||||
module Ebooks
|
module Ebooks
|
||||||
module NLP
|
module NLP
|
||||||
|
@ -42,7 +43,6 @@ module Ebooks
|
||||||
# Lazily load HTML entity decoder
|
# Lazily load HTML entity decoder
|
||||||
# @return [HTMLEntities]
|
# @return [HTMLEntities]
|
||||||
def self.htmlentities
|
def self.htmlentities
|
||||||
require 'htmlentities'
|
|
||||||
@htmlentities ||= HTMLEntities.new
|
@htmlentities ||= HTMLEntities.new
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,9 @@ module Ebooks
|
||||||
@bigrams = {}
|
@bigrams = {}
|
||||||
|
|
||||||
@sentences.each_with_index do |tikis, i|
|
@sentences.each_with_index do |tikis, i|
|
||||||
|
if (i % 10000 == 0) then
|
||||||
|
log ("Building: sentence #{i} of #{sentences.length}")
|
||||||
|
end
|
||||||
last_tiki = INTERIM
|
last_tiki = INTERIM
|
||||||
tikis.each_with_index do |tiki, j|
|
tikis.each_with_index do |tiki, j|
|
||||||
@unigrams[last_tiki] ||= []
|
@unigrams[last_tiki] ||= []
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue