stuff I had to change to get the bot working

This commit is contained in:
Joshua Charles Campbell 2015-06-04 10:46:01 -06:00
parent 854f4351ee
commit a885d5fe22
3 changed files with 16 additions and 4 deletions

View file

@ -80,7 +80,13 @@ module Ebooks
# @param token [String] token to look up; registered first if not yet known
# @return [Integer] the token's index in @tokens (its "tiki")
def tikify(token)
  # Known token: hand back the stored index without touching @tokens.
  return @tikis[token] if @tikis.key?(token)

  # Progress output while new tokens are registered: one line each time
  # the token count is about to reach a multiple of 1000.
  upcoming = @tokens.length + 1
  puts "#{upcoming} tokens" if (upcoming % 1000).zero?

  @tokens << token
  # The index of the token just appended is its id; the assignment's
  # value is also the method's return value.
  @tikis[token] = @tokens.length - 1
end
# Convert a body of text into arrays of tikis
@ -143,8 +149,8 @@ module Ebooks
end
end
text = statements.join("\n")
mention_text = mentions.join("\n")
text = statements.join("\n").encode('UTF-8', :invalid => :replace)
mention_text = mentions.join("\n").encode('UTF-8', :invalid => :replace)
lines = nil; statements = nil; mentions = nil # Allow garbage collection
@ -155,6 +161,7 @@ module Ebooks
log "Ranking keywords"
@keywords = NLP.keywords(text).top(200).map(&:to_s)
log "Top keywords: #{@keywords[0]} #{@keywords[1]} #{@keywords[2]}"
self
end
@ -218,6 +225,7 @@ module Ebooks
tweet = ""
while (tikis = generator.generate(3, :bigrams)) do
log "Attempting to produce tweet try #{retries+1}/#{retry_limit}"
next if tikis.length <= 3 && !responding
break if valid_tweet?(tikis, limit)
@ -226,6 +234,7 @@ module Ebooks
end
if verbatim?(tikis) && tikis.length > 3 # We made a verbatim tweet by accident
log "Attempting to produce unigram tweet try #{retries+1}/#{retry_limit}"
while (tikis = generator.generate(3, :unigrams)) do
break if valid_tweet?(tikis, limit) && !verbatim?(tikis)

View file

@ -1,6 +1,7 @@
# encoding: utf-8
require 'fast-stemmer'
require 'highscore'
require 'htmlentities'
module Ebooks
module NLP
@ -42,7 +43,6 @@ module Ebooks
# Lazily build and memoize the HTML entity decoder. The 'htmlentities'
# library is required at the top of this file, so only the decoder
# instance is deferred until first use.
# @return [HTMLEntities]
def self.htmlentities
  @htmlentities ||= HTMLEntities.new
end

View file

@ -19,6 +19,9 @@ module Ebooks
@bigrams = {}
@sentences.each_with_index do |tikis, i|
if (i % 10000 == 0) then
log ("Building: sentence #{i} of #{sentences.length}")
end
last_tiki = INTERIM
tikis.each_with_index do |tiki, j|
@unigrams[last_tiki] ||= []