More cleanup

This commit is contained in:
Jaiden Mispy 2014-12-05 22:57:32 +11:00
parent 1977445b1c
commit 822f5e4c6c
13 changed files with 144 additions and 14884 deletions

View file

@ -8,8 +8,11 @@ A framework for building interactive twitterbots which respond to mentions/DMs.
## New in 3.0
- Bots now run in their own threads (no eventmachine), and startup is parallelized
- Replies are slightly rate-limited by default to prevent infinite bot convos
- Bots run in their own threads (no eventmachine), and startup is parallelized
- Bots start with `ebooks start`, and no longer die on unhandled exceptions
- `ebooks auth` command will create new access tokens, for running multiple bots
- `ebooks console` starts a ruby interpreter with bots loaded (see Ebooks::Bot.all)
- Replies are slightly rate-limited to prevent infinite bot convos
- Non-participating users in a mention chain will be dropped after a few tweets
## Installation
@ -26,47 +29,57 @@ Run `ebooks new <reponame>` to generate a new repository containing a sample bot
``` ruby
# This is an example bot definition with event handlers commented out
# You can define as many of these as you like; they will run simultaneously
# You can define and instantiate as many bots as you like
Ebooks::Bot.new("abby_ebooks") do |bot|
# Consumer details come from registering an app at https://dev.twitter.com/
# OAuth details can be fetched with https://github.com/marcel/twurl
bot.consumer_key = "" # Your app consumer key
bot.consumer_secret = "" # Your app consumer secret
bot.oauth_token = "" # Token connecting the app to this account
bot.oauth_token_secret = "" # Secret connecting the app to this account
class MyBot < Ebooks::Bot
# Configuration here applies to all MyBots
def configure
# Consumer details come from registering an app at https://dev.twitter.com/
# Once you have consumer details, use "ebooks auth" for new access tokens
self.consumer_key = '' # Your app consumer key
self.consumer_secret = '' # Your app consumer secret
bot.on_startup do
# Run some startup task
# puts "I'm ready!"
# Users to block instead of interacting with
self.blacklist = ['tnietzschequote']
# Range in seconds to randomize delay when bot.delay is called
self.delay_range = 1..6
end
bot.on_message do |dm|
def on_startup
scheduler.every '24h' do
# Tweet something every 24 hours
# See https://github.com/jmettraux/rufus-scheduler
# bot.tweet("hi")
# bot.pictweet("hi", "cuteselfie.jpg")
end
end
def on_message(dm)
# Reply to a DM
# bot.reply(dm, "secret secrets")
end
bot.on_follow do |user|
def on_follow(user)
# Follow a user back
# bot.follow(user[:screen_name])
end
bot.on_mention do |tweet, meta|
def on_mention(tweet)
# Reply to a mention
# bot.reply(tweet, meta[:reply_prefix] + "oh hullo")
# bot.reply(tweet, meta(tweet)[:reply_prefix] + "oh hullo")
end
bot.on_timeline do |tweet, meta|
def on_timeline(tweet)
# Reply to a tweet in the bot's timeline
# bot.reply(tweet, meta[:reply_prefix] + "nice tweet")
# bot.reply(tweet, meta(tweet)[:reply_prefix] + "nice tweet")
end
end
bot.scheduler.every '24h' do
# Tweet something every 24 hours
# See https://github.com/jmettraux/rufus-scheduler
# bot.tweet("hi")
# bot.pictweet("hi", "cuteselfie.jpg", ":possibly_sensitive => true")
end
# Make a MyBot and attach it to an account
MyBot.new("{{BOT_NAME}}") do |bot|
bot.access_token = "" # Token connecting the app to this account
bot.access_token_secret = "" # Secret connecting the app to this account
end
```
@ -107,7 +120,6 @@ Text files use newlines and full stops to separate statements.
Once you have a model, the primary use is to produce statements and related responses to input, using a pseudo-Markov generator:
``` ruby
> require 'twitter_ebooks'
> model = Ebooks::Model.load("model/0xabad1dea.model")
> model.make_statement(140)
=> "My Terrible Netbook may be the kind of person who buys Starbucks, but this Rackspace vuln is pretty straight up a backdoor"
@ -118,14 +130,18 @@ Once you have a model, the primary use is to produce statements and related resp
The secondary function is the "interesting keywords" list. For example, I use this to determine whether a bot wants to fav/retweet/reply to something in its timeline:
``` ruby
top100 = model.keywords.top(100)
top100 = model.keywords.take(100)
tokens = Ebooks::NLP.tokenize(tweet[:text])
if tokens.find { |t| top100.include?(t) }
bot.twitter.favorite(tweet[:id])
bot.favorite(tweet[:id])
end
```
## Bot niceness
## Other notes
If you're using Heroku, which has no persistent filesystem, automating the process of archiving, consuming and updating can be tricky. My current solution is just a daily cron job which commits and pushes for me, which is pretty hacky.

View file

@ -4,6 +4,12 @@
require 'twitter_ebooks'
require 'ostruct'
# Namespace for shared helper methods.
module Ebooks::Util
# Placeholder: the body is empty, so this currently returns nil for any +e+.
# NOTE(review): presumably meant to format an exception for log output —
# confirm the intended behaviour before relying on it.
# @param e the exception to describe (currently unused)
# @return [nil]
def pretty_exception(e)
end
end
module Ebooks::CLI
APP_PATH = Dir.pwd # XXX do some recursive thing instead
HELP = OpenStruct.new
@ -17,8 +23,7 @@ Usage:
ebooks consume <corpus_path> [corpus_path2] [...]
ebooks consume-all <corpus_path> [corpus_path2] [...]
ebooks gen <model_path> [input]
ebooks score <model_path> <input>
ebooks archive <username> <outpath>
ebooks archive <username> [path]
ebooks tweet <model_path> <botname>
STR
@ -50,13 +55,18 @@ STR
exit 1
end
FileUtils.cp_r(SKELETON_PATH, path)
FileUtils.cp_r(Ebooks::SKELETON_PATH, path)
File.open(File.join(path, 'bots.rb'), 'w') do |f|
template = File.read(File.join(SKELETON_PATH, 'bots.rb'))
template = File.read(File.join(Ebooks::SKELETON_PATH, 'bots.rb'))
f.write(template.gsub("{{BOT_NAME}}", reponame))
end
File.open(File.join(path, 'Gemfile'), 'w') do |f|
template = File.read(File.join(Ebooks::SKELETON_PATH, 'Gemfile'))
f.write(template.gsub("{{RUBY_VERSION}}", RUBY_VERSION))
end
log "New twitter_ebooks app created at #{reponame}"
end
@ -78,7 +88,7 @@ STR
shortname = filename.split('.')[0..-2].join('.')
outpath = File.join(APP_PATH, 'model', "#{shortname}.model")
Model.consume(path).save(outpath)
Ebooks::Model.consume(path).save(outpath)
log "Corpus consumed to #{outpath}"
end
end
@ -97,15 +107,7 @@ STR
end
outpath = File.join(APP_PATH, 'model', "#{name}.model")
#pathes.each do |path|
# filename = File.basename(path)
# shortname = filename.split('.')[0..-2].join('.')
#
# outpath = File.join(APP_PATH, 'model', "#{shortname}.model")
# Model.consume(path).save(outpath)
# log "Corpus consumed to #{outpath}"
#end
Model.consume_all(paths).save(outpath)
Ebooks::Model.consume_all(paths).save(outpath)
log "Corpuses consumed to #{outpath}"
end
@ -122,7 +124,7 @@ STR
exit 1
end
model = Model.load(model_path)
model = Ebooks::Model.load(model_path)
if input && !input.empty?
puts "@cmd " + model.make_response(input, 135)
else
@ -130,38 +132,22 @@ STR
end
end
HELP.score = <<-STR
Usage: ebooks score <model_path> <input>
Scores "interest" in some text input according to how
well unique keywords match the model.
STR
# Loads the model stored at +model_path+ and returns its interest score
# for +input+. When either argument is nil, the :score help topic is shown
# and the process exits with status 1.
# @param model_path path handed to Model.load
# @param input text handed to Model#score_interest
# @return whatever Model#score_interest returns
def self.score(model_path, input)
  if [model_path, input].any?(&:nil?)
    help :score
    exit 1
  end

  Model.load(model_path).score_interest(input)
end
HELP.archive = <<-STR
Usage: ebooks archive <username> <outpath>
Usage: ebooks archive <username> [outpath]
Downloads a json corpus of the <username>'s tweets to <outpath>.
Downloads a json corpus of the <username>'s tweets.
Output defaults to corpus/<username>.json
Due to API limitations, this can only receive up to ~3000 tweets
into the past.
STR
def self.archive(username, outpath)
if username.nil? || outpath.nil?
def self.archive(username, outpath=nil)
if username.nil?
help :archive
exit 1
end
Archive.new(username, outpath).sync
Ebooks::Archive.new(username, outpath).sync
end
HELP.tweet = <<-STR
@ -178,10 +164,9 @@ STR
end
load File.join(APP_PATH, 'bots.rb')
model = Model.load(modelpath)
model = Ebooks::Model.load(modelpath)
statement = model.make_statement
log "@#{botname}: #{statement}"
bot = Bot.get(botname)
bot = Ebooks::Bot.get(botname)
bot.configure
bot.tweet(statement)
end
@ -223,7 +208,7 @@ STR
access_token = request_token.get_access_token(oauth_verifier: pin)
log "Account authorized successfully.\n" +
log "Account authorized successfully. Make sure to put these in your bots.rb!\n" +
" access token: #{access_token.token}\n" +
" access token secret: #{access_token.secret}"
end
@ -271,9 +256,9 @@ STR
loop do
begin
bot.start
rescue Exception
bot.log $!
puts $@
rescue Exception => e
bot.log e.inspect
puts e.backtrace.map { |s| "\t"+s }.join("\n")
end
bot.log "Sleeping before reconnect"
sleep 5
@ -334,7 +319,6 @@ STR
when "consume" then consume(args[1..-1])
when "consume-all" then consume_all(args[1], args[2..-1])
when "gen" then gen(args[1], args[2..-1].join(' '))
when "score" then score(args[1], args[2..-1].join(' '))
when "archive" then archive(args[1], args[2])
when "tweet" then tweet(args[1], args[2])
when "jsonify" then jsonify(args[1..-1])

View file

@ -11,6 +11,7 @@ module Ebooks
SKELETON_PATH = File.join(GEM_PATH, 'skeleton')
TEST_PATH = File.join(GEM_PATH, 'test')
TEST_CORPUS_PATH = File.join(TEST_PATH, 'corpus/0xabad1dea.tweets')
INTERIM = :interim
end
require 'twitter_ebooks/nlp'

View file

@ -39,9 +39,14 @@ module Ebooks
end
end
def initialize(username, path, client=nil)
def initialize(username, path=nil, client=nil)
@username = username
@path = path || "#{username}.json"
@path = path || "corpus/#{username}.json"
if File.directory?(@path)
@path = File.join(@path, "#{username}.json")
end
@client = client || make_client
if File.exists?(@path)

65
lib/twitter_ebooks/bot.rb Executable file → Normal file
View file

@ -6,28 +6,11 @@ module Ebooks
class ConfigurationError < Exception
end
# Record of what the bot knows about one particular Twitter user
class UserInfo
  # @return the username this record was created with
  attr_reader :username

  # @return [Integer] number of times this user may still be pestered unprompted
  attr_accessor :pesters_left

  # @param username name of the user being tracked
  def initialize(username)
    # Every user starts out with a single unprompted pester available
    @username, @pesters_left = username, 1
  end

  # @return [Boolean] true while at least one unprompted pester remains
  def can_pester?
    pesters_left > 0
  end
end
# Represents a single reply tree of tweets
class Conversation
attr_reader :last_update
# @param bot [Ebooks::Bot]
def initialize(bot)
@bot = bot
@tweets = []
@ -90,6 +73,8 @@ module Ebooks
@mentions.map(&:downcase).include?(@bot.username.downcase) && !@tweet.retweeted_status? && !@tweet.text.start_with?('RT ')
end
# @param bot [Ebooks::Bot]
# @param ev [Twitter::Tweet]
def initialize(bot, ev)
@bot = bot
@tweet = ev
@ -138,7 +123,7 @@ module Ebooks
# @return [Hash{String => Ebooks::Conversation}] maps tweet ids to their conversation contexts
attr_accessor :conversations
# @return [Range, Integer] range of seconds to delay in delay method
attr_accessor :delay
attr_accessor :delay_range
# @return [Array] list of all defined bots
def self.all; @@all ||= []; end
@ -161,24 +146,17 @@ module Ebooks
# @param b Block to call with new bot
def initialize(username, &b)
@blacklist ||= []
@userinfo ||= {}
@conversations ||= {}
# Tweet ids we've already observed, to avoid duplication
@seen_tweets ||= {}
@username = username
configure(*args, &b)
configure
b.call(self) unless b.nil?
Bot.all << self
end
# Find information we've collected about a user
# @param username [String]
# @return [Ebooks::UserInfo]
def userinfo(username)
@userinfo[username] ||= UserInfo.new(username)
end
# Find or create the conversation context for this tweet
# @param tweet [Twitter::Tweet]
# @return [Ebooks::Conversation]
@ -229,7 +207,7 @@ module Ebooks
# Calculate some meta information about a tweet relevant for replying
# @param ev [Twitter::Tweet]
# @return [Ebooks::TweetMeta]
def calc_meta(ev)
def meta(ev)
TweetMeta.new(self, ev)
end
@ -255,7 +233,7 @@ module Ebooks
return unless ev.text # If it's not a text-containing tweet, ignore it
return if ev.user.screen_name == @username # Ignore our own tweets
meta = calc_meta(ev)
meta = meta(ev)
if blacklisted?(ev.user.screen_name)
log "Blocking blacklisted user @#{ev.user.screen_name}"
@ -273,9 +251,9 @@ module Ebooks
if meta.mentions_bot?
log "Mention from @#{ev.user.screen_name}: #{ev.text}"
conversation(ev).add(ev)
fire(:mention, ev, meta)
fire(:mention, ev)
else
fire(:timeline, ev, meta)
fire(:timeline, ev)
end
elsif ev.is_a?(Twitter::Streaming::DeletedTweet) ||
@ -290,7 +268,19 @@ module Ebooks
def prepare
# Sanity check
if @username.nil?
raise ConfigurationError, "bot.username cannot be nil"
raise ConfigurationError, "bot username cannot be nil"
end
# Both consumer credentials must be present and non-empty before connecting.
# The secret is checked for emptiness itself (not the key a second time), so
# an empty consumer_secret is reported here instead of slipping through.
if @consumer_key.nil? || @consumer_key.empty? ||
   @consumer_secret.nil? || @consumer_secret.empty?
  log "Missing consumer_key or consumer_secret. These details can be acquired by registering a Twitter app at https://apps.twitter.com/"
  exit 1
end
if @access_token.nil? || @access_token.empty? ||
@access_token_secret.nil? || @access_token_secret.empty?
log "Missing access_token or access_token_secret. Please run `ebooks auth`."
exit 1
end
twitter
@ -346,20 +336,13 @@ module Ebooks
log "Sending DM to @#{ev.sender.screen_name}: #{text}"
twitter.create_direct_message(ev.sender.screen_name, text, opts)
elsif ev.is_a? Twitter::Tweet
meta = calc_meta(ev)
meta = meta(ev)
if conversation(ev).is_bot?(ev.user.screen_name)
log "Not replying to suspected bot @#{ev.user.screen_name}"
return false
end
if !meta.mentions_bot?
if !userinfo(ev.user.screen_name).can_pester?
log "Not replying: leaving @#{ev.user.screen_name} alone"
return false
end
end
log "Replying to @#{ev.user.screen_name} with: #{meta.reply_prefix + text}"
tweet = twitter.update(meta.reply_prefix + text, in_reply_to_status_id: ev.id)
conversation(tweet).add(tweet)

View file

@ -1,4 +1,4 @@
source 'http://rubygems.org'
ruby '1.9.3'
ruby '{{RUBY_VERSION}}'
gem 'twitter_ebooks'

View file

@ -1 +1 @@
worker: ruby run.rb start
worker: ebooks start

59
skeleton/bots.rb Executable file → Normal file
View file

@ -1,42 +1,55 @@
#!/usr/bin/env ruby
require 'twitter_ebooks'
# This is an example bot definition with event handlers commented out
# You can define as many of these as you like; they will run simultaneously
# You can define and instantiate as many bots as you like
Ebooks::Bot.new("{{BOT_NAME}}") do |bot|
# Consumer details come from registering an app at https://dev.twitter.com/
# OAuth details can be fetched with https://github.com/marcel/twurl
bot.consumer_key = "" # Your app consumer key
bot.consumer_secret = "" # Your app consumer secret
bot.oauth_token = "" # Token connecting the app to this account
bot.oauth_token_secret = "" # Secret connecting the app to this account
class MyBot < Ebooks::Bot
# Configuration here applies to all MyBots
def configure
# Consumer details come from registering an app at https://dev.twitter.com/
# Once you have consumer details, use "ebooks auth" for new access tokens
self.consumer_key = '' # Your app consumer key
self.consumer_secret = '' # Your app consumer secret
bot.on_message do |dm|
# Users to block instead of interacting with
self.blacklist = ['tnietzschequote']
# Range in seconds to randomize delay when bot.delay is called
self.delay_range = 1..6
end
def on_startup
scheduler.every '24h' do
# Tweet something every 24 hours
# See https://github.com/jmettraux/rufus-scheduler
# bot.tweet("hi")
# bot.pictweet("hi", "cuteselfie.jpg")
end
end
def on_message(dm)
# Reply to a DM
# bot.reply(dm, "secret secrets")
end
bot.on_follow do |user|
def on_follow(user)
# Follow a user back
# bot.follow(user[:screen_name])
end
bot.on_mention do |tweet, meta|
def on_mention(tweet)
# Reply to a mention
# bot.reply(tweet, meta[:reply_prefix] + "oh hullo")
# bot.reply(tweet, meta(tweet)[:reply_prefix] + "oh hullo")
end
bot.on_timeline do |tweet, meta|
def on_timeline(tweet)
# Reply to a tweet in the bot's timeline
# bot.reply(tweet, meta[:reply_prefix] + "nice tweet")
end
bot.scheduler.every '24h' do
# Tweet something every 24 hours
# See https://github.com/jmettraux/rufus-scheduler
# bot.tweet("hi")
# bot.pictweet("hi", "cuteselfie.jpg", ":possibly_sensitive => true")
# bot.reply(tweet, meta(tweet)[:reply_prefix] + "nice tweet")
end
end
# Make a MyBot and attach it to an account
MyBot.new("{{BOT_NAME}}") do |bot|
bot.access_token = "" # Token connecting the app to this account
bot.access_token_secret = "" # Secret connecting the app to this account
end

View file

@ -1,9 +0,0 @@
#!/usr/bin/env ruby
require_relative 'bots'
EM.run do
Ebooks::Bot.all.each do |bot|
bot.start
end
end

View file

@ -7,7 +7,6 @@ class TestBot < Ebooks::Bot
attr_accessor :twitter
def configure
self.username = "test_ebooks"
end
def on_direct_message(dm)
@ -84,7 +83,7 @@ end
describe Ebooks::Bot do
include Ebooks::Test
let(:bot) { TestBot.new }
let(:bot) { TestBot.new('test_ebooks') }
before { Timecop.freeze }
after { Timecop.return }

File diff suppressed because it is too large Load diff

View file

@ -1,18 +0,0 @@
#!/usr/bin/env ruby
# encoding: utf-8
require 'twitter_ebooks'
require 'minitest/autorun'
require 'benchmark'
module Ebooks
  # Ad-hoc keyword benchmark. The statements below sit directly in the class
  # body, so they run as soon as the file is loaded; the corpus path is taken
  # from the first command-line argument.
  class TestKeywords < Minitest::Test
    text = NLP.normalize(File.read(ARGV[0]))
    puts "Finding and ranking keywords"

    # Time the ranking and the printing of the top 50 keywords together
    timing = Benchmark.measure do
      NLP.keywords(text).top(50).each do |kw|
        puts "#{kw.text} #{kw.weight}"
      end
    end
    puts timing
  end
end

View file

@ -1,18 +0,0 @@
#!/usr/bin/env ruby
# encoding: utf-8
require 'twitter_ebooks'
require 'minitest/autorun'
module Ebooks
  # Ad-hoc tokenizer smoke check. The statements sit directly in the class
  # body, so they run when the file is loaded: ten randomly sampled sentences
  # from the test corpus are printed, each followed by its tokenization.
  class TestTokenize < Minitest::Test
    corpus = NLP.normalize(File.read(TEST_CORPUS_PATH))

    # Split and sample once; the previous version also built an unused
    # `sents` sample, splitting the whole corpus a second time for nothing.
    NLP.sentences(corpus).sample(10).each do |sent|
      p sent
      p NLP.tokenize(sent)
      puts
    end
  end
end