Bot anti-bot measures

We assume a user is a bot if its name contains 'ebooks'
or if it replies more than once within a 30-second window.
This commit is contained in:
Jaiden Mispy 2014-11-18 12:00:34 +11:00
parent 8326fc19dd
commit 29beb23502
3 changed files with 209 additions and 67 deletions

View file

@ -49,13 +49,56 @@ module Ebooks
class ConfigurationError < Exception class ConfigurationError < Exception
end end
# UserInfo tracks some meta information for how much # We track how many unprompted interactions the bot has had with
# we've interacted with a user, and how much they've responded # each user and start dropping them from mentions after two in a row
class UserInfo class UserInfo
attr_accessor :times_bugged, :times_responded attr_reader :username
def initialize attr_accessor :pester_count
self.times_bugged = 0
self.times_responded = 0 def initialize(username)
@username = username
@pester_count = 0
end
# Whether this user may still be included in an unprompted mention.
# @return [Boolean] true until pester_count reaches two
def can_pester?
  !(@pester_count >= 2)
end
end
# Represents a current "interaction state" with another user
class Interaction
attr_reader :userinfo, :received, :last_update
# Start a fresh interaction with no tweets received yet.
# @param userinfo the per-user tracking record this interaction belongs to
def initialize(userinfo)
  # Right-hand sides are evaluated left to right, so Time.now is still read last.
  @userinfo, @received, @last_update = userinfo, [], Time.now
end
# Record an incoming tweet from the other user.
#
# Appends the tweet to the received list, refreshes the last-update
# timestamp, and resets the user's pester count to zero.
# @param tweet the tweet that was just received
def receive(tweet)
  @received.push(tweet)
  @last_update = Time.now
  @userinfo.pester_count = 0
end
# Make an informed guess as to whether this user is a bot,
# based on its username and reply speed.
#
# The user is treated as a bot when its two most recent received tweets
# were created less than 30 seconds apart, or when its username contains
# "ebooks". The username check ignores case, so names such as
# "Horse_Ebooks" are caught as well.
# @return [Boolean]
def is_bot?
  if @received.length > 1
    gap = @received[-1].created_at - @received[-2].created_at
    return true if gap < 30
  end

  @userinfo.username.downcase.include?("ebooks")
end
def continue?
if is_bot?
true if @received.length < 2
else
true
end
end end
end end
@ -80,13 +123,27 @@ module Ebooks
STDOUT.flush STDOUT.flush
end end
def initialize def initialize(*args, &b)
@username ||= nil @username ||= nil
@blacklist ||= [] @blacklist ||= []
@delay_range ||= 0 @delay_range ||= 0
@users ||= {} @users ||= {}
configure @interactions ||= {}
configure(*args, &b)
end
# Look up the tracking record for a user, creating and caching it on first use.
# @param username [String] screen name used as the key into @users
# @return the record stored in @users for this username
def userinfo(username)
  known = @users[username]
  return known if known

  @users[username] = UserInfo.new(username)
end
def interaction(username)
if @interactions[username] &&
Time.now - @interactions[username].last_update < 600
@interactions[username]
else
@interactions[username] = Interaction.new(userinfo(username))
end
end end
def make_client def make_client
@ -112,18 +169,34 @@ module Ebooks
meta = {} meta = {}
meta[:mentions] = ev.attrs[:entities][:user_mentions].map { |x| x[:screen_name] } meta[:mentions] = ev.attrs[:entities][:user_mentions].map { |x| x[:screen_name] }
# To check if this is someone talking to us, ensure:
# - The tweet mentions list contains our username
# - The tweet is not being retweeted by somebody else
# - Or soft-retweeted by somebody else
meta[:mentions_bot] = meta[:mentions].map(&:downcase).include?(@username.downcase) && !ev.retweeted_status? && !ev.text.start_with?('RT ')
# Process mentions to figure out who to reply to
reply_mentions = meta[:mentions].reject { |m| m.downcase == @username.downcase } reply_mentions = meta[:mentions].reject { |m| m.downcase == @username.downcase }
reply_mentions = [ev.user.screen_name] + reply_mentions reply_mentions = reply_mentions.select { |username| userinfo(username).can_pester? }
meta[:reply_mentions] = [ev.user.screen_name] + reply_mentions
# Don't reply to more than three users at a time meta[:reply_prefix] = meta[:reply_mentions].uniq.map { |m| '@'+m }.join(' ') + ' '
if reply_mentions.length > 3
log "Truncating reply_mentions to the first three users"
reply_mentions = reply_mentions[0..2]
end
meta[:reply_prefix] = reply_mentions.uniq.map { |m| '@'+m }.join(' ') + ' '
meta[:limit] = 140 - meta[:reply_prefix].length meta[:limit] = 140 - meta[:reply_prefix].length
mless = ev.text
begin
ev.attrs[:entities][:user_mentions].reverse.each do |entity|
last = mless[entity[:indices][1]..-1]||''
mless = mless[0...entity[:indices][0]] + last.strip
end
rescue Exception
p ev.attrs[:entities][:user_mentions]
p ev.text
raise
end
meta[:mentionless] = mless
meta meta
end end
@ -150,29 +223,14 @@ module Ebooks
meta = calc_meta(ev) meta = calc_meta(ev)
mless = ev.text if meta[:mentions_bot]
begin
ev.attrs[:entities][:user_mentions].reverse.each do |entity|
last = mless[entity[:indices][1]..-1]||''
mless = mless[0...entity[:indices][0]] + last.strip
end
rescue Exception
p ev.attrs[:entities][:user_mentions]
p ev.text
raise
end
meta[:mentionless] = mless
# To check if this is a mention, ensure:
# - The tweet mentions list contains our username
# - The tweet is not being retweeted by somebody else
# - Or soft-retweeted by somebody else
if meta[:mentions].map(&:downcase).include?(@username.downcase) && !ev.retweeted_status? && !ev.text.start_with?('RT ')
log "Mention from @#{ev.user.screen_name}: #{ev.text}" log "Mention from @#{ev.user.screen_name}: #{ev.text}"
interaction(ev.user.screen_name).receive(ev)
fire(:mention, ev, meta) fire(:mention, ev, meta)
else else
fire(:timeline, ev, meta) fire(:timeline, ev, meta)
end end
elsif ev.is_a? Twitter::Streaming::DeletedTweet elsif ev.is_a? Twitter::Streaming::DeletedTweet
# pass # pass
else else
@ -242,9 +300,20 @@ module Ebooks
elsif ev.is_a? Twitter::Tweet elsif ev.is_a? Twitter::Tweet
meta = calc_meta(ev) meta = calc_meta(ev)
return if blacklisted?(ev.user.screen_name) if blacklisted?(ev.user.screen_name)
log "Replying to @#{ev.user.screen_name} with: #{text}" log "Not replying to blacklisted user @#{ev.user.screen_name}"
return
elsif !interaction(ev.user.screen_name).continue?
log "Not replying to suspected bot @#{ev.user.screen_name}"
return
end
log "Replying to @#{ev.user.screen_name} with: #{meta[:reply_prefix] + text}"
@twitter.update(meta[:reply_prefix] + text, in_reply_to_status_id: ev.id) @twitter.update(meta[:reply_prefix] + text, in_reply_to_status_id: ev.id)
meta[:reply_mentions].each do |username|
userinfo(username).pester_count += 1
end
else else
raise Exception("Don't know how to reply to a #{ev.class}") raise Exception("Don't know how to reply to a #{ev.class}")
end end

View file

@ -1,6 +1,7 @@
require 'spec_helper' require 'spec_helper'
require 'memory_profiler' require 'memory_profiler'
require 'tempfile' require 'tempfile'
require 'timecop'
def Process.rss; `ps -o rss= -p #{Process.pid}`.chomp.to_i; end def Process.rss; `ps -o rss= -p #{Process.pid}`.chomp.to_i; end
@ -24,22 +25,31 @@ class TestBot < Ebooks::Bot
end end
end end
module Ebooks::Test
# Generates a random twitter id
def twitter_id def twitter_id
533295311591337984 (rand*10**18).to_i
end end
# Creates a mock direct message
# @param username User sending the DM
# @param text DM content
def mock_dm(username, text) def mock_dm(username, text)
Twitter::DirectMessage.new(id: twitter_id, Twitter::DirectMessage.new(id: twitter_id,
sender: { id: twitter_id, screen_name: username}, sender: { id: twitter_id, screen_name: username},
text: text) text: text)
end end
# Creates a mock tweet
# @param username User sending the tweet
# @param text Tweet content
def mock_tweet(username, text) def mock_tweet(username, text)
mentions = text.split.find_all { |x| x.start_with?('@') } mentions = text.split.find_all { |x| x.start_with?('@') }
Twitter::Tweet.new( Twitter::Tweet.new(
id: twitter_id, id: twitter_id,
user: { id: twitter_id, screen_name: username }, user: { id: twitter_id, screen_name: username },
text: text, text: text,
created_at: Time.now.to_s,
entities: { entities: {
user_mentions: mentions.map { |m| user_mentions: mentions.map { |m|
{ screen_name: m.split('@')[1], { screen_name: m.split('@')[1],
@ -49,27 +59,89 @@ def mock_tweet(username, text)
) )
end end
# Run a block of bot interactions against a fresh "twitter" spy.
# @param bot the bot under test; its twitter client is replaced before the block runs
# @return whatever the block returns
def simulate(bot, &b)
  fake_client = spy("twitter")
  bot.twitter = fake_client
  b.call
end
# Assert that the bot sent a direct message with the given content,
# then install a fresh spy so later expectations start clean.
# @param bot the bot under test
# @param content [String] expected message text
def expect_direct_message(bot, content)
  client = bot.twitter
  expect(client).to have_received(:create_direct_message).with(anything(), content, {})
  bot.twitter = spy("twitter")
end
# Assert that the bot posted a tweet with the given content,
# then install a fresh spy so later expectations start clean.
# @param bot the bot under test
# @param content [String] expected tweet text, including the reply prefix
def expect_tweet(bot, content)
  client = bot.twitter
  expect(client).to have_received(:update).with(content, anything())
  bot.twitter = spy("twitter")
end
end
describe Ebooks::Bot do describe Ebooks::Bot do
include Ebooks::Test
let(:bot) { TestBot.new } let(:bot) { TestBot.new }
before { Timecop.freeze }
after { Timecop.return }
it "responds to dms" do it "responds to dms" do
bot.twitter = double("twitter") simulate(bot) do
expect(bot.twitter).to receive(:create_direct_message).with("m1sp", "echo: this is a dm", {})
bot.receive_event(mock_dm("m1sp", "this is a dm")) bot.receive_event(mock_dm("m1sp", "this is a dm"))
expect_direct_message(bot, "echo: this is a dm")
end
end end
it "responds to mentions" do it "responds to mentions" do
bot.twitter = double("twitter") simulate(bot) do
expect(bot.twitter).to receive(:update).with("@m1sp echo: this is a mention",
in_reply_to_status_id: twitter_id)
bot.receive_event(mock_tweet("m1sp", "@test_ebooks this is a mention")) bot.receive_event(mock_tweet("m1sp", "@test_ebooks this is a mention"))
expect_tweet(bot, "@m1sp echo: this is a mention")
end
end end
it "responds to timeline tweets" do it "responds to timeline tweets" do
bot.twitter = double("twitter") simulate(bot) do
expect(bot.twitter).to receive(:update).with("@m1sp fine tweet good sir",
in_reply_to_status_id: twitter_id)
bot.receive_event(mock_tweet("m1sp", "some excellent tweet")) bot.receive_event(mock_tweet("m1sp", "some excellent tweet"))
expect_tweet(bot, "@m1sp fine tweet good sir")
end
end
# m1sp is mentioned by spammer three times but never tweets at the bot.
# The first two replies still include @m1sp; the third drops them.
it "stops mentioning people after a certain limit" do
simulate(bot) do
bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 1"))
expect_tweet(bot, "@spammer @m1sp echo: 1")
# Presumably spaces the tweets more than 30 seconds apart so spammer
# is not flagged as a bot by reply speed.
Timecop.travel(Time.now + 60)
bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 2"))
expect_tweet(bot, "@spammer @m1sp echo: 2")
Timecop.travel(Time.now + 60)
bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 3"))
# Third reply: only the tweet's author remains in the prefix.
expect_tweet(bot, "@spammer echo: 3")
end
end
# Same scenario as the limit test, except m1sp tweets at the bot in between,
# so m1sp is still mentioned in the final reply.
it "doesn't stop mentioning them if they reply" do
simulate(bot) do
bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 4"))
expect_tweet(bot, "@spammer @m1sp echo: 4")
Timecop.travel(Time.now + 60)
# m1sp mentions the bot directly here.
bot.receive_event(mock_tweet("m1sp", "@spammer @test_ebooks 5"))
expect_tweet(bot, "@m1sp @spammer echo: 5")
Timecop.travel(Time.now + 60)
bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 6"))
# @m1sp is still included in the reply prefix.
expect_tweet(bot, "@spammer @m1sp echo: 6")
end
end
it "doesn't get into infinite bot conversations" do
simulate(bot) do
bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 7"))
expect_tweet(bot, "@spammer @m1sp echo: 7")
Timecop.travel(Time.now + 10)
bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 8"))
expect(bot.twitter).to_not have_received(:update)
end
end end
end end

View file

@ -19,6 +19,7 @@ Gem::Specification.new do |gem|
gem.add_development_dependency 'rspec-mocks' gem.add_development_dependency 'rspec-mocks'
gem.add_development_dependency 'memory_profiler' gem.add_development_dependency 'memory_profiler'
gem.add_development_dependency 'pry-byebug' gem.add_development_dependency 'pry-byebug'
gem.add_development_dependency 'timecop'
gem.add_runtime_dependency 'twitter', '~> 5.0' gem.add_runtime_dependency 'twitter', '~> 5.0'
gem.add_runtime_dependency 'simple_oauth' gem.add_runtime_dependency 'simple_oauth'