Conversation-based bot detection and politeness

This commit is contained in:
Jaiden Mispy 2014-12-05 19:04:15 +11:00
parent 8708aaa3e3
commit efde0fd16f
2 changed files with 80 additions and 65 deletions

View file

@ -12,62 +12,50 @@ module Ebooks
# number of times we've interacted with a timeline tweet, unprompted # number of times we've interacted with a timeline tweet, unprompted
attr_accessor :pesters_left attr_accessor :pesters_left
# number of times we've included them in a mention that wasn't from them
attr_accessor :includes_left
def initialize(username) def initialize(username)
@username = username @username = username
@pesters_left = 1 @pesters_left = 1
@includes_left = 2
end end
def can_pester? def can_pester?
@pesters_left > 0 @pesters_left > 0
end end
def can_include?
@includes_left > 0
end
end end
# Represents a current "interaction state" with another user # Represents a single reply tree of tweets
class Interaction class Conversation
attr_reader :userinfo, :received, :last_update attr_reader :last_update
def initialize(userinfo) def initialize(bot)
@userinfo = userinfo @bot = bot
@received = [] @tweets = []
@last_update = Time.now @last_update = Time.now
end end
def receive(tweet) def add(tweet)
@received << tweet @tweets << tweet
@last_update = Time.now @last_update = Time.now
# When we receive a tweet from someone, become more
# inclined to pester them and include in mentions
@userinfo.pesters_left += 1
@userinfo.includes_left += 2
end end
# Make an informed guess as to whether this user is a bot # Make an informed guess as to whether a user is a bot based
# based on its username and reply speed # on their behavior in this conversation
def is_bot? def is_bot?(username)
if @received.length > 2 usertweets = @tweets.select { |t| t.user.screen_name == username }
if (@received[-1].created_at - @received[-3].created_at) < 30
if usertweets.length > 2
if (usertweets[-1].created_at - usertweets[-3].created_at) < 30
return true return true
end end
end end
@userinfo.username.include?("ebooks") username.include?("ebooks")
end end
def continue? # Figure out whether to keep this user in the reply prefix
if is_bot? # We want to avoid spamming non-participating users
true if @received.length < 2 def can_include?(username)
else @tweets.length <= 4 ||
true !@tweets[-4..-1].select { |t| t.user.screen_name == username }.empty?
end
end end
end end
@ -99,7 +87,7 @@ module Ebooks
# i.e. not self and nobody who has seen too many secondary mentions # i.e. not self and nobody who has seen too many secondary mentions
reply_mentions = @mentions.reject do |m| reply_mentions = @mentions.reject do |m|
username = m.downcase username = m.downcase
username == @bot.username || !@bot.userinfo(username).can_include? username == @bot.username || !@bot.conversation(ev).can_include?(username)
end end
@reply_mentions = ([ev.user.screen_name] + reply_mentions).uniq @reply_mentions = ([ev.user.screen_name] + reply_mentions).uniq
@ -130,6 +118,8 @@ module Ebooks
# Configuration # Configuration
attr_accessor :username, :delay_range, :blacklist attr_accessor :username, :delay_range, :blacklist
attr_accessor :conversations
@@all = [] # List of all defined bots @@all = [] # List of all defined bots
def self.all; @@all; end def self.all; @@all; end
@ -148,7 +138,7 @@ module Ebooks
@delay_range ||= 0 @delay_range ||= 0
@users ||= {} @users ||= {}
@interactions ||= {} @conversations ||= {}
configure(*args, &b) configure(*args, &b)
# Tweet ids we've already observed, to avoid duplication # Tweet ids we've already observed, to avoid duplication
@ -160,13 +150,29 @@ module Ebooks
@users[username] ||= UserInfo.new(username) @users[username] ||= UserInfo.new(username)
end end
def interaction(username) # Grab or create the conversation context for this tweet
if @interactions[username] && def conversation(tweet)
Time.now - @interactions[username].last_update < 600 conv = if tweet.in_reply_to_status_id?
@interactions[username] @conversations[tweet.in_reply_to_status_id]
else
@interactions[username] = Interaction.new(userinfo(username))
end end
if conv.nil?
conv = @conversations[tweet.id] || Conversation.new(self)
end
if tweet.in_reply_to_status_id?
@conversations[tweet.in_reply_to_status_id] = conv
end
@conversations[tweet.id] = conv
# Expire any old conversations to prevent memory growth
@conversations.each do |k,v|
if v != conv && Time.now - v.last_update > 3600
@conversations.delete(k)
end
end
conv
end end
def twitter def twitter
@ -222,6 +228,7 @@ module Ebooks
# Avoid responding to duplicate tweets # Avoid responding to duplicate tweets
if @seen_tweets[ev.id] if @seen_tweets[ev.id]
log "Not firing event for duplicate tweet #{ev.id}"
return return
else else
@seen_tweets[ev.id] = true @seen_tweets[ev.id] = true
@ -229,7 +236,7 @@ module Ebooks
if meta.mentions_bot? if meta.mentions_bot?
log "Mention from @#{ev.user.screen_name}: #{ev.text}" log "Mention from @#{ev.user.screen_name}: #{ev.text}"
interaction(ev.user.screen_name).receive(ev) conversation(ev).add(ev)
fire(:mention, ev, meta) fire(:mention, ev, meta)
else else
fire(:timeline, ev, meta) fire(:timeline, ev, meta)
@ -292,13 +299,12 @@ module Ebooks
opts = opts.clone opts = opts.clone
if ev.is_a? Twitter::DirectMessage if ev.is_a? Twitter::DirectMessage
return if blacklisted?(ev.sender.screen_name)
log "Sending DM to @#{ev.sender.screen_name}: #{text}" log "Sending DM to @#{ev.sender.screen_name}: #{text}"
twitter.create_direct_message(ev.sender.screen_name, text, opts) twitter.create_direct_message(ev.sender.screen_name, text, opts)
elsif ev.is_a? Twitter::Tweet elsif ev.is_a? Twitter::Tweet
meta = calc_meta(ev) meta = calc_meta(ev)
if !interaction(ev.user.screen_name).continue? if conversation(ev).is_bot?(ev.user.screen_name)
log "Not replying to suspected bot @#{ev.user.screen_name}" log "Not replying to suspected bot @#{ev.user.screen_name}"
return return
end end
@ -310,16 +316,9 @@ module Ebooks
end end
end end
meta.reply_mentions.each do |username|
# Decrease includes_left for everyone involved here who isn't
# directly talking to the bot
if !meta.mentions_bot? || username != ev.user.screen_name
userinfo(username).includes_left -= 1
end
end
log "Replying to @#{ev.user.screen_name} with: #{meta.reply_prefix + text}" log "Replying to @#{ev.user.screen_name} with: #{meta.reply_prefix + text}"
twitter.update(meta.reply_prefix + text, in_reply_to_status_id: ev.id) tweet = twitter.update(meta.reply_prefix + text, in_reply_to_status_id: ev.id)
conversation(tweet).add(tweet)
else else
raise Exception("Don't know how to reply to a #{ev.class}") raise Exception("Don't know how to reply to a #{ev.class}")
end end
@ -329,11 +328,6 @@ module Ebooks
return if blacklisted?(tweet.user.screen_name) return if blacklisted?(tweet.user.screen_name)
log "Favoriting @#{tweet.user.screen_name}: #{tweet.text}" log "Favoriting @#{tweet.user.screen_name}: #{tweet.text}"
meta = calc_meta(tweet)
#if !meta[:mentions_bot] && !userinfo(ev.user.screen_name).can_pester?
# log "Not favoriting: leaving @#{ev.user.screen_name} alone"
#end
begin begin
twitter.favorite(tweet.id) twitter.favorite(tweet.id)
rescue Twitter::Error::Forbidden rescue Twitter::Error::Forbidden
@ -342,7 +336,6 @@ module Ebooks
end end
def retweet(tweet) def retweet(tweet)
return if blacklisted?(tweet.user.screen_name)
log "Retweeting @#{tweet.user.screen_name}: #{tweet.text}" log "Retweeting @#{tweet.user.screen_name}: #{tweet.text}"
begin begin

View file

@ -43,10 +43,11 @@ module Ebooks::Test
# Creates a mock tweet # Creates a mock tweet
# @param username User sending the tweet # @param username User sending the tweet
# @param text Tweet content # @param text Tweet content
def mock_tweet(username, text) def mock_tweet(username, text, extra={})
mentions = text.split.find_all { |x| x.start_with?('@') } mentions = text.split.find_all { |x| x.start_with?('@') }
Twitter::Tweet.new( tweet = Twitter::Tweet.new({
id: twitter_id, id: twitter_id,
in_reply_to_status_id: 'mock-link',
user: { id: twitter_id, screen_name: username }, user: { id: twitter_id, screen_name: username },
text: text, text: text,
created_at: Time.now.to_s, created_at: Time.now.to_s,
@ -56,22 +57,29 @@ module Ebooks::Test
indices: [text.index(m), text.index(m)+m.length] } indices: [text.index(m), text.index(m)+m.length] }
} }
} }
) }.merge!(extra))
tweet
end
def twitter_spy(bot)
twitter = spy("twitter")
allow(twitter).to receive(:update).and_return(mock_tweet(bot.username, "test tweet"))
twitter
end end
def simulate(bot, &b) def simulate(bot, &b)
bot.twitter = spy("twitter") bot.twitter = twitter_spy(bot)
b.call b.call
end end
def expect_direct_message(bot, content) def expect_direct_message(bot, content)
expect(bot.twitter).to have_received(:create_direct_message).with(anything(), content, {}) expect(bot.twitter).to have_received(:create_direct_message).with(anything(), content, {})
bot.twitter = spy("twitter") bot.twitter = twitter_spy(bot)
end end
def expect_tweet(bot, content) def expect_tweet(bot, content)
expect(bot.twitter).to have_received(:update).with(content, anything()) expect(bot.twitter).to have_received(:update).with(content, anything())
bot.twitter = spy("twitter") bot.twitter = twitter_spy(bot)
end end
end end
@ -104,6 +112,20 @@ describe Ebooks::Bot do
end end
end end
it "links tweets to conversations correctly" do
tweet1 = mock_tweet("m1sp", "tweet 1", id: 1, in_reply_to_status_id: nil)
tweet2 = mock_tweet("m1sp", "tweet 2", id: 2, in_reply_to_status_id: 1)
tweet3 = mock_tweet("m1sp", "tweet 3", id: 3, in_reply_to_status_id: nil)
bot.conversation(tweet1).add(tweet1)
expect(bot.conversation(tweet2)).to eq(bot.conversation(tweet1))
bot.conversation(tweet2).add(tweet2)
expect(bot.conversation(tweet3)).to_not eq(bot.conversation(tweet2))
end
it "stops mentioning people after a certain limit" do it "stops mentioning people after a certain limit" do
simulate(bot) do simulate(bot) do
bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 1")) bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 1"))