2013-11-24 13:16:34 -08:00
#!/usr/bin/env ruby
# encoding: utf-8
require 'twitter'
require 'json'
2013-12-02 22:46:23 -08:00
# Location of the JSON file holding the Twitter API credentials used for
# archiving: ".ebooksrc" directly inside the directory named by $HOME.
# No padding inside the literal: stray spaces would become part of the path.
CONFIG_PATH = "#{ENV['HOME']}/.ebooksrc".freeze
2013-11-24 13:16:34 -08:00
module Ebooks
  # Keeps a local JSON archive of one Twitter account's tweets.
  #
  # The archive file holds a JSON array of tweet attribute hashes, newest
  # first. #sync downloads everything newer than the first stored tweet and
  # prepends it to the file.
  class Archive
    # Number of tweets requested per timeline call.
    PAGE_SIZE = 200

    # Seconds to sleep after Twitter reports that the rate limit was hit.
    RATE_LIMIT_WAIT = 60 * 5

    # Config key => prompt shown when asking the user for that credential.
    # Hash order is the order in which the questions are asked.
    CREDENTIAL_PROMPTS = {
      consumer_key: 'Consumer key: ',
      consumer_secret: 'Consumer secret: ',
      oauth_token: 'Access token: ',
      oauth_token_secret: 'Access secret: '
    }.freeze

    # @return [Array<Hash>, nil] archived tweets (symbol keys), newest first;
    #   nil while no archive has been loaded or downloaded yet
    attr_reader :tweets

    # Builds a Twitter REST client from the credentials in CONFIG_PATH.
    # If that file does not exist the credentials are asked for on standard
    # input and saved there first.
    #
    # @return [Twitter::REST::Client]
    def make_client
      @config = File.exist?(CONFIG_PATH) ? load_config : prompt_for_config

      Twitter::REST::Client.new do |config|
        config.consumer_key = @config[:consumer_key]
        config.consumer_secret = @config[:consumer_secret]
        config.access_token = @config[:oauth_token]
        config.access_token_secret = @config[:oauth_token_secret]
      end
    end

    # @param username [String] account whose timeline is archived
    # @param path [String, nil] archive file, or a directory in which
    #   "<username>.json" is used; defaults to "corpus/<username>.json"
    # @param client [Object, nil] object responding to
    #   user_timeline(username, opts); built with #make_client when nil
    def initialize(username, path = nil, client = nil)
      @username = username
      @path = path || "corpus/#{username}.json"
      @path = File.join(@path, "#{username}.json") if File.directory?(@path)
      @client = client || make_client

      if File.exist?(@path) && !File.zero?(@path)
        # The raw text is kept so #sync can put it back if a download fails.
        @filetext = File.read(@path, encoding: 'utf-8')
        @tweets = JSON.parse(@filetext, symbolize_names: true)
        log "Currently #{@tweets.length} tweets for #{@username}"
      else
        @tweets = nil
        log "New archive for @#{username} at #{@path}"
      end
    end

    # Downloads new tweets and rewrites the archive file.
    #
    # The file is opened before anything is downloaded so that
    # a) a problem opening the file fails before any download happens, and
    # b) a problem during the download restores the original contents.
    #
    # @raise [Exception] whatever #sync_to raised, after restoring the file
    def sync
      File.open(@path, 'w') do |file|
        begin
          sync_to(file)
        rescue Exception # rubocop:disable Lint/RescueException
          # Deliberately broad: an interrupt during a long download must
          # restore the file too. The exception is always re-raised.
          file.rewind
          # Drop anything already written so no bytes trail the snapshot.
          file.truncate(0)
          file.write(@filetext.to_s)
          raise
        end
      end
    end

    # Fetches every tweet newer than the newest archived one, prepends them to
    # #tweets and writes the complete archive to +file+ as pretty-printed JSON.
    #
    # @param file [#write] destination for the serialised archive
    # @return [Object] whatever file.write returns
    def sync_to(file)
      opts = {
        count: PAGE_SIZE,
        # include_rts: false,
        trim_user: true
      }
      # An archive can be nil (new) or an empty array; neither has a newest id.
      newest = (@tweets || []).first
      opts[:since_id] = newest[:id] if newest

      fetched = []
      loop do
        page = fetch_page(opts)
        break if page.empty?

        fetched.concat(page)
        log "Received #{fetched.length} new tweets"
        # max_id is inclusive on Twitter's side, so step one below the oldest
        # tweet seen; otherwise it would come back again on the next page.
        opts[:max_id] = page.last.id - 1
      end

      if fetched.empty?
        log "No new tweets"
      else
        fresh = fetched.map(&:attrs).each { |tw| tw.delete(:entities) }
        @tweets = fresh + (@tweets || [])
      end

      # Always store an array so the file can be loaded as an archive again.
      file.write(JSON.pretty_generate(@tweets || []))
    end

    private

    # Reads the stored credentials from CONFIG_PATH.
    def load_config
      JSON.parse(File.read(CONFIG_PATH), symbolize_names: true)
    end

    # Asks for each credential on standard input, saves them to CONFIG_PATH
    # and returns them as a hash.
    def prompt_for_config
      puts "As Twitter no longer allows anonymous API access, you'll need to enter the auth details of any account to use for archiving. These will be stored in #{CONFIG_PATH} if you need to change them later."

      config = {}
      CREDENTIAL_PROMPTS.each do |key, prompt|
        print prompt
        config[key] = $stdin.gets.chomp
      end

      # The file holds API secrets: create it readable by the owner only.
      File.open(CONFIG_PATH, 'w', 0o600) do |f|
        f.write(JSON.pretty_generate(config))
      end
      config
    end

    # Requests one page of the timeline. After a rate-limit error it sleeps
    # for RATE_LIMIT_WAIT seconds and repeats the same request.
    def fetch_page(opts)
      @client.user_timeline(@username, opts)
    rescue Twitter::Error::TooManyRequests
      log "Rate limit exceeded. Waiting for 5 mins before retry."
      sleep RATE_LIMIT_WAIT
      retry
    end
  end
end