twitter-ebooks/lib/twitter_ebooks/archive.rb

117 lines
3.2 KiB
Ruby
Raw Normal View History

2013-11-24 13:16:34 -08:00
#!/usr/bin/env ruby
# encoding: utf-8
require 'twitter'
require 'json'
2013-12-02 22:46:23 -08:00
# Location of the JSON file in the user's home directory that stores the
# Twitter API credentials (read and written by Ebooks::Archive#make_client).
CONFIG_PATH = [ENV['HOME'], '.ebooksrc'].join('/')
2013-11-24 13:16:34 -08:00
module Ebooks
class Archive
attr_reader :tweets
def make_client
if File.exists?(CONFIG_PATH)
@config = JSON.parse(File.read(CONFIG_PATH), symbolize_names: true)
else
@config = {}
puts "As Twitter no longer allows anonymous API access, you'll need to enter the auth details of any account to use for archiving. These will be stored in #{CONFIG_PATH} if you need to change them later."
print "Consumer key: "
@config[:consumer_key] = STDIN.gets.chomp
print "Consumer secret: "
@config[:consumer_secret] = STDIN.gets.chomp
print "Access token: "
2013-11-24 13:16:34 -08:00
@config[:oauth_token] = STDIN.gets.chomp
print "Access secret: "
2013-11-24 13:16:34 -08:00
@config[:oauth_token_secret] = STDIN.gets.chomp
File.open(CONFIG_PATH, 'w') do |f|
f.write(JSON.pretty_generate(@config))
end
end
2014-10-27 13:52:35 +01:00
Twitter::REST::Client.new do |config|
2013-11-24 13:16:34 -08:00
config.consumer_key = @config[:consumer_key]
config.consumer_secret = @config[:consumer_secret]
config.access_token = @config[:oauth_token]
config.access_token_secret = @config[:oauth_token_secret]
2013-11-24 13:16:34 -08:00
end
end
2014-12-05 22:57:32 +11:00
def initialize(username, path=nil, client=nil)
2013-11-24 13:16:34 -08:00
@username = username
2014-12-05 22:57:32 +11:00
@path = path || "corpus/#{username}.json"
if File.directory?(@path)
@path = File.join(@path, "#{username}.json")
end
2013-11-24 13:16:34 -08:00
@client = client || make_client
if (File.exists?(@path) && !File.zero?(@path))
@filetext = File.read(@path, :encoding => 'utf-8')
@tweets = JSON.parse(@filetext, symbolize_names: true)
2013-11-24 13:16:34 -08:00
log "Currently #{@tweets.length} tweets for #{@username}"
else
@tweets.nil?
log "New archive for @#{username} at #{@path}"
end
end
def sync
# We use this structure to ensure that
# a) if there's an issue opening the file, we error out before download
# b) if there's an issue during download we restore the original
File.open(@path, 'w') do |file|
begin
sync_to(file)
rescue Exception
file.seek(0)
file.write(@filetext)
raise
end
end
end
def sync_to(file)
2013-11-24 13:16:34 -08:00
retries = 0
tweets = []
max_id = nil
opts = {
count: 200,
#include_rts: false,
trim_user: true
}
opts[:since_id] = @tweets[0][:id] unless @tweets.nil?
loop do
opts[:max_id] = max_id unless max_id.nil?
begin
new = @client.user_timeline(@username, opts)
rescue Twitter::Error::TooManyRequests
log "Rate limit exceeded. Waiting for 5 mins before retry."
sleep 60*5
retry
end
2013-11-24 13:16:34 -08:00
break if new.length <= 1
tweets += new
log "Received #{tweets.length} new tweets"
2013-11-24 13:16:34 -08:00
max_id = new.last.id
end
if tweets.length == 0
log "No new tweets"
else
@tweets ||= []
@tweets = tweets.map(&:attrs).each { |tw|
tw.delete(:entities)
} + @tweets
end
file.write(JSON.pretty_generate(@tweets))
2013-11-24 13:16:34 -08:00
end
end
end