2.1.3 - better archiver
This commit is contained in:
		
							parent
							
								
									c3053e5091
								
							
						
					
					
						commit
						acc2f42b38
					
				
					 7 changed files with 124 additions and 90 deletions
				
			
		
							
								
								
									
										93
									
								
								lib/twitter_ebooks/archive.rb
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										93
									
								
								lib/twitter_ebooks/archive.rb
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,93 @@
 | 
			
		|||
#!/usr/bin/env ruby
 | 
			
		||||
# encoding: utf-8
 | 
			
		||||
 | 
			
		||||
require 'twitter'
 | 
			
		||||
require 'json'
 | 
			
		||||
 | 
			
		||||
# Location of the JSON file holding the Twitter API credentials used for
# archiving. Resolved via Dir.home rather than a hard-coded "/home/$USER" so
# it also works where home directories live elsewhere or USER is unset.
CONFIG_PATH = File.join(Dir.home, '.ebooksrc').freeze
 | 
			
		||||
 | 
			
		||||
module Ebooks
  # Keeps a local JSON archive of a Twitter user's timeline.
  #
  # The archive file holds an array of tweet attribute hashes. Newly fetched
  # tweets are prepended on each sync, and the :id of the first stored entry
  # is sent as since_id on the next sync.
  class Archive
    # Archived tweet hashes, or nil when no archive file existed at
    # construction time and no sync has stored any tweets yet.
    attr_reader :tweets

    # Builds a Twitter client from the credentials stored at CONFIG_PATH.
    #
    # When that file does not exist, the four OAuth values are read from
    # STDIN and saved to CONFIG_PATH as JSON before the client is configured.
    #
    # @return [Twitter::Client]
    def make_client
      @config =
        if File.exist?(CONFIG_PATH)
          JSON.parse(File.read(CONFIG_PATH), symbolize_names: true)
        else
          prompt_for_config
        end

      Twitter.configure do |config|
        config.consumer_key = @config[:consumer_key]
        config.consumer_secret = @config[:consumer_secret]
        config.oauth_token = @config[:oauth_token]
        config.oauth_token_secret = @config[:oauth_token_secret]
      end

      Twitter::Client.new
    end

    # @param username [String] account whose timeline is archived
    # @param path [String, nil] archive file; defaults to "<username>.json"
    # @param client [Object, nil] object responding to user_timeline; when
    #   nil, one is built with make_client
    #
    # NOTE(review): `log` is not defined in this file; presumably it is
    # provided elsewhere in the library -- confirm.
    def initialize(username, path, client=nil)
      @username = username
      @path = path || "#{username}.json"
      @client = client || make_client

      if File.exist?(@path)
        @tweets = JSON.parse(File.read(@path), symbolize_names: true)
        log "Currently #{@tweets.length} tweets for #{@username}"
      else
        # @tweets is intentionally left nil until the first successful sync.
        log "New archive for @#{username} at #{@path}"
      end
    end

    # Fetches tweets newer than the newest archived one, prepends them to the
    # archive (without their :entities key) and rewrites the archive file.
    # Nothing is written when no new tweets are returned.
    def sync
      fetched = []
      max_id = nil

      opts = {
        count: 200,
        #include_rts: false,
        trim_user: true
      }

      # An existing but empty archive has no newest tweet to resume from.
      opts[:since_id] = @tweets.first[:id] unless @tweets.nil? || @tweets.empty?

      loop do
        opts[:max_id] = max_id unless max_id.nil?
        page = @client.user_timeline(@username, opts)
        break if page.empty?
        fetched += page
        puts "Received #{fetched.length} new tweets"
        # max_id is inclusive, so step just below the oldest tweet received;
        # otherwise that tweet comes back again at the top of the next page.
        max_id = page.last.id - 1
      end

      if fetched.empty?
        log "No new tweets"
      else
        fresh = fetched.map { |tweet| tweet.attrs.reject { |key, _| key == :entities } }
        @tweets = fresh + (@tweets || [])
        File.open(@path, 'w') do |f|
          f.write(JSON.pretty_generate(@tweets))
        end
      end
    end

    private

    # Reads the OAuth credentials from STDIN, saves them to CONFIG_PATH and
    # returns them as a symbol-keyed hash.
    def prompt_for_config
      config = {}

      puts "As Twitter no longer allows anonymous API access, you'll need to enter the auth details of any account to use for archiving. These will be stored in #{CONFIG_PATH} if you need to change them later."
      print "Consumer key: "
      config[:consumer_key] = STDIN.gets.chomp
      print "Consumer secret: "
      config[:consumer_secret] = STDIN.gets.chomp
      print "Oauth token: "
      config[:oauth_token] = STDIN.gets.chomp
      print "Oauth secret: "
      config[:oauth_token_secret] = STDIN.gets.chomp

      # The file holds secrets, so create it readable by the owner only.
      File.open(CONFIG_PATH, 'w', 0600) do |f|
        f.write(JSON.pretty_generate(config))
      end

      config
    end
  end
end
 | 
			
		||||
| 
						 | 
				
			
			@ -1,82 +0,0 @@
 | 
			
		|||
#!/usr/bin/env ruby
 | 
			
		||||
# encoding: utf-8
 | 
			
		||||
 | 
			
		||||
require 'twitter'
 | 
			
		||||
 | 
			
		||||
module Ebooks
  # Maintains a plain-text corpus of a user's tweets, one tweet per line.
  #
  # Each write puts a "# <id>" header on the first line, recording the id of
  # the newest tweet fetched; that id is used as since_id on the next fetch.
  class Archiver
    # @param username [String] account whose tweets are fetched
    # @param outpath [String] path of the corpus file to read and rewrite
    def initialize(username, outpath)
      @username = username
      @outpath = outpath
      @client = Twitter::Client.new
    end

    # Read existing corpus into memory.
    # Return list of tweet lines and the last tweet id.
    #
    # The id is nil when the file is missing, empty, or does not start with
    # a "#" header line.
    def read_corpus
      lines = []
      since_id = nil

      if File.exist?(@outpath)
        lines = File.read(@outpath).split("\n")
        header = lines.first
        # An empty file yields no lines at all, so guard against a nil header.
        since_id = header.split('# ').last if header && header.start_with?('#')
      end

      [lines, since_id]
    end

    # Retrieve all available tweets for a given user since the last tweet id
    #
    # @param since_id [String, nil] only tweets newer than this id are
    #   requested; nil requests from the start of the available timeline
    # @return [Array] tweet objects in the order the client returned them
    def tweets_since(since_id)
      tweets = []
      max_id = nil

      opts = {
        count: 200,
        include_rts: false,
        trim_user: true
      }

      opts[:since_id] = since_id unless since_id.nil?

      loop do
        opts[:max_id] = max_id unless max_id.nil?
        page = @client.user_timeline(@username, opts)
        break if page.empty?
        puts "Received #{page.length} tweets"
        tweets += page
        # max_id is inclusive, so continue just below the oldest tweet seen
        # to avoid receiving it again on the next page.
        max_id = page.last.id - 1
      end

      tweets
    end

    # Fetches tweets newer than the corpus header id and rewrites the corpus
    # with a fresh header, the new tweets (newlines flattened to spaces) and
    # then the previous corpus lines. Does nothing when there are no new tweets.
    #
    # @return [nil]
    def fetch_tweets
      lines, since_id = read_corpus

      if since_id.nil?
        puts "Retrieving tweets from @#{@username}"
      else
        puts "Retrieving tweets from @#{@username} since #{since_id}"
      end

      tweets = tweets_since(since_id)

      if tweets.empty?
        puts "No new tweets"
        return
      end

      new_lines = tweets.map { |tweet| tweet.text.gsub("\n", " ") }
      new_since_id = tweets.first.id.to_s
      lines = ["# " + new_since_id] + new_lines + lines

      # Block form closes the file even if the write raises.
      File.open(@outpath, 'w') do |corpus|
        corpus.write(lines.join("\n"))
      end

      nil
    end
  end
end
 | 
			
		||||
| 
						 | 
				
			
			@ -1,3 +1,3 @@
 | 
			
		|||
module Ebooks
  # Library version string. The stale "2.1.2" assignment is dropped so the
  # constant is defined exactly once.
  VERSION = "2.1.3"
end
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue