commit
						763bfe5efd
					
				
					 5 changed files with 77 additions and 15 deletions
				
			
		
							
								
								
									
										28
									
								
								bin/ebooks
									
										
									
									
									
								
							
							
						
						
									
										28
									
								
								bin/ebooks
									
										
									
									
									
								
							| 
						 | 
					@ -62,6 +62,32 @@ STR
 | 
				
			||||||
    end
 | 
					    end
 | 
				
			||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def self.consume_all(name, paths)
 | 
				
			||||||
 | 
					    usage = <<STR
 | 
				
			||||||
 | 
					Usage: ebooks consume-all <name> <corpus_path> [corpus_path2] [...]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Processes some number of text files or json tweet corpuses
 | 
				
			||||||
 | 
					into one usable model. It will be output at model/<name>.model
 | 
				
			||||||
 | 
					STR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if paths.empty?
 | 
				
			||||||
 | 
					      log usage
 | 
				
			||||||
 | 
					      exit
 | 
				
			||||||
 | 
					    end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    outpath = File.join(APP_PATH, 'model', "#{name}.model")
 | 
				
			||||||
 | 
					    #pathes.each do |path|
 | 
				
			||||||
 | 
					    #  filename = File.basename(path)
 | 
				
			||||||
 | 
					    #  shortname = filename.split('.')[0..-2].join('.')
 | 
				
			||||||
 | 
					    #
 | 
				
			||||||
 | 
					    #  outpath = File.join(APP_PATH, 'model', "#{shortname}.model")
 | 
				
			||||||
 | 
					    #  Model.consume(path).save(outpath)
 | 
				
			||||||
 | 
					    #  log "Corpus consumed to #{outpath}"
 | 
				
			||||||
 | 
					    #end
 | 
				
			||||||
 | 
					    Model.consume_all(paths).save(outpath)
 | 
				
			||||||
 | 
					    log "Corpuses consumed to #{outpath}"
 | 
				
			||||||
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def self.gen(model_path, input)
 | 
					  def self.gen(model_path, input)
 | 
				
			||||||
    usage = <<STR
 | 
					    usage = <<STR
 | 
				
			||||||
Usage: ebooks gen <model_path> [input]
 | 
					Usage: ebooks gen <model_path> [input]
 | 
				
			||||||
| 
						 | 
					@ -187,6 +213,7 @@ STR
 | 
				
			||||||
Usage:
 | 
					Usage:
 | 
				
			||||||
     ebooks new <reponame>
 | 
					     ebooks new <reponame>
 | 
				
			||||||
     ebooks consume <corpus_path> [corpus_path2] [...]
 | 
					     ebooks consume <corpus_path> [corpus_path2] [...]
 | 
				
			||||||
 | 
					     ebooks consume-all <corpus_path> [corpus_path2] [...]
 | 
				
			||||||
     ebooks gen <model_path> [input]
 | 
					     ebooks gen <model_path> [input]
 | 
				
			||||||
     ebooks score <model_path> <input>
 | 
					     ebooks score <model_path> <input>
 | 
				
			||||||
     ebooks archive <@user> <outpath>
 | 
					     ebooks archive <@user> <outpath>
 | 
				
			||||||
| 
						 | 
					@ -202,6 +229,7 @@ STR
 | 
				
			||||||
    case args[0]
 | 
					    case args[0]
 | 
				
			||||||
    when "new" then new(args[1])
 | 
					    when "new" then new(args[1])
 | 
				
			||||||
    when "consume" then consume(args[1..-1])
 | 
					    when "consume" then consume(args[1..-1])
 | 
				
			||||||
 | 
					    when "consume-all" then consume_all(args[1], args[2..-1])
 | 
				
			||||||
    when "gen" then gen(args[1], args[2..-1].join(' '))
 | 
					    when "gen" then gen(args[1], args[2..-1].join(' '))
 | 
				
			||||||
    when "score" then score(args[1], args[2..-1].join(' '))
 | 
					    when "score" then score(args[1], args[2..-1].join(' '))
 | 
				
			||||||
    when "archive" then archive(args[1], args[2])
 | 
					    when "archive" then archive(args[1], args[2])
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -31,14 +31,12 @@ module Ebooks
 | 
				
			||||||
        end
 | 
					        end
 | 
				
			||||||
      end
 | 
					      end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      Twitter.configure do |config|
 | 
					      Twitter::REST::Client.new do |config|
 | 
				
			||||||
        config.consumer_key = @config[:consumer_key]
 | 
					        config.consumer_key = @config[:consumer_key]
 | 
				
			||||||
        config.consumer_secret = @config[:consumer_secret]
 | 
					        config.consumer_secret = @config[:consumer_secret]
 | 
				
			||||||
        config.oauth_token = @config[:oauth_token]
 | 
					        config.oauth_token = @config[:oauth_token]
 | 
				
			||||||
        config.oauth_token_secret = @config[:oauth_token_secret]
 | 
					        config.oauth_token_secret = @config[:oauth_token_secret]
 | 
				
			||||||
      end
 | 
					      end
 | 
				
			||||||
 | 
					 | 
				
			||||||
      Twitter::Client.new
 | 
					 | 
				
			||||||
    end
 | 
					    end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def initialize(username, path, client=nil)
 | 
					    def initialize(username, path, client=nil)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -43,15 +43,13 @@ module Ebooks
 | 
				
			||||||
        config.oauth_token_secret = @oauth_token_secret
 | 
					        config.oauth_token_secret = @oauth_token_secret
 | 
				
			||||||
      end
 | 
					      end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      Twitter.configure do |config|
 | 
					      @twitter = Twitter::REST::Client.new do |config|
 | 
				
			||||||
        config.consumer_key = @consumer_key
 | 
					        config.consumer_key = @consumer_key
 | 
				
			||||||
        config.consumer_secret = @consumer_secret
 | 
					        config.consumer_secret = @consumer_secret
 | 
				
			||||||
        config.oauth_token = @oauth_token
 | 
					        config.oauth_token = @oauth_token
 | 
				
			||||||
        config.oauth_token_secret = @oauth_token_secret
 | 
					        config.oauth_token_secret = @oauth_token_secret
 | 
				
			||||||
      end
 | 
					      end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      @twitter = Twitter::Client.new
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
      needs_stream = [@on_follow, @on_message, @on_mention, @on_timeline].any? {|e| !e.nil?}
 | 
					      needs_stream = [@on_follow, @on_message, @on_mention, @on_timeline].any? {|e| !e.nil?}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      @stream = TweetStream::Client.new if needs_stream
 | 
					      @stream = TweetStream::Client.new if needs_stream
 | 
				
			||||||
| 
						 | 
					@ -90,19 +88,19 @@ module Ebooks
 | 
				
			||||||
      end
 | 
					      end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      @stream.userstream do |ev|
 | 
					      @stream.userstream do |ev|
 | 
				
			||||||
        next unless ev[:text] # If it's not a text-containing tweet, ignore it
 | 
					        next unless ev.text # If it's not a text-containing tweet, ignore it
 | 
				
			||||||
        next if ev[:user][:screen_name] == @username # Ignore our own tweets
 | 
					        next if ev.user.screen_name == @username # Ignore our own tweets
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        meta = {}
 | 
					        meta = {}
 | 
				
			||||||
        mentions = ev.attrs[:entities][:user_mentions].map { |x| x[:screen_name] }
 | 
					        mentions = ev.attrs[:entities][:user_mentions].map { |x| x[:screen_name] }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        reply_mentions = mentions.reject { |m| m.downcase == @username.downcase }
 | 
					        reply_mentions = mentions.reject { |m| m.downcase == @username.downcase }
 | 
				
			||||||
        reply_mentions = [ev[:user][:screen_name]] + reply_mentions
 | 
					        reply_mentions = [ev.user.screen_name] + reply_mentions
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        meta[:reply_prefix] = reply_mentions.uniq.map { |m| '@'+m }.join(' ') + ' '
 | 
					        meta[:reply_prefix] = reply_mentions.uniq.map { |m| '@'+m }.join(' ') + ' '
 | 
				
			||||||
        meta[:limit] = 140 - meta[:reply_prefix].length
 | 
					        meta[:limit] = 140 - meta[:reply_prefix].length
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        mless = ev[:text]
 | 
					        mless = ev.text
 | 
				
			||||||
        begin
 | 
					        begin
 | 
				
			||||||
          ev.attrs[:entities][:user_mentions].reverse.each do |entity|
 | 
					          ev.attrs[:entities][:user_mentions].reverse.each do |entity|
 | 
				
			||||||
            last = mless[entity[:indices][1]..-1]||''
 | 
					            last = mless[entity[:indices][1]..-1]||''
 | 
				
			||||||
| 
						 | 
					@ -119,8 +117,8 @@ module Ebooks
 | 
				
			||||||
        # - The tweet mentions list contains our username
 | 
					        # - The tweet mentions list contains our username
 | 
				
			||||||
        # - The tweet is not being retweeted by somebody else
 | 
					        # - The tweet is not being retweeted by somebody else
 | 
				
			||||||
        # - Or soft-retweeted by somebody else
 | 
					        # - Or soft-retweeted by somebody else
 | 
				
			||||||
        if mentions.map(&:downcase).include?(@username.downcase) && !ev[:retweeted_status] && !ev[:text].start_with?('RT ')
 | 
					        if mentions.map(&:downcase).include?(@username.downcase) && !ev.retweeted_status? && !ev.text.start_with?('RT ')
 | 
				
			||||||
          log "Mention from @#{ev[:user][:screen_name]}: #{ev[:text]}"
 | 
					          log "Mention from @#{ev.user.screen_name}: #{ev.text}"
 | 
				
			||||||
          @on_mention.call(ev, meta) if @on_mention
 | 
					          @on_mention.call(ev, meta) if @on_mention
 | 
				
			||||||
        else
 | 
					        else
 | 
				
			||||||
          @on_timeline.call(ev, meta) if @on_timeline
 | 
					          @on_timeline.call(ev, meta) if @on_timeline
 | 
				
			||||||
| 
						 | 
					@ -144,8 +142,8 @@ module Ebooks
 | 
				
			||||||
        log "Sending DM to @#{ev[:sender][:screen_name]}: #{text}"
 | 
					        log "Sending DM to @#{ev[:sender][:screen_name]}: #{text}"
 | 
				
			||||||
        @twitter.direct_message_create(ev[:sender][:screen_name], text, opts)
 | 
					        @twitter.direct_message_create(ev[:sender][:screen_name], text, opts)
 | 
				
			||||||
      elsif ev.is_a? Twitter::Tweet
 | 
					      elsif ev.is_a? Twitter::Tweet
 | 
				
			||||||
        log "Replying to @#{ev[:user][:screen_name]} with: #{text}"
 | 
					        log "Replying to @#{ev.user.screen_name} with: #{text}"
 | 
				
			||||||
        @twitter.update(text, in_reply_to_status_id: ev[:id])
 | 
					        @twitter.update(text, in_reply_to_status_id: ev.id)
 | 
				
			||||||
      else
 | 
					      else
 | 
				
			||||||
        raise Exception("Don't know how to reply to a #{ev.class}")
 | 
					        raise Exception("Don't know how to reply to a #{ev.class}")
 | 
				
			||||||
      end
 | 
					      end
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -14,6 +14,10 @@ module Ebooks
 | 
				
			||||||
      Model.new.consume(txtpath)
 | 
					      Model.new.consume(txtpath)
 | 
				
			||||||
    end
 | 
					    end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def self.consume_all(paths)
 | 
				
			||||||
 | 
					      Model.new.consume_all(paths)
 | 
				
			||||||
 | 
					    end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def self.load(path)
 | 
					    def self.load(path)
 | 
				
			||||||
      model = Model.new
 | 
					      model = Model.new
 | 
				
			||||||
      model.instance_eval do
 | 
					      model.instance_eval do
 | 
				
			||||||
| 
						 | 
					@ -87,6 +91,10 @@ module Ebooks
 | 
				
			||||||
        lines = content.split("\n")
 | 
					        lines = content.split("\n")
 | 
				
			||||||
      end
 | 
					      end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      consume_lines(lines)
 | 
				
			||||||
 | 
					    end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def consume_lines(lines)
 | 
				
			||||||
      log "Removing commented lines and sorting mentions"
 | 
					      log "Removing commented lines and sorting mentions"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      statements = []
 | 
					      statements = []
 | 
				
			||||||
| 
						 | 
					@ -118,6 +126,36 @@ module Ebooks
 | 
				
			||||||
      self
 | 
					      self
 | 
				
			||||||
    end
 | 
					    end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def consume_all(paths)
 | 
				
			||||||
 | 
					      lines = []
 | 
				
			||||||
 | 
					      paths.each do |path|
 | 
				
			||||||
 | 
					        content = File.read(path, :encoding => 'utf-8')
 | 
				
			||||||
 | 
					        @hash = Digest::MD5.hexdigest(content)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if path.split('.')[-1] == "json"
 | 
				
			||||||
 | 
					          log "Reading json corpus from #{path}"
 | 
				
			||||||
 | 
					          l = JSON.parse(content).map do |tweet|
 | 
				
			||||||
 | 
					            tweet['text']
 | 
				
			||||||
 | 
					          end
 | 
				
			||||||
 | 
					          lines.concat(l)
 | 
				
			||||||
 | 
					        elsif path.split('.')[-1] == "csv"
 | 
				
			||||||
 | 
					          log "Reading CSV corpus from #{path}"
 | 
				
			||||||
 | 
					          content = CSV.parse(content)
 | 
				
			||||||
 | 
					          header = content.shift
 | 
				
			||||||
 | 
					          text_col = header.index('text')
 | 
				
			||||||
 | 
					          l = content.map do |tweet|
 | 
				
			||||||
 | 
					            tweet[text_col]
 | 
				
			||||||
 | 
					          end
 | 
				
			||||||
 | 
					          lines.concat(l)
 | 
				
			||||||
 | 
					        else
 | 
				
			||||||
 | 
					          log "Reading plaintext corpus from #{path}"
 | 
				
			||||||
 | 
					          l = content.split("\n")
 | 
				
			||||||
 | 
					          lines.concat(l)
 | 
				
			||||||
 | 
					        end
 | 
				
			||||||
 | 
					      end
 | 
				
			||||||
 | 
					      consume_lines(lines)
 | 
				
			||||||
 | 
					    end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def fix(tweet)
 | 
					    def fix(tweet)
 | 
				
			||||||
      # This seems to require an external api call
 | 
					      # This seems to require an external api call
 | 
				
			||||||
      #begin
 | 
					      #begin
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -19,7 +19,7 @@ Gem::Specification.new do |gem|
 | 
				
			||||||
  gem.add_development_dependency 'memory_profiler'
 | 
					  gem.add_development_dependency 'memory_profiler'
 | 
				
			||||||
  gem.add_development_dependency 'pry-byebug'
 | 
					  gem.add_development_dependency 'pry-byebug'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  gem.add_runtime_dependency 'twitter', '~> 4.0'
 | 
					  gem.add_runtime_dependency 'twitter', '~> 5.0'
 | 
				
			||||||
  gem.add_runtime_dependency 'simple_oauth', '~> 0.2.0'
 | 
					  gem.add_runtime_dependency 'simple_oauth', '~> 0.2.0'
 | 
				
			||||||
  gem.add_runtime_dependency 'tweetstream'
 | 
					  gem.add_runtime_dependency 'tweetstream'
 | 
				
			||||||
  gem.add_runtime_dependency 'rufus-scheduler'
 | 
					  gem.add_runtime_dependency 'rufus-scheduler'
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue