commit
763bfe5efd
5 changed files with 77 additions and 15 deletions
28
bin/ebooks
28
bin/ebooks
|
@ -62,6 +62,32 @@ STR
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# CLI entry point for `ebooks consume-all`: merges several corpus files
# into one model and saves it at model/<name>.model under APP_PATH.
#
# name  - model name used to build the output filename; may be nil when
#         the user supplied no arguments.
# paths - Array of corpus paths; may be nil, because the dispatcher passes
#         args[2..-1], which is nil when fewer than two arguments exist.
#
# Logs the usage text and exits when the name or the paths are missing.
def self.consume_all(name, paths)
  usage = <<STR
Usage: ebooks consume-all <name> <corpus_path> [corpus_path2] [...]

Processes some number of text files or json tweet corpuses
into one usable model. It will be output at model/<name>.model
STR

  # Guard against nil as well as empty: a bare `ebooks consume-all`
  # reaches this method with both arguments nil.
  if name.nil? || paths.nil? || paths.empty?
    log usage
    exit
  end

  outpath = File.join(APP_PATH, 'model', "#{name}.model")
  Model.consume_all(paths).save(outpath)
  log "Corpuses consumed to #{outpath}"
end
|
||||||
|
|
||||||
def self.gen(model_path, input)
|
def self.gen(model_path, input)
|
||||||
usage = <<STR
|
usage = <<STR
|
||||||
Usage: ebooks gen <model_path> [input]
|
Usage: ebooks gen <model_path> [input]
|
||||||
|
@ -187,6 +213,7 @@ STR
|
||||||
Usage:
|
Usage:
|
||||||
ebooks new <reponame>
|
ebooks new <reponame>
|
||||||
ebooks consume <corpus_path> [corpus_path2] [...]
|
ebooks consume <corpus_path> [corpus_path2] [...]
|
||||||
|
ebooks consume-all <corpus_path> [corpus_path2] [...]
|
||||||
ebooks gen <model_path> [input]
|
ebooks gen <model_path> [input]
|
||||||
ebooks score <model_path> <input>
|
ebooks score <model_path> <input>
|
||||||
ebooks archive <@user> <outpath>
|
ebooks archive <@user> <outpath>
|
||||||
|
@ -202,6 +229,7 @@ STR
|
||||||
case args[0]
|
case args[0]
|
||||||
when "new" then new(args[1])
|
when "new" then new(args[1])
|
||||||
when "consume" then consume(args[1..-1])
|
when "consume" then consume(args[1..-1])
|
||||||
|
when "consume-all" then consume_all(args[1], args[2..-1])
|
||||||
when "gen" then gen(args[1], args[2..-1].join(' '))
|
when "gen" then gen(args[1], args[2..-1].join(' '))
|
||||||
when "score" then score(args[1], args[2..-1].join(' '))
|
when "score" then score(args[1], args[2..-1].join(' '))
|
||||||
when "archive" then archive(args[1], args[2])
|
when "archive" then archive(args[1], args[2])
|
||||||
|
|
|
@ -31,14 +31,12 @@ module Ebooks
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
Twitter.configure do |config|
|
Twitter::REST::Client.new do |config|
|
||||||
config.consumer_key = @config[:consumer_key]
|
config.consumer_key = @config[:consumer_key]
|
||||||
config.consumer_secret = @config[:consumer_secret]
|
config.consumer_secret = @config[:consumer_secret]
|
||||||
config.oauth_token = @config[:oauth_token]
|
config.oauth_token = @config[:oauth_token]
|
||||||
config.oauth_token_secret = @config[:oauth_token_secret]
|
config.oauth_token_secret = @config[:oauth_token_secret]
|
||||||
end
|
end
|
||||||
|
|
||||||
Twitter::Client.new
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def initialize(username, path, client=nil)
|
def initialize(username, path, client=nil)
|
||||||
|
|
|
@ -43,15 +43,13 @@ module Ebooks
|
||||||
config.oauth_token_secret = @oauth_token_secret
|
config.oauth_token_secret = @oauth_token_secret
|
||||||
end
|
end
|
||||||
|
|
||||||
Twitter.configure do |config|
|
@twitter = Twitter::REST::Client.new do |config|
|
||||||
config.consumer_key = @consumer_key
|
config.consumer_key = @consumer_key
|
||||||
config.consumer_secret = @consumer_secret
|
config.consumer_secret = @consumer_secret
|
||||||
config.oauth_token = @oauth_token
|
config.oauth_token = @oauth_token
|
||||||
config.oauth_token_secret = @oauth_token_secret
|
config.oauth_token_secret = @oauth_token_secret
|
||||||
end
|
end
|
||||||
|
|
||||||
@twitter = Twitter::Client.new
|
|
||||||
|
|
||||||
needs_stream = [@on_follow, @on_message, @on_mention, @on_timeline].any? {|e| !e.nil?}
|
needs_stream = [@on_follow, @on_message, @on_mention, @on_timeline].any? {|e| !e.nil?}
|
||||||
|
|
||||||
@stream = TweetStream::Client.new if needs_stream
|
@stream = TweetStream::Client.new if needs_stream
|
||||||
|
@ -90,19 +88,19 @@ module Ebooks
|
||||||
end
|
end
|
||||||
|
|
||||||
@stream.userstream do |ev|
|
@stream.userstream do |ev|
|
||||||
next unless ev[:text] # If it's not a text-containing tweet, ignore it
|
next unless ev.text # If it's not a text-containing tweet, ignore it
|
||||||
next if ev[:user][:screen_name] == @username # Ignore our own tweets
|
next if ev.user.screen_name == @username # Ignore our own tweets
|
||||||
|
|
||||||
meta = {}
|
meta = {}
|
||||||
mentions = ev.attrs[:entities][:user_mentions].map { |x| x[:screen_name] }
|
mentions = ev.attrs[:entities][:user_mentions].map { |x| x[:screen_name] }
|
||||||
|
|
||||||
reply_mentions = mentions.reject { |m| m.downcase == @username.downcase }
|
reply_mentions = mentions.reject { |m| m.downcase == @username.downcase }
|
||||||
reply_mentions = [ev[:user][:screen_name]] + reply_mentions
|
reply_mentions = [ev.user.screen_name] + reply_mentions
|
||||||
|
|
||||||
meta[:reply_prefix] = reply_mentions.uniq.map { |m| '@'+m }.join(' ') + ' '
|
meta[:reply_prefix] = reply_mentions.uniq.map { |m| '@'+m }.join(' ') + ' '
|
||||||
meta[:limit] = 140 - meta[:reply_prefix].length
|
meta[:limit] = 140 - meta[:reply_prefix].length
|
||||||
|
|
||||||
mless = ev[:text]
|
mless = ev.text
|
||||||
begin
|
begin
|
||||||
ev.attrs[:entities][:user_mentions].reverse.each do |entity|
|
ev.attrs[:entities][:user_mentions].reverse.each do |entity|
|
||||||
last = mless[entity[:indices][1]..-1]||''
|
last = mless[entity[:indices][1]..-1]||''
|
||||||
|
@ -119,8 +117,8 @@ module Ebooks
|
||||||
# - The tweet mentions list contains our username
|
# - The tweet mentions list contains our username
|
||||||
# - The tweet is not being retweeted by somebody else
|
# - The tweet is not being retweeted by somebody else
|
||||||
# - Or soft-retweeted by somebody else
|
# - Or soft-retweeted by somebody else
|
||||||
if mentions.map(&:downcase).include?(@username.downcase) && !ev[:retweeted_status] && !ev[:text].start_with?('RT ')
|
if mentions.map(&:downcase).include?(@username.downcase) && !ev.retweeted_status? && !ev.text.start_with?('RT ')
|
||||||
log "Mention from @#{ev[:user][:screen_name]}: #{ev[:text]}"
|
log "Mention from @#{ev.user.screen_name}: #{ev.text}"
|
||||||
@on_mention.call(ev, meta) if @on_mention
|
@on_mention.call(ev, meta) if @on_mention
|
||||||
else
|
else
|
||||||
@on_timeline.call(ev, meta) if @on_timeline
|
@on_timeline.call(ev, meta) if @on_timeline
|
||||||
|
@ -144,8 +142,8 @@ module Ebooks
|
||||||
log "Sending DM to @#{ev[:sender][:screen_name]}: #{text}"
|
log "Sending DM to @#{ev[:sender][:screen_name]}: #{text}"
|
||||||
@twitter.direct_message_create(ev[:sender][:screen_name], text, opts)
|
@twitter.direct_message_create(ev[:sender][:screen_name], text, opts)
|
||||||
elsif ev.is_a? Twitter::Tweet
|
elsif ev.is_a? Twitter::Tweet
|
||||||
log "Replying to @#{ev[:user][:screen_name]} with: #{text}"
|
log "Replying to @#{ev.user.screen_name} with: #{text}"
|
||||||
@twitter.update(text, in_reply_to_status_id: ev[:id])
|
@twitter.update(text, in_reply_to_status_id: ev.id)
|
||||||
else
|
else
|
||||||
raise Exception("Don't know how to reply to a #{ev.class}")
|
raise Exception("Don't know how to reply to a #{ev.class}")
|
||||||
end
|
end
|
||||||
|
|
|
@ -14,6 +14,10 @@ module Ebooks
|
||||||
Model.new.consume(txtpath)
|
Model.new.consume(txtpath)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Class-level convenience: creates a fresh Model and hands it the whole
# list of corpus paths in a single call.
#
# paths - the corpus paths, forwarded unchanged to the instance method.
#
# Returns whatever the instance-level consume_all returns.
def self.consume_all(paths)
  model = Model.new
  model.consume_all(paths)
end
|
||||||
|
|
||||||
def self.load(path)
|
def self.load(path)
|
||||||
model = Model.new
|
model = Model.new
|
||||||
model.instance_eval do
|
model.instance_eval do
|
||||||
|
@ -87,6 +91,10 @@ module Ebooks
|
||||||
lines = content.split("\n")
|
lines = content.split("\n")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
consume_lines(lines)
|
||||||
|
end
|
||||||
|
|
||||||
|
def consume_lines(lines)
|
||||||
log "Removing commented lines and sorting mentions"
|
log "Removing commented lines and sorting mentions"
|
||||||
|
|
||||||
statements = []
|
statements = []
|
||||||
|
@ -118,6 +126,36 @@ module Ebooks
|
||||||
self
|
self
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Reads every corpus file in +paths+, pools their lines, and passes the
# pooled list to consume_lines.
#
# The loader is chosen from the file extension (case-insensitive):
#   .json - a JSON array of tweet objects; each object's 'text' is a line
#   .csv  - first row is the header; the 'text' column supplies the lines
#   other - plain text, one line per "\n"-separated line
#
# @hash is set to the MD5 hex digest of all file contents in order, so it
# covers the whole corpus rather than only the last file read. For a
# single path this equals the digest of that file's content.
#
# paths - Array of corpus file paths.
#
# Returns the result of consume_lines.
# Raises ArgumentError when a CSV corpus has no 'text' header column.
def consume_all(paths)
  lines = []
  digest = Digest::MD5.new

  paths.each do |path|
    content = File.read(path, encoding: 'utf-8')
    digest << content

    # File.extname looks only at the basename, so an extension-less file
    # called "json" or a dotted directory name is not mistaken for a format.
    case File.extname(path).downcase
    when '.json'
      log "Reading json corpus from #{path}"
      # compact drops entries without a 'text' key instead of passing nil on.
      lines.concat(JSON.parse(content).map { |tweet| tweet['text'] }.compact)
    when '.csv'
      log "Reading CSV corpus from #{path}"
      rows = CSV.parse(content)
      header = rows.shift
      text_col = header && header.index('text')
      raise ArgumentError, "CSV corpus #{path} has no 'text' column" unless text_col

      # Short or empty-celled rows yield nil; skip them.
      lines.concat(rows.map { |row| row[text_col] }.compact)
    else
      log "Reading plaintext corpus from #{path}"
      lines.concat(content.split("\n"))
    end
  end

  @hash = digest.hexdigest
  consume_lines(lines)
end
|
||||||
|
|
||||||
def fix(tweet)
|
def fix(tweet)
|
||||||
# This seems to require an external api call
|
# This seems to require an external api call
|
||||||
#begin
|
#begin
|
||||||
|
|
|
@ -19,7 +19,7 @@ Gem::Specification.new do |gem|
|
||||||
gem.add_development_dependency 'memory_profiler'
|
gem.add_development_dependency 'memory_profiler'
|
||||||
gem.add_development_dependency 'pry-byebug'
|
gem.add_development_dependency 'pry-byebug'
|
||||||
|
|
||||||
gem.add_runtime_dependency 'twitter', '~> 4.0'
|
gem.add_runtime_dependency 'twitter', '~> 5.0'
|
||||||
gem.add_runtime_dependency 'simple_oauth', '~> 0.2.0'
|
gem.add_runtime_dependency 'simple_oauth', '~> 0.2.0'
|
||||||
gem.add_runtime_dependency 'tweetstream'
|
gem.add_runtime_dependency 'tweetstream'
|
||||||
gem.add_runtime_dependency 'rufus-scheduler'
|
gem.add_runtime_dependency 'rufus-scheduler'
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue