Merge remote-tracking branch 'mispy/master'
This commit is contained in:
commit
7f3d372a61
17 changed files with 292 additions and 68 deletions
2
.gitattributes
vendored
Normal file
2
.gitattributes
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
# Require \n style line endings
|
||||||
|
* text eol=lf
|
|
@ -1,5 +1,5 @@
|
||||||
rvm:
|
rvm:
|
||||||
- 2.1.4
|
- 2.1.7
|
||||||
script:
|
script:
|
||||||
- rspec spec
|
- rspec spec
|
||||||
notifications:
|
notifications:
|
||||||
|
|
16
README.md
16
README.md
|
@ -21,7 +21,7 @@ Note that 3.0 is not backwards compatible with 2.x, so upgrade carefully! In par
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
Requires Ruby 2.0+
|
Requires Ruby 2.1+. Ruby 2.3+ is recommended.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
gem install twitter_ebooks
|
gem install twitter_ebooks
|
||||||
|
@ -78,6 +78,16 @@ class MyBot < Ebooks::Bot
|
||||||
# Reply to a tweet in the bot's timeline
|
# Reply to a tweet in the bot's timeline
|
||||||
# reply(tweet, meta(tweet).reply_prefix + "nice tweet")
|
# reply(tweet, meta(tweet).reply_prefix + "nice tweet")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def on_favorite(user, tweet)
|
||||||
|
# Follow user who just favorited bot's tweet
|
||||||
|
# follow(user.screen_name)
|
||||||
|
end
|
||||||
|
|
||||||
|
def on_retweet(tweet)
|
||||||
|
# Follow user who just retweeted bot's tweet
|
||||||
|
# follow(tweet.user.screen_name)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Make a MyBot and attach it to an account
|
# Make a MyBot and attach it to an account
|
||||||
|
@ -135,10 +145,10 @@ The secondary function is the "interesting keywords" list. For example, I use th
|
||||||
|
|
||||||
``` ruby
|
``` ruby
|
||||||
top100 = model.keywords.take(100)
|
top100 = model.keywords.take(100)
|
||||||
tokens = Ebooks::NLP.tokenize(tweet[:text])
|
tokens = Ebooks::NLP.tokenize(tweet.text)
|
||||||
|
|
||||||
if tokens.find { |t| top100.include?(t) }
|
if tokens.find { |t| top100.include?(t) }
|
||||||
bot.favorite(tweet[:id])
|
favorite(tweet)
|
||||||
end
|
end
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
69
bin/ebooks
69
bin/ebooks
|
@ -25,9 +25,12 @@ Usage:
|
||||||
ebooks auth
|
ebooks auth
|
||||||
ebooks consume <corpus_path> [corpus_path2] [...]
|
ebooks consume <corpus_path> [corpus_path2] [...]
|
||||||
ebooks consume-all <model_name> <corpus_path> [corpus_path2] [...]
|
ebooks consume-all <model_name> <corpus_path> [corpus_path2] [...]
|
||||||
|
ebooks append <model_name> <corpus_path>
|
||||||
ebooks gen <model_path> [input]
|
ebooks gen <model_path> [input]
|
||||||
ebooks archive <username> [path]
|
ebooks archive <username> [path]
|
||||||
|
ebooks sync <botname> [username]
|
||||||
ebooks tweet <model_path> <botname>
|
ebooks tweet <model_path> <botname>
|
||||||
|
ebooks version
|
||||||
STR
|
STR
|
||||||
|
|
||||||
def self.help(command=nil)
|
def self.help(command=nil)
|
||||||
|
@ -91,7 +94,9 @@ STR
|
||||||
filename = File.basename(path)
|
filename = File.basename(path)
|
||||||
shortname = filename.split('.')[0..-2].join('.')
|
shortname = filename.split('.')[0..-2].join('.')
|
||||||
|
|
||||||
|
FileUtils.mkdir_p(File.join(APP_PATH, 'model'))
|
||||||
outpath = File.join(APP_PATH, 'model', "#{shortname}.model")
|
outpath = File.join(APP_PATH, 'model', "#{shortname}.model")
|
||||||
|
|
||||||
Ebooks::Model.consume(path).save(outpath)
|
Ebooks::Model.consume(path).save(outpath)
|
||||||
log "Corpus consumed to #{outpath}"
|
log "Corpus consumed to #{outpath}"
|
||||||
end
|
end
|
||||||
|
@ -115,6 +120,24 @@ STR
|
||||||
log "Corpuses consumed to #{outpath}"
|
log "Corpuses consumed to #{outpath}"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
HELP.append = <<-STR
|
||||||
|
Usage: ebooks append <model_name> <corpus_path>
|
||||||
|
|
||||||
|
Process then append the provided corpus to the model
|
||||||
|
instead of overwriting.
|
||||||
|
STR
|
||||||
|
|
||||||
|
# Process a corpus and merge it into an existing model file instead of
# overwriting that file.
#
# Prints the command's usage and exits with status 1 when either argument
# is missing. Also exits with status 1 when the target model file does not
# exist, rather than reporting a success that never happened.
#
# @param name [String] model name; resolved to model/<name>.model under APP_PATH
# @param path [String] path of the corpus to consume
def self.append(name, path)
  unless name && path
    help :append
    exit 1
  end

  model_path = File.join(APP_PATH, 'model', "#{name}.model")

  # Check before consuming: processing a corpus can be slow, and
  # Model#append merely logs and returns when the file is absent, which
  # would otherwise be followed by a misleading "Corpus appended" message.
  unless File.file?(model_path)
    log "No existing model found at #{model_path}"
    exit 1
  end

  Ebooks::Model.consume(path).append(model_path)
  log "Corpus appended to #{name}.model"
end
|
||||||
|
|
||||||
|
|
||||||
HELP.jsonify = <<-STR
|
HELP.jsonify = <<-STR
|
||||||
Usage: ebooks jsonify <tweets.csv> [tweets.csv2] [...]
|
Usage: ebooks jsonify <tweets.csv> [tweets.csv2] [...]
|
||||||
|
|
||||||
|
@ -189,6 +212,11 @@ STR
|
||||||
Output defaults to corpus/<username>.json
|
Output defaults to corpus/<username>.json
|
||||||
Due to API limitations, this can only receive up to ~3000 tweets
|
Due to API limitations, this can only receive up to ~3000 tweets
|
||||||
into the past.
|
into the past.
|
||||||
|
|
||||||
|
The first time you run archive, you will need to enter the auth
|
||||||
|
details of some account to use for accessing the API. This info
|
||||||
|
will then be stored in ~/.ebooksrc for later use, and can be
|
||||||
|
modified there if needed.
|
||||||
STR
|
STR
|
||||||
|
|
||||||
def self.archive(username, outpath=nil)
|
def self.archive(username, outpath=nil)
|
||||||
|
@ -200,6 +228,25 @@ STR
|
||||||
Ebooks::Archive.new(username, outpath).sync
|
Ebooks::Archive.new(username, outpath).sync
|
||||||
end
|
end
|
||||||
|
|
||||||
|
HELP.sync = <<-STR
|
||||||
|
Usage: ebooks sync <botname> <username>
|
||||||
|
|
||||||
|
Copies and flips <username>'s avatar and cover photo, uploading them to <botname>'s profile.
|
||||||
|
|
||||||
|
Stores saved avatar's and covers in image/.
|
||||||
|
|
||||||
|
STR
|
||||||
|
|
||||||
|
def self.sync(botname, username)
|
||||||
|
if botname.nil?
|
||||||
|
help :sync
|
||||||
|
exit 1
|
||||||
|
end
|
||||||
|
|
||||||
|
load File.join(APP_PATH, 'bots.rb')
|
||||||
|
Ebooks::Sync::run(botname, username)
|
||||||
|
end
|
||||||
|
|
||||||
HELP.tweet = <<-STR
|
HELP.tweet = <<-STR
|
||||||
Usage: ebooks tweet <model_path> <botname>
|
Usage: ebooks tweet <model_path> <botname>
|
||||||
|
|
||||||
|
@ -217,6 +264,10 @@ STR
|
||||||
model = Ebooks::Model.load(modelpath)
|
model = Ebooks::Model.load(modelpath)
|
||||||
statement = model.make_statement
|
statement = model.make_statement
|
||||||
bot = Ebooks::Bot.get(botname)
|
bot = Ebooks::Bot.get(botname)
|
||||||
|
if bot.nil?
|
||||||
|
log "No such bot configured in bots.rb: #{botname}"
|
||||||
|
exit 1
|
||||||
|
end
|
||||||
bot.configure
|
bot.configure
|
||||||
bot.tweet(statement)
|
bot.tweet(statement)
|
||||||
end
|
end
|
||||||
|
@ -259,8 +310,8 @@ STR
|
||||||
access_token = request_token.get_access_token(oauth_verifier: pin)
|
access_token = request_token.get_access_token(oauth_verifier: pin)
|
||||||
|
|
||||||
log "Account authorized successfully. Make sure to put these in your bots.rb!\n" +
|
log "Account authorized successfully. Make sure to put these in your bots.rb!\n" +
|
||||||
" access token: #{access_token.token}\n" +
|
" bot.access_token = \"#{access_token.token}\"\n" +
|
||||||
" access token secret: #{access_token.secret}"
|
" bot.access_token_secret = \"#{access_token.secret}\""
|
||||||
end
|
end
|
||||||
|
|
||||||
HELP.console = <<-STR
|
HELP.console = <<-STR
|
||||||
|
@ -275,6 +326,17 @@ STR
|
||||||
require 'pry'; Ebooks.module_exec { pry }
|
require 'pry'; Ebooks.module_exec { pry }
|
||||||
end
|
end
|
||||||
|
|
||||||
|
HELP.version = <<-STR
|
||||||
|
Usage: ebooks version
|
||||||
|
|
||||||
|
Shows you twitter_ebooks' version number.
|
||||||
|
STR
|
||||||
|
|
||||||
|
def self.version
|
||||||
|
require File.expand_path('../../lib/twitter_ebooks/version', __FILE__)
|
||||||
|
log Ebooks::VERSION
|
||||||
|
end
|
||||||
|
|
||||||
HELP.start = <<-STR
|
HELP.start = <<-STR
|
||||||
Usage: ebooks s[tart] [botname]
|
Usage: ebooks s[tart] [botname]
|
||||||
|
|
||||||
|
@ -368,8 +430,10 @@ STR
|
||||||
when "new" then new(args[1])
|
when "new" then new(args[1])
|
||||||
when "consume" then consume(args[1..-1])
|
when "consume" then consume(args[1..-1])
|
||||||
when "consume-all" then consume_all(args[1], args[2..-1])
|
when "consume-all" then consume_all(args[1], args[2..-1])
|
||||||
|
when "append" then append(args[1],args[2])
|
||||||
when "gen" then gen(args[1], args[2..-1].join(' '))
|
when "gen" then gen(args[1], args[2..-1].join(' '))
|
||||||
when "archive" then archive(args[1], args[2])
|
when "archive" then archive(args[1], args[2])
|
||||||
|
when "sync" then sync(args[1], args[2])
|
||||||
when "tweet" then tweet(args[1], args[2])
|
when "tweet" then tweet(args[1], args[2])
|
||||||
when "jsonify" then jsonify(args[1..-1])
|
when "jsonify" then jsonify(args[1..-1])
|
||||||
when "auth" then auth
|
when "auth" then auth
|
||||||
|
@ -378,6 +442,7 @@ STR
|
||||||
when "start" then start(args[1])
|
when "start" then start(args[1])
|
||||||
when "s" then start(args[1])
|
when "s" then start(args[1])
|
||||||
when "help" then help(args[1])
|
when "help" then help(args[1])
|
||||||
|
when "version" then version
|
||||||
else
|
else
|
||||||
log "No such command '#{args[0]}'"
|
log "No such command '#{args[0]}'"
|
||||||
help
|
help
|
||||||
|
|
|
@ -16,6 +16,7 @@ end
|
||||||
|
|
||||||
require 'twitter_ebooks/nlp'
|
require 'twitter_ebooks/nlp'
|
||||||
require 'twitter_ebooks/archive'
|
require 'twitter_ebooks/archive'
|
||||||
|
require 'twitter_ebooks/sync'
|
||||||
require 'twitter_ebooks/suffix'
|
require 'twitter_ebooks/suffix'
|
||||||
require 'twitter_ebooks/model'
|
require 'twitter_ebooks/model'
|
||||||
require 'twitter_ebooks/bot'
|
require 'twitter_ebooks/bot'
|
||||||
|
|
|
@ -49,8 +49,9 @@ module Ebooks
|
||||||
|
|
||||||
@client = client || make_client
|
@client = client || make_client
|
||||||
|
|
||||||
if File.exists?(@path)
|
if (File.exists?(@path) && !File.zero?(@path))
|
||||||
@tweets = JSON.parse(File.read(@path, :encoding => 'utf-8'), symbolize_names: true)
|
@filetext = File.read(@path, :encoding => 'utf-8')
|
||||||
|
@tweets = JSON.parse(@filetext, symbolize_names: true)
|
||||||
log "Currently #{@tweets.length} tweets for #{@username}"
|
log "Currently #{@tweets.length} tweets for #{@username}"
|
||||||
else
|
else
|
||||||
@tweets.nil?
|
@tweets.nil?
|
||||||
|
@ -59,6 +60,21 @@ module Ebooks
|
||||||
end
|
end
|
||||||
|
|
||||||
# Fetch new tweets and rewrite the archive file at @path.
#
# The file is opened for writing before the download starts, so
#   a) a problem opening the file raises before any download is attempted;
#   b) a failure during the download puts the original contents back.
#
# Opening with 'w' truncates the file, so the previous text (@filetext,
# nil when there was no earlier archive) is what gets restored on failure.
#
# @raise whatever sync_to raised, after the original contents are restored
def sync
  File.open(@path, 'w') do |file|
    begin
      sync_to(file)
    rescue Exception
      # Intentionally broad: the archive must also be restored when the
      # user interrupts the download. The exception is always re-raised.
      file.seek(0)
      # Drop anything sync_to managed to write; without this, a partial
      # write longer than the original text would leave trailing garbage.
      file.truncate(0)
      file.write(@filetext.to_s)
      raise
    end
  end
end
|
||||||
|
|
||||||
|
def sync_to(file)
|
||||||
retries = 0
|
retries = 0
|
||||||
tweets = []
|
tweets = []
|
||||||
max_id = nil
|
max_id = nil
|
||||||
|
@ -93,10 +109,8 @@ module Ebooks
|
||||||
@tweets = tweets.map(&:attrs).each { |tw|
|
@tweets = tweets.map(&:attrs).each { |tw|
|
||||||
tw.delete(:entities)
|
tw.delete(:entities)
|
||||||
} + @tweets
|
} + @tweets
|
||||||
File.open(@path, 'w') do |f|
|
|
||||||
f.write(JSON.pretty_generate(@tweets))
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
file.write(JSON.pretty_generate(@tweets))
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -2,6 +2,14 @@
|
||||||
require 'twitter'
|
require 'twitter'
|
||||||
require 'rufus/scheduler'
|
require 'rufus/scheduler'
|
||||||
|
|
||||||
|
# Monkeypatch hack to fix upstream dependency issue
|
||||||
|
# https://github.com/sferik/twitter/issues/709
|
||||||
|
class HTTP::URI
|
||||||
|
def port
|
||||||
|
443 if self.https?
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
module Ebooks
|
module Ebooks
|
||||||
class ConfigurationError < Exception
|
class ConfigurationError < Exception
|
||||||
end
|
end
|
||||||
|
@ -29,12 +37,10 @@ module Ebooks
|
||||||
usertweets = @tweets.select { |t| t.user.screen_name.downcase == username.downcase }
|
usertweets = @tweets.select { |t| t.user.screen_name.downcase == username.downcase }
|
||||||
|
|
||||||
if usertweets.length > 2
|
if usertweets.length > 2
|
||||||
if (usertweets[-1].created_at - usertweets[-3].created_at) < 10
|
if username.include?('ebooks') || (usertweets[-1].created_at - usertweets[-3].created_at) < 12
|
||||||
return true
|
return true
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
username.include?("ebooks")
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# Figure out whether to keep this user in the reply prefix
|
# Figure out whether to keep this user in the reply prefix
|
||||||
|
@ -162,7 +168,7 @@ module Ebooks
|
||||||
# @param username [String]
|
# @param username [String]
|
||||||
# @return [Ebooks::Bot]
|
# @return [Ebooks::Bot]
|
||||||
def self.get(username)
|
def self.get(username)
|
||||||
all.find { |bot| bot.username == username }
|
all.find { |bot| bot.username.downcase == username.downcase }
|
||||||
end
|
end
|
||||||
|
|
||||||
# Logs info to stdout in the context of this bot
|
# Logs info to stdout in the context of this bot
|
||||||
|
@ -262,6 +268,12 @@ module Ebooks
|
||||||
return unless ev.text # If it's not a text-containing tweet, ignore it
|
return unless ev.text # If it's not a text-containing tweet, ignore it
|
||||||
return if ev.user.id == @user.id # Ignore our own tweets
|
return if ev.user.id == @user.id # Ignore our own tweets
|
||||||
|
|
||||||
|
if ev.retweet? && ev.retweeted_tweet.user.id == @user.id
|
||||||
|
# Someone retweeted our tweet!
|
||||||
|
fire(:retweet, ev)
|
||||||
|
return
|
||||||
|
end
|
||||||
|
|
||||||
meta = meta(ev)
|
meta = meta(ev)
|
||||||
|
|
||||||
if blacklisted?(ev.user.screen_name)
|
if blacklisted?(ev.user.screen_name)
|
||||||
|
@ -363,7 +375,7 @@ module Ebooks
|
||||||
# Delay an action for a variable period of time
#
# A Range is sampled with rand, so no intermediate array is built; a plain
# number is used as the delay directly. (Previously a numeric argument
# left the delay unset and `sleep nil` raised TypeError.)
#
# @param range [Range, Numeric] range of seconds to choose from, or an
#   exact number of seconds
# @return the value returned by the block
def delay(range=@delay_range, &b)
  time = range.is_a?(Numeric) ? range : rand(range)
  sleep time
  b.call
end
|
||||||
|
|
|
@ -69,6 +69,35 @@ module Ebooks
|
||||||
self
|
self
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Append a generated model to existing model file instead of overwriting it
#
# Sentences and mentions are arrays of tikis, i.e. indexes into the token
# array. Joining two token arrays therefore requires renumbering the
# tikis that came from the file; otherwise they would point at unrelated
# tokens. Tokens present in both models are stored once.
#
# After a successful call this model holds the merged data as well.
#
# @param path [String] path of an existing marshalled model file
# @return [Ebooks::Model, nil] self, or nil when no model exists at path
def append(path)
  unless File.file?(path)
    log "No existing model found at #{path}"
    return
  end

  # Read in and deserialize the existing model.
  # NOTE(review): Marshal.load is only safe on trusted, locally generated
  # model files; never point this at a file from an untrusted source.
  props = Marshal.load(File.binread(path))

  # Merge the token lists, keeping this model's tokens at their current
  # indexes so its own sentences and mentions stay valid.
  merged_tokens = @tokens.dup
  positions = {}
  merged_tokens.each_with_index { |token, i| positions[token] ||= i }

  # remap[old_tiki] => index of the same token in merged_tokens
  remap = props[:tokens].map do |token|
    positions[token] ||= (merged_tokens << token).length - 1
  end
  renumber = lambda do |rows|
    rows.map { |tikis| tikis.map { |tiki| remap[tiki] } }
  end

  @tokens = merged_tokens
  @sentences = @sentences + renumber.call(props[:sentences])
  @mentions = @mentions + renumber.call(props[:mentions])
  @keywords = (@keywords + props[:keywords]).uniq

  # Overwrite the file with the merged model.
  File.open(path, 'wb') do |f|
    f.write(Marshal.dump({
      tokens: @tokens,
      sentences: @sentences,
      mentions: @mentions,
      keywords: @keywords
    }))
  end

  self
end
|
||||||
|
|
||||||
|
|
||||||
def initialize
|
def initialize
|
||||||
@tokens = []
|
@tokens = []
|
||||||
|
|
||||||
|
@ -80,7 +109,13 @@ module Ebooks
|
||||||
# @param token [String]
|
# @param token [String]
|
||||||
# @return [Integer]
|
# @return [Integer]
|
||||||
def tikify(token)
|
def tikify(token)
|
||||||
@tikis[token] or (@tokens << token and @tikis[token] = @tokens.length-1)
|
if @tikis.has_key?(token) then
|
||||||
|
return @tikis[token]
|
||||||
|
else
|
||||||
|
(@tokens.length+1)%1000 == 0 and puts "#{@tokens.length+1} tokens"
|
||||||
|
@tokens << token
|
||||||
|
return @tikis[token] = @tokens.length-1
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Convert a body of text into arrays of tikis
|
# Convert a body of text into arrays of tikis
|
||||||
|
@ -143,18 +178,19 @@ module Ebooks
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
text = statements.join("\n")
|
text = statements.join("\n").encode('UTF-8', :invalid => :replace)
|
||||||
mention_text = mentions.join("\n")
|
mention_text = mentions.join("\n").encode('UTF-8', :invalid => :replace)
|
||||||
|
|
||||||
lines = nil; statements = nil; mentions = nil # Allow garbage collection
|
lines = nil; statements = nil; mentions = nil # Allow garbage collection
|
||||||
|
|
||||||
log "Tokenizing #{text.count('\n')} statements and #{mention_text.count('\n')} mentions"
|
log "Tokenizing #{text.count("\n")} statements and #{mention_text.count("\n")} mentions"
|
||||||
|
|
||||||
@sentences = mass_tikify(text)
|
@sentences = mass_tikify(text)
|
||||||
@mentions = mass_tikify(mention_text)
|
@mentions = mass_tikify(mention_text)
|
||||||
|
|
||||||
log "Ranking keywords"
|
log "Ranking keywords"
|
||||||
@keywords = NLP.keywords(text).top(200).map(&:to_s)
|
@keywords = NLP.keywords(text).top(200).map(&:to_s)
|
||||||
|
log "Top keywords: #{@keywords[0]} #{@keywords[1]} #{@keywords[2]}"
|
||||||
|
|
||||||
self
|
self
|
||||||
end
|
end
|
||||||
|
@ -218,14 +254,15 @@ module Ebooks
|
||||||
tweet = ""
|
tweet = ""
|
||||||
|
|
||||||
while (tikis = generator.generate(3, :bigrams)) do
|
while (tikis = generator.generate(3, :bigrams)) do
|
||||||
next if tikis.length <= 3 && !responding
|
#log "Attempting to produce tweet try #{retries+1}/#{retry_limit}"
|
||||||
break if valid_tweet?(tikis, limit)
|
break if (tikis.length > 3 || responding) && valid_tweet?(tikis, limit)
|
||||||
|
|
||||||
retries += 1
|
retries += 1
|
||||||
break if retries >= retry_limit
|
break if retries >= retry_limit
|
||||||
end
|
end
|
||||||
|
|
||||||
if verbatim?(tikis) && tikis.length > 3 # We made a verbatim tweet by accident
|
if verbatim?(tikis) && tikis.length > 3 # We made a verbatim tweet by accident
|
||||||
|
#log "Attempting to produce unigram tweet try #{retries+1}/#{retry_limit}"
|
||||||
while (tikis = generator.generate(3, :unigrams)) do
|
while (tikis = generator.generate(3, :unigrams)) do
|
||||||
break if valid_tweet?(tikis, limit) && !verbatim?(tikis)
|
break if valid_tweet?(tikis, limit) && !verbatim?(tikis)
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
require 'fast-stemmer'
|
require 'fast-stemmer'
|
||||||
require 'highscore'
|
require 'highscore'
|
||||||
|
require 'htmlentities'
|
||||||
|
|
||||||
module Ebooks
|
module Ebooks
|
||||||
module NLP
|
module NLP
|
||||||
|
@ -13,10 +14,10 @@ module Ebooks
|
||||||
# to be using it all of the time
|
# to be using it all of the time
|
||||||
|
|
||||||
# Lazily loads an array of stopwords
# Stopwords are common words that should often be ignored
#
# The list is read from stopwords.txt in the current working directory
# and split on whitespace; when that file is absent the list is empty.
# The result is memoized after the first call.
#
# File.exist? is used because File.exists? was removed in Ruby 3.2.
# @return [Array<String>]
def self.stopwords
  @stopwords ||= File.exist?('stopwords.txt') ? File.read('stopwords.txt').split : []
end
|
||||||
|
|
||||||
# Lazily loads an array of known English nouns
|
# Lazily loads an array of known English nouns
|
||||||
|
@ -42,7 +43,6 @@ module Ebooks
|
||||||
# Lazily load HTML entity decoder
|
# Lazily load HTML entity decoder
|
||||||
# @return [HTMLEntities]
|
# @return [HTMLEntities]
|
||||||
def self.htmlentities
|
def self.htmlentities
|
||||||
require 'htmlentities'
|
|
||||||
@htmlentities ||= HTMLEntities.new
|
@htmlentities ||= HTMLEntities.new
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -99,7 +99,7 @@ module Ebooks
|
||||||
#set :vowels, 1 # => default: 0 = not considered
|
#set :vowels, 1 # => default: 0 = not considered
|
||||||
#set :consonants, 5 # => default: 0 = not considered
|
#set :consonants, 5 # => default: 0 = not considered
|
||||||
#set :ignore_case, true # => default: false
|
#set :ignore_case, true # => default: false
|
||||||
set :word_pattern, /(?<!@)(?<=\s)[\w']+/ # => default: /\w+/
|
set :word_pattern, /(?<!@)(?<=\s)[\p{Word}']+/ # => default: /\w+/
|
||||||
#set :stemming, true # => default: false
|
#set :stemming, true # => default: false
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -1,12 +1,15 @@
|
||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
|
|
||||||
module Ebooks
|
module Ebooks
|
||||||
# This generator uses data identical to a markov model, but
|
# This generator uses data similar to a Markov model, but
|
||||||
# instead of making a chain by looking up bigrams it uses the
|
# instead of making a chain by looking up bigrams it uses the
|
||||||
# positions to randomly replace suffixes in one sentence with
|
# positions to randomly replace token array suffixes in one sentence
|
||||||
# matching suffixes in another
|
# with matching suffixes in another
|
||||||
class SuffixGenerator
|
class SuffixGenerator
|
||||||
# Build a generator from a corpus of tikified sentences
|
# Build a generator from a corpus of tikified sentences
|
||||||
|
# "tikis" are token indexes-- a way of representing words
|
||||||
|
# and punctuation as their integer position in a big array
|
||||||
|
# of such tokens
|
||||||
# @param sentences [Array<Array<Integer>>]
|
# @param sentences [Array<Array<Integer>>]
|
||||||
# @return [SuffixGenerator]
|
# @return [SuffixGenerator]
|
||||||
def self.build(sentences)
|
def self.build(sentences)
|
||||||
|
@ -14,11 +17,14 @@ module Ebooks
|
||||||
end
|
end
|
||||||
|
|
||||||
def initialize(sentences)
|
def initialize(sentences)
|
||||||
@sentences = sentences.reject { |s| s.length < 2 }
|
@sentences = sentences.reject { |s| s.empty? }
|
||||||
@unigrams = {}
|
@unigrams = {}
|
||||||
@bigrams = {}
|
@bigrams = {}
|
||||||
|
|
||||||
@sentences.each_with_index do |tikis, i|
|
@sentences.each_with_index do |tikis, i|
|
||||||
|
if (i % 10000 == 0) then
|
||||||
|
log ("Building: sentence #{i} of #{sentences.length}")
|
||||||
|
end
|
||||||
last_tiki = INTERIM
|
last_tiki = INTERIM
|
||||||
tikis.each_with_index do |tiki, j|
|
tikis.each_with_index do |tiki, j|
|
||||||
@unigrams[last_tiki] ||= []
|
@unigrams[last_tiki] ||= []
|
||||||
|
@ -42,7 +48,6 @@ module Ebooks
|
||||||
self
|
self
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
# Generate a recombined sequence of tikis
|
# Generate a recombined sequence of tikis
|
||||||
# @param passes [Integer] number of times to recombine
|
# @param passes [Integer] number of times to recombine
|
||||||
# @param n [Symbol] :unigrams or :bigrams (affects how conservative the model is)
|
# @param n [Symbol] :unigrams or :bigrams (affects how conservative the model is)
|
||||||
|
@ -86,7 +91,11 @@ module Ebooks
|
||||||
break if variant
|
break if variant
|
||||||
end
|
end
|
||||||
|
|
||||||
tikis = variant if variant
|
# If we failed to produce a variation from any alternative, there
|
||||||
|
# is no use running additional passes-- they'll have the same result.
|
||||||
|
break if variant.nil?
|
||||||
|
|
||||||
|
tikis = variant
|
||||||
end
|
end
|
||||||
|
|
||||||
tikis
|
tikis
|
||||||
|
|
52
lib/twitter_ebooks/sync.rb
Normal file
52
lib/twitter_ebooks/sync.rb
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
#!/usr/bin/env ruby
|
||||||
|
# encoding: utf-8
|
||||||
|
|
||||||
|
require 'twitter'
|
||||||
|
require 'json'
|
||||||
|
require 'mini_magick'
|
||||||
|
require 'open-uri'
|
||||||
|
require 'pry'
|
||||||
|
|
||||||
|
module Ebooks
  # Copies another account's avatar and cover photo onto a bot's profile,
  # flipped. Both the downloaded originals and the flipped copies are kept
  # under image/ in the current working directory.
  class Sync

    # Clone and flip the profile images of one account onto a bot.
    #
    # @param botname [String] username of a bot configured in bots.rb
    # @param username [String] account whose images are copied
    # @raise [ArgumentError] when no bot named botname is configured
    def self.run(botname, username)
      bot = Ebooks::Bot.get(botname)
      # Fail with a clear message instead of a NoMethodError on nil.
      raise ArgumentError, "No such bot configured in bots.rb: #{botname}" if bot.nil?

      bot.configure
      source_user = username
      ebooks_user = bot.username
      user = bot.twitter.user(source_user)

      if user.profile_image_url
        avatar64 = flipped_base64(user.profile_image_url(:original), source_user, ebooks_user, 'avatar')
        bot.twitter.update_profile_image(avatar64)
        puts "Updated profile image for #{ebooks_user} from #{source_user}."
      else
        puts "#{source_user} does not have a profile image to clone."
      end

      if user.profile_banner_url
        banner64 = flipped_base64(user.profile_banner_url, source_user, ebooks_user, 'banner')
        bot.twitter.update_profile_banner(banner64)
        puts "Updated cover image for #{ebooks_user} from #{source_user}."
      else
        puts "#{source_user} does not have a cover image to clone."
      end
    end

    # Download url and save its bytes to destination.
    #
    # The download completes before destination is opened, so a failed
    # download does not leave an existing file truncated.
    #
    # @param url [String] location to read from
    # @param destination [String] path of the file to write
    def self.get(url, destination)
      # Kernel#open no longer fetches URLs on Ruby 3; URI.open is the
      # open-uri entry point where it exists. Older Rubies only offer
      # Kernel#open, so fall back to it there.
      use_uri_open = defined?(URI) && URI.respond_to?(:open)
      source = use_uri_open ? URI.open(url, "rb") : open(url, "rb")
      begin
        data = source.read
      ensure
        source.close
      end

      File.open(destination, "wb") do |saved_file|
        saved_file.write(data)
      end
    end

    # Download an image, write a flipped copy and return that copy
    # Base64-encoded for upload.
    #
    # Files are named image/<user>_<kind> for both the source account and
    # the bot.
    #
    # @param url [String] image location
    # @param source_user [String] account the image belongs to
    # @param ebooks_user [String] bot account receiving the image
    # @param kind [String] 'avatar' or 'banner'; used in the file names
    # @return [String] Base64 text of the flipped image
    def self.flipped_base64(url, source_user, ebooks_user, kind)
      source_path = "image/#{source_user}_#{kind}"
      target_path = "image/#{ebooks_user}_#{kind}"

      get(url, source_path)
      image = MiniMagick::Image.open(source_path)
      image.flip
      image.write(target_path)

      # pack('m') produces the same output as Base64.encode64 without
      # relying on some other library having required 'base64'.
      [File.binread(target_path)].pack('m')
    end
    private_class_method :flipped_base64

  end
end
|
|
@ -1,3 +1,3 @@
|
||||||
module Ebooks
|
module Ebooks
|
||||||
VERSION = "3.1.0"
|
VERSION = "3.1.6"
|
||||||
end
|
end
|
||||||
|
|
|
@ -51,6 +51,11 @@ class MyBot < Ebooks::Bot
|
||||||
# Follow user who just favorited bot's tweet
|
# Follow user who just favorited bot's tweet
|
||||||
# follow(user.screen_name)
|
# follow(user.screen_name)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def on_retweet(tweet)
|
||||||
|
# Follow user who just retweeted bot's tweet
|
||||||
|
# follow(tweet.user.screen_name)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Make a MyBot and attach it to an account
|
# Make a MyBot and attach it to an account
|
||||||
|
|
0
skeleton/image/.gitignore
vendored
Normal file
0
skeleton/image/.gitignore
vendored
Normal file
|
@ -36,7 +36,7 @@ describe Ebooks::Model do
|
||||||
report2 = MemoryUsage.report do
|
report2 = MemoryUsage.report do
|
||||||
model = Ebooks::Model.load(file.path)
|
model = Ebooks::Model.load(file.path)
|
||||||
end
|
end
|
||||||
expect(report2.total_memsize).to be < 3000000
|
expect(report2.total_memsize).to be < 4000000
|
||||||
|
|
||||||
expect(model.tokens[0]).to be_a String
|
expect(model.tokens[0]).to be_a String
|
||||||
expect(model.sentences[0][0]).to be_a Fixnum
|
expect(model.sentences[0][0]).to be_a Fixnum
|
||||||
|
@ -70,5 +70,19 @@ describe Ebooks::Model do
|
||||||
|
|
||||||
file.unlink
|
file.unlink
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it 'handles strange unicode edge-cases' do
|
||||||
|
file = Tempfile.new('unicode')
|
||||||
|
file.write("💞\n💞")
|
||||||
|
file.close
|
||||||
|
|
||||||
|
model = Ebooks::Model.consume(file.path)
|
||||||
|
expect(model.mentions.count).to eq 0
|
||||||
|
expect(model.sentences.count).to eq 2
|
||||||
|
|
||||||
|
file.unlink
|
||||||
|
|
||||||
|
p model.make_statement
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -2,6 +2,8 @@
|
||||||
require File.expand_path('../lib/twitter_ebooks/version', __FILE__)
|
require File.expand_path('../lib/twitter_ebooks/version', __FILE__)
|
||||||
|
|
||||||
Gem::Specification.new do |gem|
|
Gem::Specification.new do |gem|
|
||||||
|
gem.required_ruby_version = '~> 2.1'
|
||||||
|
|
||||||
gem.authors = ["Jaiden Mispy"]
|
gem.authors = ["Jaiden Mispy"]
|
||||||
gem.email = ["^_^@mispy.me"]
|
gem.email = ["^_^@mispy.me"]
|
||||||
gem.description = %q{Markov chains for all your friends~}
|
gem.description = %q{Markov chains for all your friends~}
|
||||||
|
@ -22,7 +24,7 @@ Gem::Specification.new do |gem|
|
||||||
gem.add_development_dependency 'pry-byebug'
|
gem.add_development_dependency 'pry-byebug'
|
||||||
gem.add_development_dependency 'yard'
|
gem.add_development_dependency 'yard'
|
||||||
|
|
||||||
gem.add_runtime_dependency 'twitter', '~> 5.0'
|
gem.add_runtime_dependency 'twitter', '~> 5.15'
|
||||||
gem.add_runtime_dependency 'rufus-scheduler'
|
gem.add_runtime_dependency 'rufus-scheduler'
|
||||||
gem.add_runtime_dependency 'gingerice'
|
gem.add_runtime_dependency 'gingerice'
|
||||||
gem.add_runtime_dependency 'htmlentities'
|
gem.add_runtime_dependency 'htmlentities'
|
||||||
|
@ -31,4 +33,5 @@ Gem::Specification.new do |gem|
|
||||||
gem.add_runtime_dependency 'highscore'
|
gem.add_runtime_dependency 'highscore'
|
||||||
gem.add_runtime_dependency 'pry'
|
gem.add_runtime_dependency 'pry'
|
||||||
gem.add_runtime_dependency 'oauth'
|
gem.add_runtime_dependency 'oauth'
|
||||||
|
gem.add_runtime_dependency 'mini_magick'
|
||||||
end
|
end
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue