Threading!
This commit is contained in:
parent
29beb23502
commit
b72a6db0e1
3 changed files with 31 additions and 62 deletions
|
@ -1,7 +1,7 @@
|
|||
$debug = false
|
||||
|
||||
def log(*args)
|
||||
STDERR.puts args.map(&:to_s).join(' ')
|
||||
STDERR.print args.map(&:to_s).join(' ') + "\n"
|
||||
STDERR.flush
|
||||
end
|
||||
|
||||
|
|
|
@ -1,51 +1,8 @@
|
|||
#!/usr/bin/env ruby
|
||||
# encoding: utf-8
|
||||
require 'twitter'
|
||||
require 'rufus/scheduler'
|
||||
require 'eventmachine'
|
||||
|
||||
module Ebooks
|
||||
# Wrap SSLSocket so that readpartial yields the fiber instead of
|
||||
# blocking when there is no data
|
||||
#
|
||||
# We hand this to the twitter library so we can select on the sockets
|
||||
# and thus run multiple streams without them blocking
|
||||
class FiberSSLSocket
|
||||
def initialize(*args)
|
||||
@socket = OpenSSL::SSL::SSLSocket.new(*args)
|
||||
end
|
||||
|
||||
def readpartial(maxlen)
|
||||
data = ""
|
||||
|
||||
loop do
|
||||
begin
|
||||
data = @socket.read_nonblock(maxlen)
|
||||
rescue IO::WaitReadable
|
||||
end
|
||||
break if data.length > 0
|
||||
Fiber.yield(@socket)
|
||||
end
|
||||
|
||||
data
|
||||
end
|
||||
|
||||
def method_missing(m, *args)
|
||||
@socket.send(m, *args)
|
||||
end
|
||||
end
|
||||
|
||||
# An EventMachine handler which resumes a fiber on incoming data
|
||||
class FiberSocketHandler < EventMachine::Connection
|
||||
def initialize(fiber)
|
||||
@fiber = fiber
|
||||
end
|
||||
|
||||
def notify_readable
|
||||
@fiber.resume
|
||||
end
|
||||
end
|
||||
|
||||
class ConfigurationError < Exception
|
||||
end
|
||||
|
||||
|
@ -106,7 +63,7 @@ module Ebooks
|
|||
attr_accessor :consumer_key, :consumer_secret,
|
||||
:access_token, :access_token_secret
|
||||
|
||||
attr_reader :twitter, :stream
|
||||
attr_reader :twitter, :stream, :thread
|
||||
|
||||
# Configuration
|
||||
attr_accessor :username, :delay_range, :blacklist
|
||||
|
@ -119,7 +76,7 @@ module Ebooks
|
|||
end
|
||||
|
||||
def log(*args)
|
||||
STDOUT.puts "@#{@username}: " + args.map(&:to_s).join(' ')
|
||||
STDOUT.print "@#{@username}: " + args.map(&:to_s).join(' ') + "\n"
|
||||
STDOUT.flush
|
||||
end
|
||||
|
||||
|
@ -154,9 +111,7 @@ module Ebooks
|
|||
config.access_token_secret = @access_token_secret
|
||||
end
|
||||
|
||||
@stream = Twitter::Streaming::Client.new(
|
||||
ssl_socket_class: FiberSSLSocket
|
||||
) do |config|
|
||||
@stream = Twitter::Streaming::Client.new do |config|
|
||||
config.consumer_key = @consumer_key
|
||||
config.consumer_secret = @consumer_secret
|
||||
config.access_token = @access_token
|
||||
|
@ -239,14 +194,13 @@ module Ebooks
|
|||
end
|
||||
|
||||
def start_stream
|
||||
log "starting stream for #@username"
|
||||
log "starting tweet stream"
|
||||
@stream.user do |ev|
|
||||
receive_event ev
|
||||
end
|
||||
end
|
||||
|
||||
# Connects to tweetstream and opens event handlers for this bot
|
||||
def start
|
||||
def prepare
|
||||
# Sanity check
|
||||
if @username.nil?
|
||||
raise ConfigurationError, "bot.username cannot be nil"
|
||||
|
@ -254,15 +208,11 @@ module Ebooks
|
|||
|
||||
make_client
|
||||
fire(:startup)
|
||||
|
||||
fiber = Fiber.new do
|
||||
start_stream
|
||||
end
|
||||
|
||||
socket = fiber.resume
|
||||
|
||||
conn = EM.watch socket.io, FiberSocketHandler, fiber
|
||||
conn.notify_readable = true
|
||||
# Connects to tweetstream and opens event handlers for this bot
|
||||
def start
|
||||
start_stream
|
||||
end
|
||||
|
||||
# Fire an event
|
||||
|
|
|
@ -4,14 +4,33 @@
|
|||
require 'json'
|
||||
require 'set'
|
||||
require 'digest/md5'
|
||||
require 'fileutils'
|
||||
require 'csv'
|
||||
|
||||
module Ebooks
|
||||
class Model
|
||||
attr_accessor :hash, :tokens, :sentences, :mentions, :keywords
|
||||
|
||||
def self.consume(txtpath)
|
||||
Model.new.consume(txtpath)
|
||||
# Consume a corpus file to create a model
|
||||
# @param corpus_path Path to a json, text or csv file to consume
|
||||
# @param cache Optional path to a directory to store cached models
|
||||
def self.consume(corpus_path, cache: nil)
|
||||
if cache
|
||||
FileUtils::mkdir_p cache
|
||||
|
||||
cache_path = File.join(cache, Digest::MD5.file(corpus_path).to_s)
|
||||
if File.exists?(cache_path)
|
||||
log "Reading model from cache at #{cache_path}"
|
||||
return Model.load(cache_path)
|
||||
end
|
||||
end
|
||||
|
||||
model = Model.new.consume(corpus_path)
|
||||
|
||||
if cache
|
||||
log "Caching model at #{cache_path}"
|
||||
model.save(cache_path)
|
||||
end
|
||||
end
|
||||
|
||||
def self.consume_all(paths)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue