Support consuming tweets.csv from official Twitter archives

This commit is contained in:
Joel McCoy 2014-04-30 20:30:54 -04:00
parent 17ef359de2
commit 872dabdbf8
2 changed files with 7 additions and 1 deletion

View file

@@ -4,6 +4,7 @@
require 'json'
require 'set'
require 'digest/md5'
require 'csv'
module Ebooks
class Model
@@ -26,6 +27,11 @@ module Ebooks
lines = JSON.parse(content, symbolize_names: true).map do |tweet|
tweet[:text]
end
elsif path.split('.')[-1] == "csv"
log "Reading CSV corpus from #{path}"
lines = CSV.read(path).drop(1).map do |tweet|
tweet[5]
end
else
log "Reading plaintext corpus from #{path}"
lines = content.split("\n")

View file

@@ -1,3 +1,3 @@
module Ebooks
VERSION = "2.2.3"
VERSION = "2.2.4"
end