From 872dabdbf8117b35dd05f287ba507be3476f3eb9 Mon Sep 17 00:00:00 2001 From: Joel McCoy Date: Wed, 30 Apr 2014 20:30:54 -0400 Subject: [PATCH] Support consuming tweets.csv from official twitter archives --- lib/twitter_ebooks/model.rb | 6 ++++++ lib/twitter_ebooks/version.rb | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/twitter_ebooks/model.rb b/lib/twitter_ebooks/model.rb index 9924584..96674d2 100644 --- a/lib/twitter_ebooks/model.rb +++ b/lib/twitter_ebooks/model.rb @@ -4,6 +4,7 @@ require 'json' require 'set' require 'digest/md5' +require 'csv' module Ebooks class Model @@ -26,6 +27,11 @@ module Ebooks lines = JSON.parse(content, symbolize_names: true).map do |tweet| tweet[:text] end + elsif path.split('.')[-1] == "csv" + log "Reading CSV corpus from #{path}" + lines = CSV.read(path).drop(1).map do |tweet| + tweet[5] + end else log "Reading plaintext corpus from #{path}" lines = content.split("\n") diff --git a/lib/twitter_ebooks/version.rb b/lib/twitter_ebooks/version.rb index c8da643..ab9a31b 100644 --- a/lib/twitter_ebooks/version.rb +++ b/lib/twitter_ebooks/version.rb @@ -1,3 +1,3 @@ module Ebooks - VERSION = "2.2.3" + VERSION = "2.2.4" end