Support consuming tweets.csv from official Twitter archives
This commit is contained in:
parent
17ef359de2
commit
872dabdbf8
2 changed files with 7 additions and 1 deletion
|
@ -4,6 +4,7 @@
|
|||
require 'json'
|
||||
require 'set'
|
||||
require 'digest/md5'
|
||||
require 'csv'
|
||||
|
||||
module Ebooks
|
||||
class Model
|
||||
|
@ -26,6 +27,11 @@ module Ebooks
|
|||
lines = JSON.parse(content, symbolize_names: true).map do |tweet|
|
||||
tweet[:text]
|
||||
end
|
||||
elsif path.split('.')[-1] == "csv"
|
||||
log "Reading CSV corpus from #{path}"
|
||||
lines = CSV.read(path).drop(1).map do |tweet|
|
||||
tweet[5]
|
||||
end
|
||||
else
|
||||
log "Reading plaintext corpus from #{path}"
|
||||
lines = content.split("\n")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue