Support consuming tweets.csv from official Twitter archives
This commit is contained in:
parent
17ef359de2
commit
872dabdbf8
2 changed files with 7 additions and 1 deletion
|
@ -4,6 +4,7 @@
|
||||||
require 'json'
|
require 'json'
|
||||||
require 'set'
|
require 'set'
|
||||||
require 'digest/md5'
|
require 'digest/md5'
|
||||||
|
require 'csv'
|
||||||
|
|
||||||
module Ebooks
|
module Ebooks
|
||||||
class Model
|
class Model
|
||||||
|
@ -26,6 +27,11 @@ module Ebooks
|
||||||
lines = JSON.parse(content, symbolize_names: true).map do |tweet|
|
lines = JSON.parse(content, symbolize_names: true).map do |tweet|
|
||||||
tweet[:text]
|
tweet[:text]
|
||||||
end
|
end
|
||||||
|
elsif path.split('.')[-1] == "csv"
|
||||||
|
log "Reading CSV corpus from #{path}"
|
||||||
|
lines = CSV.read(path).drop(1).map do |tweet|
|
||||||
|
tweet[5]
|
||||||
|
end
|
||||||
else
|
else
|
||||||
log "Reading plaintext corpus from #{path}"
|
log "Reading plaintext corpus from #{path}"
|
||||||
lines = content.split("\n")
|
lines = content.split("\n")
|
||||||
|
|
|
@ -1,3 +1,3 @@
|
||||||
module Ebooks
|
module Ebooks
|
||||||
VERSION = "2.2.3"
|
VERSION = "2.2.4"
|
||||||
end
|
end
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue