Allow consumption of JSON archives

This commit is contained in:
Mispy 2013-11-27 05:12:54 -08:00
parent acc2f42b38
commit 306c9ab873
2 changed files with 35 additions and 25 deletions

View file

@ -17,14 +17,22 @@ module Ebooks
Marshal.load(File.read(path))
end
def consume(txtpath)
# Record hash of source file so we know to update later
@hash = Digest::MD5.hexdigest(File.read(txtpath))
def consume(path)
content = File.read(path)
@hash = Digest::MD5.hexdigest(content)
if path.split('.')[-1] == "json"
log "Reading json corpus from #{path}"
lines = JSON.parse(content, symbolize_names: true).map do |tweet|
tweet[:text]
end
else
log "Reading plaintext corpus from #{path}"
lines = content.split("\n")
end
text = File.read(txtpath)
log "Removing commented lines and sorting mentions"
lines = text.split("\n")
keeping = []
mentions = []
lines.each do |l|