Support consuming tweets.csv from official Twitter archives
This commit is contained in:
		
							parent
							
								
									17ef359de2
								
							
						
					
					
						commit
						872dabdbf8
					
				
					 2 changed files with 7 additions and 1 deletion
				
			
		| 
						 | 
					@ -4,6 +4,7 @@
 | 
				
			||||||
require 'json'
 | 
					require 'json'
 | 
				
			||||||
require 'set'
 | 
					require 'set'
 | 
				
			||||||
require 'digest/md5'
 | 
					require 'digest/md5'
 | 
				
			||||||
 | 
					require 'csv'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
module Ebooks
 | 
					module Ebooks
 | 
				
			||||||
  class Model
 | 
					  class Model
 | 
				
			||||||
| 
						 | 
					@ -26,6 +27,11 @@ module Ebooks
 | 
				
			||||||
        lines = JSON.parse(content, symbolize_names: true).map do |tweet|
 | 
					        lines = JSON.parse(content, symbolize_names: true).map do |tweet|
 | 
				
			||||||
          tweet[:text]
 | 
					          tweet[:text]
 | 
				
			||||||
        end
 | 
					        end
 | 
				
			||||||
 | 
					      elsif path.split('.')[-1] == "csv"
 | 
				
			||||||
 | 
					        log "Reading CSV corpus from #{path}"
 | 
				
			||||||
 | 
					        lines = CSV.read(path).drop(1).map do |tweet|
 | 
				
			||||||
 | 
					          tweet[5]
 | 
				
			||||||
 | 
					        end
 | 
				
			||||||
      else
 | 
					      else
 | 
				
			||||||
        log "Reading plaintext corpus from #{path}"
 | 
					        log "Reading plaintext corpus from #{path}"
 | 
				
			||||||
        lines = content.split("\n")
 | 
					        lines = content.split("\n")
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,3 +1,3 @@
 | 
				
			||||||
module Ebooks
 | 
					module Ebooks
 | 
				
			||||||
  VERSION = "2.2.3"
 | 
					  VERSION = "2.2.4"
 | 
				
			||||||
end
 | 
					end
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue