Allow consumption of json archives
This commit is contained in:
		
							parent
							
								
									acc2f42b38
								
							
						
					
					
						commit
						306c9ab873
					
				
					 2 changed files with 35 additions and 25 deletions
				
			
		| 
						 | 
				
			
			@ -17,14 +17,22 @@ module Ebooks
 | 
			
		|||
      Marshal.load(File.read(path))
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    def consume(txtpath)
 | 
			
		||||
      # Record hash of source file so we know to update later
 | 
			
		||||
      @hash = Digest::MD5.hexdigest(File.read(txtpath))
 | 
			
		||||
    def consume(path)
 | 
			
		||||
      content = File.read(path)
 | 
			
		||||
      @hash = Digest::MD5.hexdigest(content)
 | 
			
		||||
 | 
			
		||||
      if path.split('.')[-1] == "json"
 | 
			
		||||
        log "Reading json corpus from #{path}"
 | 
			
		||||
        lines = JSON.parse(content, symbolize_names: true).map do |tweet|
 | 
			
		||||
          tweet[:text]
 | 
			
		||||
        end
 | 
			
		||||
      else
 | 
			
		||||
        log "Reading plaintext corpus from #{path}"
 | 
			
		||||
        lines = content.split("\n")
 | 
			
		||||
      end
 | 
			
		||||
 | 
			
		||||
      text = File.read(txtpath)
 | 
			
		||||
      log "Removing commented lines and sorting mentions"
 | 
			
		||||
 | 
			
		||||
      lines = text.split("\n")
 | 
			
		||||
      keeping = []
 | 
			
		||||
      mentions = []
 | 
			
		||||
      lines.each do |l|
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue