Github time!
This commit is contained in:
		
						commit
						e87dc5862b
					
				
					 27 changed files with 20178 additions and 0 deletions
				
			
		
							
								
								
									
										14696
									
								
								test/corpus/0xabad1dea.tweets
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										14696
									
								
								test/corpus/0xabad1dea.tweets
									
										
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										18
									
								
								test/keywords.rb
									
										
									
									
									
										Executable file
									
								
							
							
						
						
									
										18
									
								
								test/keywords.rb
									
										
									
									
									
										Executable file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,18 @@
 | 
			
		|||
#!/usr/bin/env ruby
 | 
			
		||||
# encoding: utf-8
 | 
			
		||||
 | 
			
		||||
require 'twitter_ebooks'
 | 
			
		||||
require 'minitest/autorun'
 | 
			
		||||
require 'benchmark'
 | 
			
		||||
 | 
			
		||||
module Ebooks
  # Ad-hoc benchmark harness for keyword extraction.
  #
  # The class defines no test_* methods; its body runs once, when the class
  # is defined. It reads the corpus file named by the first command-line
  # argument, passes the contents through NLP.normalize, and then prints the
  # text and weight of every entry yielded by NLP.keywords(corpus).top(50),
  # followed by the Benchmark.measure timing for that work.
  class TestKeywords < Minitest::Test
    # A missing argument is a usage error: report it clearly instead of
    # letting File.read(nil) fail with an opaque TypeError.
    corpus_path = ARGV.fetch(0) { abort "usage: #{$PROGRAM_NAME} CORPUS_FILE" }
    corpus = NLP.normalize(File.read(corpus_path))

    puts "Finding and ranking keywords"
    # Braces (not do/end) so the block binds to Benchmark.measure rather
    # than to puts.
    puts Benchmark.measure {
      NLP.keywords(corpus).top(50).each do |keyword|
        puts "#{keyword.text} #{keyword.weight}"
      end
    }
  end
end
 | 
			
		||||
							
								
								
									
										18
									
								
								test/tokenize.rb
									
										
									
									
									
										Executable file
									
								
							
							
						
						
									
										18
									
								
								test/tokenize.rb
									
										
									
									
									
										Executable file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,18 @@
 | 
			
		|||
#!/usr/bin/env ruby
 | 
			
		||||
# encoding: utf-8
 | 
			
		||||
 | 
			
		||||
require 'twitter_ebooks'
 | 
			
		||||
require 'minitest/autorun'
 | 
			
		||||
 | 
			
		||||
module Ebooks
  # Ad-hoc smoke script for sentence splitting and tokenization.
  #
  # The class defines no test_* methods; its body runs once, when the class
  # is defined. It reads the file at TEST_CORPUS_PATH, passes the contents
  # through NLP.normalize, takes .sample(10) of NLP.sentences(corpus), and
  # for each sampled sentence prints the sentence, its NLP.tokenize result,
  # and a blank line.
  #
  # NOTE(review): TEST_CORPUS_PATH is not defined in this file; it is
  # presumably supplied by twitter_ebooks or the caller's environment --
  # confirm before relying on this script.
  class TestTokenize < Minitest::Test
    corpus = NLP.normalize(File.read(TEST_CORPUS_PATH))
    # Split and sample once, then iterate that sample (previously the
    # sample was computed, discarded, and computed again for the loop).
    sents = NLP.sentences(corpus).sample(10)

    sents.each do |sent|
      p sent
      p NLP.tokenize(sent)
      puts
    end
  end
end
 | 
			
		||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue