Github time!
This commit is contained in:
commit
e87dc5862b
27 changed files with 20178 additions and 0 deletions
14696
test/corpus/0xabad1dea.tweets
Normal file
14696
test/corpus/0xabad1dea.tweets
Normal file
File diff suppressed because it is too large
Load diff
18
test/keywords.rb
Executable file
18
test/keywords.rb
Executable file
|
@@ -0,0 +1,18 @@
|
|||
#!/usr/bin/env ruby
|
||||
# encoding: utf-8
|
||||
|
||||
require 'twitter_ebooks'
|
||||
require 'minitest/autorun'
|
||||
require 'benchmark'
|
||||
|
||||
module Ebooks
  # Keyword-ranking benchmark script.
  #
  # NOTE: the statements below sit directly in the class body (there is no
  # `test_*` method), so they execute once when this file is loaded rather
  # than as a Minitest test case.
  class TestKeywords < Minitest::Test
    # The corpus path is taken from the first command-line argument. When no
    # argument is given, fall back to TEST_CORPUS_PATH (the constant that
    # test/tokenize.rb reads) instead of failing inside File.read(nil).
    # NOTE(review): TEST_CORPUS_PATH is assumed to be provided by
    # twitter_ebooks -- confirm.
    corpus_path = ARGV[0] || TEST_CORPUS_PATH
    corpus = NLP.normalize(File.read(corpus_path))

    puts "Finding and ranking keywords"
    # Prints one "text weight" line per keyword from inside the measured
    # block, then prints the value Benchmark.measure returns for that block.
    puts Benchmark.measure {
      NLP.keywords(corpus).top(50).each do |keyword|
        puts "#{keyword.text} #{keyword.weight}"
      end
    }
  end
end
|
18
test/tokenize.rb
Executable file
18
test/tokenize.rb
Executable file
|
@@ -0,0 +1,18 @@
|
|||
#!/usr/bin/env ruby
|
||||
# encoding: utf-8
|
||||
|
||||
require 'twitter_ebooks'
|
||||
require 'minitest/autorun'
|
||||
|
||||
module Ebooks
  # Tokenizer smoke script: prints a random handful of corpus sentences
  # together with their tokenization for manual inspection.
  #
  # NOTE: the statements below sit directly in the class body (there is no
  # `test_*` method), so they execute once when this file is loaded rather
  # than as a Minitest test case.
  class TestTokenize < Minitest::Test
    corpus = NLP.normalize(File.read(TEST_CORPUS_PATH))

    # Draw the random sample of 10 sentences once and reuse it. Previously
    # this local was assigned but never read, and the sentence split plus
    # sampling was recomputed for the loop below.
    sents = NLP.sentences(corpus).sample(10)

    sents.each do |sent|
      p sent               # the raw sentence
      p NLP.tokenize(sent) # its tokenization
      puts                 # blank line between entries
    end
  end
end
|
Loading…
Add table
Add a link
Reference in a new issue