Github time!

This commit is contained in:
Mispy 2013-11-08 06:02:05 +11:00
commit e87dc5862b
27 changed files with 20178 additions and 0 deletions

14696
test/corpus/0xabad1dea.tweets Normal file

File diff suppressed because it is too large Load diff

18
test/keywords.rb Executable file
View file

@ -0,0 +1,18 @@
#!/usr/bin/env ruby
# encoding: utf-8
require 'twitter_ebooks'
require 'minitest/autorun'
require 'benchmark'
module Ebooks
  # Keyword-extraction benchmark script.
  #
  # Everything in this class body runs while the class is being defined
  # (i.e. when the file is loaded); no test_* methods are declared here.
  #
  # Usage: test/keywords.rb [CORPUS_FILE]
  class TestKeywords < Minitest::Test
    # Use the corpus file named on the command line when one is given.
    # Without an argument, File.read(nil) would raise a TypeError, so fall
    # back to TEST_CORPUS_PATH instead.
    # NOTE(review): TEST_CORPUS_PATH is presumably defined by twitter_ebooks
    # (the tokenize script relies on it too) -- confirm.
    corpus_path = ARGV[0] || TEST_CORPUS_PATH
    corpus = NLP.normalize(File.read(corpus_path))

    puts "Finding and ranking keywords"
    # Print the top 50 keywords with their weights, then the timing report
    # returned by Benchmark.measure for the whole extraction.
    puts Benchmark.measure {
      NLP.keywords(corpus).top(50).each do |keyword|
        puts "#{keyword.text} #{keyword.weight}"
      end
    }
  end
end

18
test/tokenize.rb Executable file
View file

@ -0,0 +1,18 @@
#!/usr/bin/env ruby
# encoding: utf-8
require 'twitter_ebooks'
require 'minitest/autorun'
module Ebooks
  # Tokenizer smoke script.
  #
  # Everything in this class body runs while the class is being defined
  # (i.e. when the file is loaded); no test_* methods are declared here.
  # It normalizes the corpus at TEST_CORPUS_PATH, draws a sample of its
  # sentences, and prints each sentence followed by its tokenization.
  class TestTokenize < Minitest::Test
    corpus = NLP.normalize(File.read(TEST_CORPUS_PATH))

    # Split and sample once, then reuse the result; previously the sample was
    # assigned to an unused local and the whole computation was repeated.
    sents = NLP.sentences(corpus).sample(10)

    sents.each do |sent|
      p sent
      p NLP.tokenize(sent)
      puts # blank line between sentences
    end
  end
end