Github time!

This commit is contained in:
Mispy 2013-11-08 06:02:05 +11:00
commit e87dc5862b
27 changed files with 20178 additions and 0 deletions

18
test/tokenize.rb Executable file
View file

@ -0,0 +1,18 @@
#!/usr/bin/env ruby
# encoding: utf-8
require 'twitter_ebooks'
require 'minitest/autorun'
module Ebooks
  # Smoke test for the tokenizer: prints a random handful of corpus sentences
  # next to the result of tokenizing them, so the output can be checked by eye.
  class TestTokenize < Minitest::Test
    # Reads the file at TEST_CORPUS_PATH, normalizes it, splits it into
    # sentences and, for 10 randomly sampled sentences, prints the sentence,
    # the value returned by NLP.tokenize, and a blank separator line.
    #
    # Defined as a test method (rather than bare statements in the class body)
    # so the Minitest runner actually discovers and reports it. It makes no
    # assertions; it passes as long as nothing raises.
    def test_tokenize
      corpus = NLP.normalize(File.read(TEST_CORPUS_PATH))

      # Split and sample exactly once; the sample is consumed directly instead
      # of being stored in an unused local and then recomputed.
      NLP.sentences(corpus).sample(10).each do |sent|
        p sent
        p NLP.tokenize(sent)
        puts
      end
    end
  end
end