twitter-ebooks/test/tokenize.rb
2014-02-12 16:23:49 +01:00

18 lines
367 B
Ruby

#!/usr/bin/env ruby
# encoding: utf-8
require 'twitter_ebooks'
require 'minitest/autorun'
module Ebooks
  # Manual smoke check for the tokenizer: prints a handful of sampled corpus
  # sentences next to their tokenization so the result can be inspected by eye.
  #
  # The class defines no test_* methods and makes no assertions; the
  # statements below run when the class body is evaluated, i.e. when this
  # file is loaded.
  class TestTokenize < Minitest::Test
    # NOTE(review): TEST_CORPUS_PATH is not defined in this file; presumably
    # it is supplied by twitter_ebooks or the test setup -- confirm.
    corpus = NLP.normalize(File.read(TEST_CORPUS_PATH))

    # Segment the corpus once and iterate over that single sample, rather
    # than splitting the whole corpus a second time just to draw another one.
    # Assuming NLP.sentences returns an Array, sample(10) yields up to ten
    # randomly chosen sentences, so output differs between runs.
    sents = NLP.sentences(corpus).sample(10)

    sents.each do |sent|
      p sent
      p NLP.tokenize(sent)
      puts # blank line between entries
    end
  end
end