From 36a64736bf45eac179b8b4c150fb9ac4aa3d25ad Mon Sep 17 00:00:00 2001 From: Jaiden Mispy Date: Fri, 5 Feb 2016 05:37:37 +1100 Subject: [PATCH] Add note to suffix.rb about tikis --- lib/twitter_ebooks/suffix.rb | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/twitter_ebooks/suffix.rb b/lib/twitter_ebooks/suffix.rb index f409588..5c250c0 100644 --- a/lib/twitter_ebooks/suffix.rb +++ b/lib/twitter_ebooks/suffix.rb @@ -1,12 +1,15 @@ # encoding: utf-8 module Ebooks - # This generator uses data identical to a markov model, but + # This generator uses data similar to a Markov model, but # instead of making a chain by looking up bigrams it uses the - # positions to randomly replace suffixes in one sentence with - # matching suffixes in another + # positions to randomly replace token array suffixes in one sentence + # with matching suffixes in another class SuffixGenerator # Build a generator from a corpus of tikified sentences + # "tikis" are token indexes-- a way of representing words + # and punctuation as their integer position in a big array + # of such tokens # @param sentences [Array<Array<Integer>>] # @return [SuffixGenerator] def self.build(sentences) @@ -45,7 +48,6 @@ module Ebooks self end - # Generate a recombined sequence of tikis # @param passes [Integer] number of times to recombine # @param n [Symbol] :unigrams or :bigrams (affects how conservative the model is)