Add note to suffix.rb about tikis

This commit is contained in:
Jaiden Mispy 2016-02-05 05:37:37 +11:00
parent 7df08f9531
commit 36a64736bf

View file

@@ -1,12 +1,15 @@
# encoding: utf-8 # encoding: utf-8
module Ebooks module Ebooks
# This generator uses data identical to a markov model, but # This generator uses data similar to a Markov model, but
# instead of making a chain by looking up bigrams it uses the # instead of making a chain by looking up bigrams it uses the
# positions to randomly replace suffixes in one sentence with # positions to randomly replace token array suffixes in one sentence
# matching suffixes in another # with matching suffixes in another
class SuffixGenerator class SuffixGenerator
# Build a generator from a corpus of tikified sentences # Build a generator from a corpus of tikified sentences
# "tikis" are token indexes -- a way of representing words
# and punctuation as their integer position in a big array
# of such tokens
# @param sentences [Array<Array<Integer>>] # @param sentences [Array<Array<Integer>>]
# @return [SuffixGenerator] # @return [SuffixGenerator]
def self.build(sentences) def self.build(sentences)
@@ -45,7 +48,6 @@ module Ebooks
self self
end end
# Generate a recombined sequence of tikis # Generate a recombined sequence of tikis
# @param passes [Integer] number of times to recombine # @param passes [Integer] number of times to recombine
# @param n [Symbol] :unigrams or :bigrams (affects how conservative the model is) # @param n [Symbol] :unigrams or :bigrams (affects how conservative the model is)