Add note to suffix.rb about tikis
This commit is contained in:
		
							parent
							
								
									7df08f9531
								
							
						
					
					
						commit
						36a64736bf
					
				
					 1 changed file with 6 additions and 4 deletions
				
			
		| 
						 | 
				
			
			@ -1,12 +1,15 @@
 | 
			
		|||
# encoding: utf-8
 | 
			
		||||
 | 
			
		||||
module Ebooks
 | 
			
		||||
  # This generator uses data identical to a markov model, but
 | 
			
		||||
  # This generator uses data similar to a Markov model, but
 | 
			
		||||
  # instead of making a chain by looking up bigrams it uses the
 | 
			
		||||
  # positions to randomly replace suffixes in one sentence with
 | 
			
		||||
  # matching suffixes in another
 | 
			
		||||
  # positions to randomly replace token array suffixes in one sentence
 | 
			
		||||
  # with matching suffixes in another
 | 
			
		||||
  class SuffixGenerator
 | 
			
		||||
    # Build a generator from a corpus of tikified sentences
 | 
			
		||||
    # "tikis" are token indexes -- a way of representing words
 | 
			
		||||
    # and punctuation as their integer position in a big array
 | 
			
		||||
    # of such tokens
 | 
			
		||||
    # @param sentences [Array<Array<Integer>>]
 | 
			
		||||
    # @return [SuffixGenerator]
 | 
			
		||||
    def self.build(sentences)
 | 
			
		||||
| 
						 | 
				
			
			@ -45,7 +48,6 @@ module Ebooks
 | 
			
		|||
      self
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    # Generate a recombined sequence of tikis
 | 
			
		||||
    # @param passes [Integer] number of times to recombine
 | 
			
		||||
    # @param n [Symbol] :unigrams or :bigrams (affects how conservative the model is)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue