Add note to suffix.rb about tikis
This commit is contained in:
		
							parent
							
								
									7df08f9531
								
							
						
					
					
						commit
						36a64736bf
					
				
					 1 changed files with 6 additions and 4 deletions
				
			
		|  | @ -1,12 +1,15 @@ | |||
| # encoding: utf-8 | ||||
| 
 | ||||
| module Ebooks | ||||
|   # This generator uses data identical to a markov model, but | ||||
|   # This generator uses data similar to a Markov model, but | ||||
|   # instead of making a chain by looking up bigrams it uses the | ||||
|   # positions to randomly replace suffixes in one sentence with | ||||
|   # matching suffixes in another | ||||
|   # positions to randomly replace token array suffixes in one sentence | ||||
|   # with matching suffixes in another | ||||
|   class SuffixGenerator | ||||
|     # Build a generator from a corpus of tikified sentences | ||||
|     # "tikis" are token indexes -- a way of representing words | ||||
|     # and punctuation as their integer position in a big array | ||||
|     # of such tokens | ||||
|     # @param sentences [Array<Array<Integer>>] | ||||
|     # @return [SuffixGenerator] | ||||
|     def self.build(sentences) | ||||
|  | @ -45,7 +48,6 @@ module Ebooks | |||
|       self | ||||
|     end | ||||
| 
 | ||||
| 
 | ||||
|     # Generate a recombined sequence of tikis | ||||
|     # @param passes [Integer] number of times to recombine | ||||
|     # @param n [Symbol] :unigrams or :bigrams (affects how conservative the model is) | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Jaiden Mispy
						Jaiden Mispy