Merge pull request #92 from Gotos/master
Fix utf8 in keywords and make stopwords accessible for bot developers
This commit is contained in:
commit
8a67a69649
2 changed files with 3 additions and 3 deletions
|
@ -14,10 +14,10 @@ module Ebooks
|
|||
# to be using it all of the time
|
||||
|
||||
# Lazily loads an array of stopwords
|
||||
# Stopwords are common English words that should often be ignored
|
||||
# Stopwords are common words that should often be ignored
|
||||
# @return [Array<String>]
|
||||
def self.stopwords
|
||||
@stopwords ||= File.read(File.join(DATA_PATH, 'stopwords.txt')).split
|
||||
@stopwords ||= File.exists?('stopwords.txt') ? File.read('stopwords.txt').split : []
|
||||
end
|
||||
|
||||
# Lazily loads an array of known English nouns
|
||||
|
@ -99,7 +99,7 @@ module Ebooks
|
|||
#set :vowels, 1 # => default: 0 = not considered
|
||||
#set :consonants, 5 # => default: 0 = not considered
|
||||
#set :ignore_case, true # => default: false
|
||||
set :word_pattern, /(?<!@)(?<=\s)[\w']+/ # => default: /\w+/
|
||||
set :word_pattern, /(?<!@)(?<=\s)[\p{Word}']+/ # => default: /\w+/
|
||||
#set :stemming, true # => default: false
|
||||
end
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue