Merge pull request #92 from Gotos/master

Fix UTF-8 in keywords and make stopwords accessible for bot developers
This commit is contained in:
Jaiden Mispy 2015-09-30 04:49:10 +08:00
commit 8a67a69649
2 changed files with 3 additions and 3 deletions

View file

@ -14,10 +14,10 @@ module Ebooks
# to be using it all of the time
# Lazily loads an array of stopwords
# Stopwords are common English words that should often be ignored
# Stopwords are common words that should often be ignored
# @return [Array<String>]
def self.stopwords
# Memoized in @stopwords: each `||=` below only evaluates its right-hand
# side while @stopwords is still nil/false.
#
# NOTE(review): these two assignments look like the removed and the added
# line of the diff hunk rendered together — confirm which one is intended.
# As written, whenever the first read succeeds the second is never
# evaluated, because String#split always returns an Array (truthy).

# Reads 'stopwords.txt' from under DATA_PATH and splits it on whitespace.
# File.read raises if the file cannot be read.
@stopwords ||= File.read(File.join(DATA_PATH, 'stopwords.txt')).split
# Looks for 'stopwords.txt' relative to the current working directory and
# falls back to an empty array when it is absent.
# NOTE(review): File.exists? is a deprecated alias of File.exist? and was
# removed in Ruby 3.2; this expression raises NoMethodError there if reached.
@stopwords ||= File.exists?('stopwords.txt') ? File.read('stopwords.txt').split : []
end
# Lazily loads an array of known English nouns
@ -99,7 +99,7 @@ module Ebooks
#set :vowels, 1 # => default: 0 = not considered
#set :consonants, 5 # => default: 0 = not considered
#set :ignore_case, true # => default: false
set :word_pattern, /(?<!@)(?<=\s)[\w']+/ # => default: /\w+/
set :word_pattern, /(?<!@)(?<=\s)[\p{Word}']+/ # => default: /\w+/
#set :stemming, true # => default: false
end