本文整理汇总了Golang中github.com/kljensen/snowball/snowballword.SnowballWord类的典型用法代码示例。如果您正苦于以下问题:Golang SnowballWord类的具体用法?Golang SnowballWord怎么用?Golang SnowballWord使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了SnowballWord类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Golang代码示例。
示例1: removeVerbEnding
// removeVerbEnding searches the part of the word from word.RVstart to
// the end for one of the listed verb endings and, when an acceptable
// one is found, strips it from the end of the word.
//
// It returns true if an ending was removed and false otherwise.
func removeVerbEnding(word *snowballword.SnowballWord) bool {
	ending, endingRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"уйте", "ейте", "ыть", "ыло", "ыли", "ыла", "уют", "ует",
		"нно", "йте", "ишь", "ить", "ите", "ило", "или", "ила",
		"ешь", "ете", "ены", "ено", "ена", "ят", "ют", "ыт", "ым",
		"ыл", "ую", "уй", "ть", "ны", "но", "на", "ло", "ли", "ла",
		"ит", "им", "ил", "ет", "ен", "ем", "ей", "ю", "н", "л", "й",
	)
	if ending == "" {
		// Nothing matched, so there is nothing to remove.
		return false
	}

	n := len(endingRunes)
	switch ending {
	case "ла", "на", "ете", "йте", "ли", "й", "л", "ем", "н",
		"ло", "но", "ет", "ют", "ны", "ть", "ешь", "нно":
		// "Group 1" verb endings are only removed when they follow
		// а (a) or я (ia) in RV; precededByARinRV performs that check.
		if !precededByARinRV(word, n) {
			return false
		}
	}

	word.RemoveLastNRunes(n)
	return true
}
示例2: findRegions
// findRegions computes the start indexes of the regions R1, R2 and RV
// for the given word and returns them in that order.
//
// R1 and R2 are obtained from romance.VnvSuffix. RV is chosen by the
// first rule that applies:
//
//  1. the word starts with "par", "col" or "tap": RV starts right after
//     that prefix;
//  2. the word has at least three letters and its first two letters are
//     vowels: RV starts at index 3;
//  3. otherwise RV starts right after the first vowel that is not the
//     first letter of the word;
//  4. if none of the above apply, RV is empty (it starts at len(word.RS)).
func findRegions(word *snowballword.SnowballWord) (r1start, r2start, rvstart int) {
	// R1 & R2 are defined in the standard manner.
	r1start = romance.VnvSuffix(word, isLowerVowel, 0)
	r2start = romance.VnvSuffix(word, isLowerVowel, r1start)

	// Special-case prefixes take precedence over the vowel rules.
	if prefix, prefixRunes := word.FirstPrefix("par", "col", "tap"); prefix != "" {
		return r1start, r2start, len(prefixRunes)
	}

	length := len(word.RS)

	// Word begins with two vowels: RV is the region after the third letter.
	if length >= 3 && isLowerVowel(word.RS[0]) && isLowerVowel(word.RS[1]) {
		return r1start, r2start, 3
	}

	// Otherwise look for the first vowel that is not at the beginning
	// of the word; RV starts immediately after it.
	for i, r := range word.RS {
		if i == 0 {
			continue
		}
		if isLowerVowel(r) {
			return r1start, r2start, i + 1
		}
	}

	// No suitable position was found: RV is empty.
	return r1start, r2start, length
}
示例3: step2
// step2 removes the suffix "и" when word.RemoveFirstSuffixIn finds it
// in the region starting at word.RVstart.
//
// It returns true if the suffix was removed and false otherwise.
func step2(word *snowballword.SnowballWord) bool {
	removed, _ := word.RemoveFirstSuffixIn(word.RVstart, "и")
	return removed != ""
}
示例4: step5
// step5 undoubles non-vowel endings: if the word ends in "enn", "onn",
// "ett", "ell" or "eill", the last letter is removed.
//
// Note that the return value is always false, whether or not a letter
// was removed.
func step5(word *snowballword.SnowballWord) bool {
	if doubled, _ := word.FirstSuffix("enn", "onn", "ett", "ell", "eill"); doubled != "" {
		word.RemoveLastNRunes(1)
	}
	return false
}
示例5: preprocess
// preprocess computes the R1, R2 and RV region starts with findRegions
// and stores them on the word (R1start, R2start and RVstart).
func preprocess(word *snowballword.SnowballWord) {
	word.R1start, word.R2start, word.RVstart = findRegions(word)
}
示例6: step0
// step0 strips a trailing apostrophe form from the word. The candidate
// suffixes are "'s'", "'s" and "'", tried in that order.
//
// It returns true if a suffix was removed and false otherwise.
func step0(w *snowballword.SnowballWord) bool {
	found, foundRunes := w.FirstSuffix("'s'", "'s", "'")
	if found != "" {
		w.RemoveLastNRunes(len(foundRunes))
		return true
	}
	return false
}
示例7: step2a
// step2a removes verb suffixes beginning with y. It looks for one of
// the listed suffixes in the region starting at word.RVstart and
// deletes it only when the letter immediately before it is 'u'.
//
// It returns true if a suffix was removed and false otherwise.
func step2a(word *snowballword.SnowballWord) bool {
	suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"ya", "ye", "yan", "yen", "yeron", "yendo",
		"yo", "yó", "yas", "yes", "yais", "yamos",
	)
	if suffix == "" {
		return false
	}

	n := len(suffixRunes)

	// Index of the letter that directly precedes the suffix; negative
	// when the suffix is the whole word.
	before := len(word.RS) - n - 1
	if before < 0 || word.RS[before] != 'u' {
		return false
	}

	word.RemoveLastNRunes(n)
	return true
}
示例8: step3
// step3 removes a derivational suffix ("ост" or "ость"). The search is
// restricted to the region starting at word.R2start, i.e. the entire
// ending must lie in R2.
//
// It returns true if a suffix was removed and false otherwise.
func step3(word *snowballword.SnowballWord) bool {
	removed, _ := word.RemoveFirstSuffixIn(word.R2start, "ост", "ость")
	return removed != ""
}
示例9: r1r2
// r1r2 finds the starting points of the two regions R1 & R2.
//
// R1 is the region after the first non-vowel following a vowel,
// or is the null region at the end of the word if there is no
// such non-vowel.
//
// R2 is the region after the first non-vowel following a vowel
// in R1, or is the null region at the end of the word if there
// is no such non-vowel.
//
// As an exception, when the word begins with "gener", "commun" or
// "arsen", R1 starts immediately after that prefix.
//
// See http://snowball.tartarus.org/texts/r1r2.html
func r1r2(word *snowballword.SnowballWord) (r1start, r2start int) {
	specialPrefix, specialPrefixRunes := word.FirstPrefix("gener", "commun", "arsen")

	if specialPrefix != "" {
		// Region starts are indexes into word.RS (runes), so measure
		// the prefix in runes rather than in bytes of the string.
		r1start = len(specialPrefixRunes)
	} else {
		r1start = romance.VnvSuffix(word, isLowerVowel, 0)
	}

	r2start = romance.VnvSuffix(word, isLowerVowel, r1start)
	return r1start, r2start
}
示例10: trimLeftApostrophes
// trimLeftApostrophes trims off all leading apostrophes. (Slight
// variation from the NLTK implementation here, in which only the first
// is removed.)
//
// When apostrophes are removed, word.R1start and word.R2start are
// shifted left by the number of removed runes so they keep pointing at
// the same letters. word.RVstart is not adjusted.
func trimLeftApostrophes(word *snowballword.SnowballWord) {
	// Count the leading apostrophes explicitly. Using the range index
	// as the count is off by one when every rune is an apostrophe,
	// because the loop then ends on the last index rather than on the
	// length of the slice.
	numApostrophes := 0
	for _, r := range word.RS {
		if r != '\'' {
			break
		}
		numApostrophes++
	}

	if numApostrophes == 0 {
		return
	}

	word.RS = word.RS[numApostrophes:]
	word.R1start -= numApostrophes
	word.R2start -= numApostrophes
}
示例11: removePerfectiveGerundEnding
// removePerfectiveGerundEnding searches the part of the word from
// word.RVstart to the end for a perfective gerund ending and, when an
// acceptable one is found, strips it from the end of the word.
//
// It returns true if an ending was removed and false otherwise.
func removePerfectiveGerundEnding(word *snowballword.SnowballWord) bool {
	ending, endingRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"ившись", "ывшись", "вшись", "ивши", "ывши", "вши", "ив", "ыв", "в",
	)
	if ending == "" {
		return false
	}

	n := len(endingRunes)
	switch ending {
	case "в", "вши", "вшись":
		// "Group 1" perfective gerund endings are only removed when
		// they follow а (a) or я (ia) in RV; precededByARinRV performs
		// that check.
		if !precededByARinRV(word, n) {
			return false
		}
	}

	word.RemoveLastNRunes(n)
	return true
}
示例12: removeAdjectivalEnding
// removeAdjectivalEnding removes an adjective ending found in the
// region starting at word.RVstart and then, if one was removed, also
// removes an optional participle ending that now terminates the word.
//
// It returns true if an adjective ending was removed (whether or not a
// participle ending was removed as well) and false otherwise.
func removeAdjectivalEnding(word *snowballword.SnowballWord) bool {
	// Start by looking for (and removing) an adjective ending.
	adjective, _ := word.RemoveFirstSuffixIn(word.RVstart,
		"ими", "ыми", "его", "ого", "ему", "ому", "ее", "ие",
		"ые", "ое", "ей", "ий", "ый", "ой", "ем", "им", "ым",
		"ом", "их", "ых", "ую", "юю", "ая", "яя", "ою", "ею",
	)
	if adjective == "" {
		return false
	}

	// An adjective ending was removed. Look for an optional participle
	// ending in what remains.
	participle, participleRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"ивш", "ывш", "ующ",
		"ем", "нн", "вш", "ющ", "щ",
	)
	if participle == "" {
		return true
	}

	n := len(participleRunes)
	switch participle {
	case "ем", "нн", "вш", "ющ", "щ":
		// "Group 1" participle endings are only removed when they
		// follow а (a) or я (ia) in RV; precededByARinRV performs that
		// check. The adjective ending has already been removed, so the
		// result is still true.
		if !precededByARinRV(word, n) {
			return true
		}
	}

	word.RemoveLastNRunes(n)
	return true
}
示例13: step1
// step1 is the removal of standard suffixes, all of which must occur
// in RV.
//
// It first searches for a PERFECTIVE GERUND ending; if one is removed,
// that is the end of step 1. Otherwise it tries to remove a REFLEXIVE
// ending and then searches in turn for (1) an ADJECTIVAL, (2) a VERB or
// (3) a NOUN ending, stopping as soon as one of them has been removed.
//
// It returns true if a perfective gerund, adjectival, verb or noun
// ending was removed. Removing only a reflexive ending does not make
// the result true.
func step1(word *snowballword.SnowballWord) bool {
	// A perfective gerund ending terminates the step immediately.
	if removePerfectiveGerundEnding(word) {
		return true
	}

	// Reflexive endings are removed unconditionally; the result is
	// deliberately not inspected.
	word.RemoveFirstSuffixIn(word.RVstart, "ся", "сь")

	// Adjectival endings first, then verb endings. The short-circuit
	// keeps the verb search from running once an adjectival ending has
	// been removed.
	if removeAdjectivalEnding(word) || removeVerbEnding(word) {
		return true
	}

	// Finally, noun endings.
	noun, _ := word.RemoveFirstSuffixIn(word.RVstart,
		"иями", "ями", "иях", "иям", "ием", "ией", "ами", "ях",
		"ям", "ья", "ью", "ье", "ом", "ой", "ов", "ия", "ию",
		"ий", "ии", "ие", "ем", "ей", "еи", "ев", "ах", "ам",
		"я", "ю", "ь", "ы", "у", "о", "й", "и", "е", "а",
	)
	return noun != ""
}
示例14: step2b
// step2b removes a verb suffix from the listed set when one is found
// in the region starting at word.RVstart.
//
// For the suffixes "en", "es", "éis" and "emos", the suffix is deleted
// and, if the word then ends in "gu", the trailing "u" is deleted as
// well (the "gu" need not be in RV). Every other suffix is simply
// deleted.
//
// It returns true if a suffix was removed and false otherwise.
func step2b(word *snowballword.SnowballWord) bool {
	suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"iésemos", "iéramos", "iríamos", "eríamos", "aríamos", "ásemos",
		"áramos", "ábamos", "isteis", "iríais", "iremos", "ieseis",
		"ierais", "eríais", "eremos", "asteis", "aríais", "aremos",
		"íamos", "irías", "irían", "iréis", "ieses", "iesen", "ieron",
		"ieras", "ieran", "iendo", "erías", "erían", "eréis", "aseis",
		"arías", "arían", "aréis", "arais", "abais", "íais", "iste",
		"iría", "irás", "irán", "imos", "iese", "iera", "idos", "idas",
		"ería", "erás", "erán", "aste", "ases", "asen", "aría", "arás",
		"arán", "aron", "aras", "aran", "ando", "amos", "ados", "adas",
		"abas", "aban", "ías", "ían", "éis", "áis", "iré", "irá", "ido",
		"ida", "eré", "erá", "emos", "ase", "aré", "ará", "ara", "ado",
		"ada", "aba", "ís", "ía", "ió", "ir", "id", "es", "er", "en",
		"ed", "as", "ar", "an", "ad",
	)
	if suffix == "" {
		return false
	}

	// Every matched suffix is deleted.
	word.RemoveLastNRunes(len(suffixRunes))

	switch suffix {
	case "en", "es", "éis", "emos":
		// If what remains ends in "gu", drop the "u" too.
		if gu, _ := word.FirstSuffix("gu"); gu != "" {
			word.RemoveLastNRunes(1)
		}
	}
	return true
}
示例15: step1a
// step1a normalizes various special "s"-endings.
//
//   - "sses" is replaced by "ss".
//   - "ies" and "ied" are replaced by "i" if preceded by more than one
//     letter, otherwise by "ie" (so ties -> tie, cries -> cri).
//   - "us" and "ss" are left alone.
//   - "s" is deleted if the preceding word part contains a vowel not
//     immediately before the s (so gas and this retain the s, gaps and
//     kiwis lose it).
//
// It returns true if the word was changed and false otherwise.
func step1a(w *snowballword.SnowballWord) bool {
	suffix, suffixRunes := w.FirstSuffix("sses", "ied", "ies", "us", "ss", "s")

	switch suffix {
	case "sses":
		w.ReplaceSuffixRunes(suffixRunes, []rune("ss"), true)
		return true

	case "ies", "ied":
		// The suffix is three letters long, so "more than one letter
		// before it" means the word has more than four letters.
		replacement := "ie"
		if len(w.RS) > 4 {
			replacement = "i"
		}
		w.ReplaceSuffixRunes(suffixRunes, []rune(replacement), true)
		return true

	case "us", "ss":
		// Do nothing.

	case "s":
		// Inspect every letter except the final s and the letter
		// immediately before it.
		limit := len(w.RS) - 2
		for i, r := range w.RS {
			if i >= limit {
				break
			}
			if isLowerVowel(r) {
				w.RemoveLastNRunes(len(suffixRunes))
				return true
			}
		}
	}
	return false
}