本文整理汇总了Golang中github.com/kljensen/snowball/snowballword.SnowballWord.ReplaceSuffixRunes方法的典型用法代码示例。如果您正苦于以下问题：Golang SnowballWord.ReplaceSuffixRunes方法的具体用法？Golang SnowballWord.ReplaceSuffixRunes怎么用？Golang SnowballWord.ReplaceSuffixRunes使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类github.com/kljensen/snowball/snowballword.SnowballWord的用法示例。
在下文中一共展示了SnowballWord.ReplaceSuffixRunes方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Golang代码示例。
示例1: step3
// step3 stems several of the longer suffixes, acting only when the
// matched suffix lies entirely inside R1.
//
// It returns true when a removal or replacement call was made on w,
// and false when no listed suffix matched, the suffix is outside R1,
// or "ative" was found but does not fit in R2.
func step3(w *snowballword.SnowballWord) bool {
	found, foundRunes := w.FirstSuffix(
		"ational", "tional", "alize", "icate", "ative",
		"iciti", "ical", "ful", "ness",
	)
	n := len(foundRunes)

	// Bail out unless a suffix matched and it fits in R1.
	if found == "" || n > len(w.RS)-w.R1start {
		return false
	}

	var replacement string
	switch found {
	case "ative":
		// "ative" is not rewritten; it is deleted, and only when
		// the region after R2start is at least 5 runes long.
		if len(w.RS)-w.R2start < 5 {
			return false
		}
		w.RemoveLastNRunes(n)
		return true
	case "ational":
		replacement = "ate"
	case "tional":
		replacement = "tion"
	case "alize":
		replacement = "al"
	case "icate", "iciti", "ical":
		replacement = "ic"
	case "ful", "ness":
		// Replaced by the empty string.
		replacement = ""
	}

	w.ReplaceSuffixRunes(foundRunes, []rune(replacement), true)
	return true
}
示例2: step1a
// step1a normalizes the various "s"-style endings.
//
// It returns true when the word was rewritten or shortened and false
// when no listed ending matched or the matched ending is left alone.
func step1a(w *snowballword.SnowballWord) bool {
	ending, endingRunes := w.FirstSuffix("sses", "ied", "ies", "us", "ss", "s")

	switch ending {
	case "sses":
		// sses -> ss
		w.ReplaceSuffixRunes(endingRunes, []rune("ss"), true)
		return true

	case "ies", "ied":
		// Becomes "i" when more than one letter precedes the ending,
		// otherwise "ie" (so ties -> tie, cries -> cri).
		replacement := "ie"
		if len(w.RS) > 4 {
			replacement = "i"
		}
		w.ReplaceSuffixRunes(endingRunes, []rune(replacement), true)
		return true

	case "us", "ss":
		// These endings are kept untouched.
		return false

	case "s":
		// Drop the "s" if a vowel occurs anywhere before the rune
		// that immediately precedes it (gas and this keep the s,
		// gaps and kiwis lose it).
		limit := len(w.RS) - 2
		for i := 0; i < limit; i++ {
			if !isLowerVowel(w.RS[i]) {
				continue
			}
			w.RemoveLastNRunes(len(endingRunes))
			return true
		}
	}

	return false
}
示例3: step2
// step2 stems a set of endings found in R1, including "al", "ness"
// and "li".
//
// It returns false when no listed suffix matched, when the suffix does
// not fit in R1, or when "li" is not preceded by a valid li-ending.
// Every other path returns true; note that the "ogi" path returns true
// even when the preceding rune is not "l" and nothing is replaced.
func step2(w *snowballword.SnowballWord) bool {
	// Candidate suffixes, longest first.
	suffix, suffixRunes := w.FirstSuffix(
		"ational", "fulness", "iveness", "ization", "ousness",
		"biliti", "lessli", "tional", "alism", "aliti", "ation",
		"entli", "fulli", "iviti", "ousli", "anci", "abli",
		"alli", "ator", "enci", "izer", "bli", "ogi", "li",
	)

	// Nothing to do unless a suffix matched and it fits in R1.
	if suffix == "" || len(suffixRunes) > len(w.RS)-w.R1start {
		return false
	}

	var replacement string
	switch suffix {
	case "li":
		// Delete only if preceded by a valid li-ending, i.e. one of
		// the characters c, d, e, g, h, k, m, n, r, t.
		if pos := len(w.RS) - 3; pos >= 0 {
			switch w.RS[pos] {
			case 'c', 'd', 'e', 'g', 'h', 'k', 'm', 'n', 'r', 't':
				w.RemoveLastNRunes(len(suffixRunes))
				return true
			}
		}
		return false

	case "ogi":
		// Replace by "og" if preceded by "l".
		if pos := len(w.RS) - 4; pos >= 0 && w.RS[pos] == 'l' {
			w.ReplaceSuffixRunes(suffixRunes, []rune("og"), true)
		}
		return true

	// All remaining suffixes are plain one-for-one replacements.
	case "tional":
		replacement = "tion"
	case "enci":
		replacement = "ence"
	case "anci":
		replacement = "ance"
	case "abli":
		replacement = "able"
	case "entli":
		replacement = "ent"
	case "izer", "ization":
		replacement = "ize"
	case "ational", "ation", "ator":
		replacement = "ate"
	case "alism", "aliti", "alli":
		replacement = "al"
	case "fulness", "fulli":
		replacement = "ful"
	case "ousli", "ousness":
		replacement = "ous"
	case "iveness", "iviti":
		replacement = "ive"
	case "biliti", "bli":
		replacement = "ble"
	case "lessli":
		replacement = "less"
	}

	w.ReplaceSuffixRunes(suffixRunes, []rune(replacement), true)
	return true
}
示例4: step4
// step4 cleans up residual suffixes.
//
// It first drops a trailing "s" unless that "s" is preceded by one of
// a, i, o, u, è or s, and then handles the first matching suffix found
// between RVstart and the end of the word. It returns true when the
// word was altered by either stage.
func step4(word *snowballword.SnowballWord) bool {
	changed := false

	// Diagnostic output for one specific word.
	if word.String() == "voudrion" {
		log.Println("...", word)
	}

	// Trailing "s": delete it unless preceded by a, i, o, u, è or s.
	if last := len(word.RS) - 1; last >= 1 && word.RS[last] == 's' {
		switch word.RS[last-1] {
		case 'a', 'i', 'o', 'u', 'è', 's':
			// The "s" stays, and none of the suffixes below end
			// in "s", so we are done.
			return false
		}
		word.RemoveLastNRunes(1)
		changed = true
	}

	// Everything below is restricted to the RV region.
	suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"Ière", "ière", "Ier", "ier", "ion", "e", "ë",
	)

	switch suffix {
	case "":
		return changed

	case "ion":
		// Delete if in R2 and preceded by s or t in RV.
		const ionLen int = 3 // rune length of "ion"
		before := len(word.RS) - ionLen - 1
		if !word.FitsInR2(ionLen) || before < 0 || !word.FitsInRV(ionLen+1) {
			return changed
		}
		if r := word.RS[before]; r == 's' || r == 't' {
			word.RemoveLastNRunes(ionLen)
			return true
		}
		return changed

	case "ier", "ière", "Ier", "Ière":
		// Replace with "i".
		word.ReplaceSuffixRunes(suffixRunes, []rune("i"), true)
		return true

	case "e":
		word.RemoveLastNRunes(1)
		return true

	case "ë":
		// Delete only when preceded by "gu".
		end := len(word.RS) - 1
		if end >= 2 && word.RS[end-2] == 'g' && word.RS[end-1] == 'u' {
			word.RemoveLastNRunes(1)
			return true
		}
		return changed
	}

	return true
}
示例5: step1
// Step 1 is the removal of standard suffixes
//
// It takes the first suffix from a fixed list (ordered longest first)
// that the word ends with, and then replaces or deletes that suffix
// depending on whether it starts at or after R1start, R2start or
// RVstart. It returns false when no suffix is found and for the
// "amment", "emment", "ment" and "ments" cases handled in RV; the
// other visible branches return true after changing the word.
//
// NOTE: only the first part of this function is shown; the remainder
// of the body is omitted in this listing.
func step1(word *snowballword.SnowballWord) bool {
suffix, suffixRunes := word.FirstSuffix(
"issements", "issement", "atrices", "utions", "usions", "logies",
"emment", "ements", "atrice", "ations", "ateurs", "amment", "ution",
"usion", "ments", "logie", "istes", "ismes", "iqUes", "euses",
"ences", "ement", "ation", "ateur", "ances", "ables", "ment",
"ités", "iste", "isme", "iqUe", "euse", "ence", "eaux", "ance",
"able", "ives", "ité", "eux", "aux", "ive", "ifs", "if",
)
// No listed suffix at the end of the word: nothing to do.
if suffix == "" {
return false
}
// A suffix is "in" a region when it starts at or after that
// region's start index.
isInR1 := (word.R1start <= len(word.RS)-len(suffixRunes))
isInR2 := (word.R2start <= len(word.RS)-len(suffixRunes))
isInRV := (word.RVstart <= len(word.RS)-len(suffixRunes))
// Handle simple replacements & deletions in R2 first
if isInR2 {
// Handle simple replacements in R2
repl := ""
switch suffix {
case "logie", "logies":
repl = "log"
case "usion", "ution", "usions", "utions":
repl = "u"
case "ence", "ences":
repl = "ent"
}
// An empty repl means the suffix is not one of the simple
// replacement cases above.
if repl != "" {
word.ReplaceSuffixRunes(suffixRunes, []rune(repl), true)
return true
}
// Handle simple deletions in R2
switch suffix {
case "ance", "iqUe", "isme", "able", "iste", "eux", "ances", "iqUes", "ismes", "ables", "istes":
word.RemoveLastNRunes(len(suffixRunes))
return true
}
}
// Handle simple replacements in RV
if isInRV {
// NOTE: these are "special" suffixes in that
// we must still do steps 2a and 2b of the
// French stemmer even when these suffixes are
// found in step1. Therefore, we are returning
// `false` here.
repl := ""
switch suffix {
case "amment":
repl = "ant"
case "emment":
repl = "ent"
}
if repl != "" {
word.ReplaceSuffixRunes(suffixRunes, []rune(repl), true)
return false
}
// Delete if preceded by a vowel that is also in RV
if suffix == "ment" || suffix == "ments" {
// idx is the position of the rune just before the suffix.
idx := len(word.RS) - len(suffixRunes) - 1
if idx >= word.RVstart && isLowerVowel(word.RS[idx]) {
word.RemoveLastNRunes(len(suffixRunes))
return false
}
// Suffix kept: still return false, as for the deletion above.
return false
}
}
// Handle all the other "special" cases. All of these
// return true immediately after changing the word.
//
switch suffix {
case "eaux":
// Replace with eau
word.ReplaceSuffixRunes(suffixRunes, []rune("eau"), true)
return true
case "aux":
// Replace with al if in R1
if isInR1 {
word.ReplaceSuffixRunes(suffixRunes, []rune("al"), true)
return true
}
case "euse", "euses":
// Delete if in R2, else replace by eux if in R1
if isInR2 {
//......... part of the code is omitted here .........
示例6: step0
// step0 removes attached pronouns.
//
// It looks for a pronoun suffix (suffix1) in RV and, if one is found,
// for a verb ending (suffix2) in RV immediately before that pronoun.
// When both are present the pronoun is removed; for the accented verb
// endings the acute accent is removed as well
// (e.g., haciéndola -> haciendo).
//
// It returns true when the word was changed and false otherwise.
func step0(word *snowballword.SnowballWord) bool {
	// Search for the first matching pronoun suffix in RV
	// (the list is ordered longest first).
	suffix1, suffix1Runes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"selas", "selos", "sela", "selo", "las", "les",
		"los", "nos", "me", "se", "la", "le", "lo",
	)

	// If the suffix is empty or not in RV, we have nothing to do.
	if suffix1 == "" {
		return false
	}

	// The pronoun is only removed when it follows one of these
	// endings. Lengths are taken from the rune slices, matching how
	// the suffixes are removed below.
	suffix2, suffix2Runes := word.FirstSuffixIn(word.RVstart, len(word.RS)-len(suffix1Runes),
		"iéndo", "iendo", "yendo", "ando", "ándo",
		"ár", "ér", "ír", "ar", "er", "ir",
	)

	switch suffix2 {
	case "":
		// Nothing to do
		return false

	case "iéndo", "ándo", "ár", "ér", "ír":
		// In these cases, deletion is followed by removing
		// the acute accent (e.g., haciéndola -> haciendo).
		var suffix2repl string
		switch suffix2 {
		case "iéndo":
			suffix2repl = "iendo"
		case "ándo":
			suffix2repl = "ando"
		case "ár":
			suffix2repl = "ar"
		case "ér":
			// Without this case the replacement stayed empty and
			// the whole "ér" ending was deleted instead of being
			// de-accented.
			suffix2repl = "er"
		case "ír":
			suffix2repl = "ir"
		}
		word.RemoveLastNRunes(len(suffix1Runes))
		word.ReplaceSuffixRunes(suffix2Runes, []rune(suffix2repl), true)
		return true

	case "ando", "iendo", "ar", "er", "ir":
		word.RemoveLastNRunes(len(suffix1Runes))
		return true

	case "yendo":
		// The pronoun is removed only if a "u" occurs somewhere in
		// the part of the word before "yendo".
		// NOTE(review): the Snowball description reads "yendo
		// following u", which suggests the "u" should immediately
		// precede "yendo" - confirm before tightening this check.
		stemLen := len(word.RS) - (len(suffix1Runes) + len(suffix2Runes))
		for i := 0; i < stemLen; i++ {
			if word.RS[i] == 'u' {
				word.RemoveLastNRunes(len(suffix1Runes))
				return true
			}
		}
	}
	return false
}
示例7: step1b
// step1b normalizes the various "ly" and "ed" suffixes.
//
// It returns true when one of the "eed" endings was found (whether or
// not it was replaced) or when an "ed"/"ing" ending was removed, and
// false when no listed suffix matched or the part before an
// "ed"/"ing" ending contains no vowel.
func step1b(w *snowballword.SnowballWord) bool {
	suffix, suffixRunes := w.FirstSuffix("eedly", "ingly", "edly", "ing", "eed", "ed")

	switch suffix {
	case "eed", "eedly":
		// Replace by "ee", but only when the suffix is in R1.
		if len(suffixRunes) <= len(w.RS)-w.R1start {
			w.ReplaceSuffixRunes(suffixRunes, []rune("ee"), true)
		}
		return true
	case "ed", "edly", "ing", "ingly":
		// Handled below.
	default:
		// No suffix found.
		return false
	}

	// The ending is only deleted when the preceding part of the word
	// contains a vowel.
	vowelSeen := false
	for _, r := range w.RS[:len(w.RS)-len(suffixRunes)] {
		if isLowerVowel(r) {
			vowelSeen = true
			break
		}
	}
	if !vowelSeen {
		return false
	}

	// Two transformations follow, so R1 and R2 are remembered here
	// and restored by hand at the end.
	savedR1, savedR2 := w.R1start, w.R2start

	w.RemoveLastNRunes(len(suffixRunes))

	// Inspect what the word ends with now that the suffix is gone.
	tail, tailRunes := w.FirstSuffix("at", "bl", "iz", "bb", "dd", "ff", "gg", "mm", "nn", "pp", "rr", "tt")
	switch tail {
	case "":
		// A short word gets an "e" appended; R1 and R2 are then set
		// to the end of the word (i.e. they are empty).
		if isShortWord(w) {
			w.RS = append(w.RS, 'e')
			w.R1start = len(w.RS)
			w.R2start = len(w.RS)
			return true
		}
	case "at", "bl", "iz":
		// Words ending "at", "bl" or "iz" get an "e" appended.
		w.ReplaceSuffixRunes(tailRunes, []rune(tail+"e"), true)
	case "bb", "dd", "ff", "gg", "mm", "nn", "pp", "rr", "tt":
		// A trailing double from this list loses its last letter.
		// Only the doubles listed here count.
		w.RemoveLastNRunes(1)
	}

	// Restore the remembered R1 and R2, clamped to the new length.
	end := len(w.RS)
	w.R1start = savedR1
	if w.R1start > end {
		w.R1start = end
	}
	w.R2start = savedR2
	if w.R2start > end {
		w.R2start = end
	}
	return true
}