当前位置: 首页>>代码示例>>Golang>>正文


Golang snowballword.SnowballWord类代码示例

本文整理汇总了Golang中github.com/kljensen/snowball/snowballword.SnowballWord的典型用法代码示例。如果您正苦于以下问题:Golang SnowballWord类的具体用法?Golang SnowballWord怎么用?Golang SnowballWord使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了SnowballWord类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Golang代码示例。

示例1: removeVerbEnding

// removeVerbEnding looks for the first verb ending from the list below that
// lies in RV and deletes it, reporting whether a deletion took place.
//
// "Group 1" endings are deleted only when they follow а (a) or я (ia) in RV,
// as decided by precededByARinRV; every other ending is deleted
// unconditionally.
func removeVerbEnding(word *snowballword.SnowballWord) bool {
	ending, endingRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"уйте", "ейте", "ыть", "ыло", "ыли", "ыла", "уют", "ует",
		"нно", "йте", "ишь", "ить", "ите", "ило", "или", "ила",
		"ешь", "ете", "ены", "ено", "ена", "ят", "ют", "ыт", "ым",
		"ыл", "ую", "уй", "ть", "ны", "но", "на", "ло", "ли", "ла",
		"ит", "им", "ил", "ет", "ен", "ем", "ей", "ю", "н", "л", "й",
	)
	if ending == "" {
		// No verb ending in RV.
		return false
	}

	switch ending {
	case "ла", "на", "ете", "йте", "ли", "й", "л", "ем", "н",
		"ло", "но", "ет", "ют", "ны", "ть", "ешь", "нно":
		// Group 1 ending: leave the word untouched unless the
		// а/я precondition holds.
		if !precededByARinRV(word, len(endingRunes)) {
			return false
		}
	}

	word.RemoveLastNRunes(len(endingRunes))
	return true
}
开发者ID:kljensen,项目名称:snowball,代码行数:28,代码来源:step1.go

示例2: findRegions

// findRegions returns the offsets into word.RS at which the regions
// R1, R2 and RV begin.
//
// R1 and R2 are computed with romance.VnvSuffix. RV is chosen by the first
// rule that applies:
//
//  1. the word starts with "par", "col" or "tap": RV follows that prefix;
//  2. the word starts with two vowels: RV follows the third letter;
//  3. otherwise RV follows the first vowel that is not the first letter;
//  4. if none of the above applies, RV is empty (it starts at the end of
//     the word).
func findRegions(word *snowballword.SnowballWord) (r1start, r2start, rvstart int) {
	r1start = romance.VnvSuffix(word, isLowerVowel, 0)
	r2start = romance.VnvSuffix(word, isLowerVowel, r1start)

	wordLen := len(word.RS)

	// Rule 1: exceptional prefixes.
	if prefix, prefixRunes := word.FirstPrefix("par", "col", "tap"); prefix != "" {
		return r1start, r2start, len(prefixRunes)
	}

	// Rule 2: two leading vowels.
	if wordLen >= 3 && isLowerVowel(word.RS[0]) && isLowerVowel(word.RS[1]) {
		return r1start, r2start, 3
	}

	// Rule 3: first vowel after the initial letter.
	for i := 1; i < wordLen; i++ {
		if isLowerVowel(word.RS[i]) {
			return r1start, r2start, i + 1
		}
	}

	// Rule 4: no position found, so RV is the empty region.
	return r1start, r2start, wordLen
}
开发者ID:kljensen,项目名称:snowball,代码行数:35,代码来源:common.go

示例3: step2

// step2 removes the suffix "и" when it lies in RV and reports whether
// the suffix was removed.
func step2(word *snowballword.SnowballWord) bool {
	removed, _ := word.RemoveFirstSuffixIn(word.RVstart, "и")
	return removed != ""
}
开发者ID:kljensen,项目名称:snowball,代码行数:9,代码来源:step2.go

示例4: step5

// step5 undoubles non-vowel endings: when the word ends in "enn", "onn",
// "ett", "ell" or "eill", its last rune is removed.
//
// The return value is always false, whether or not the word was changed.
func step5(word *snowballword.SnowballWord) bool {
	if doubled, _ := word.FirstSuffix("enn", "onn", "ett", "ell", "eill"); doubled != "" {
		word.RemoveLastNRunes(1)
	}
	return false
}
开发者ID:kljensen,项目名称:snowball,代码行数:10,代码来源:step5.go

示例5: preprocess

// preprocess computes the start of the R1, R2 and RV regions with
// findRegions and records them on word.
func preprocess(word *snowballword.SnowballWord) {
	word.R1start, word.R2start, word.RVstart = findRegions(word)
}
开发者ID:kljensen,项目名称:snowball,代码行数:8,代码来源:preprocess.go

示例6: step0

// step0 strips a trailing "'s'", "'s" or "'" from w (the first of these
// that matches) and reports whether anything was stripped.
func step0(w *snowballword.SnowballWord) bool {
	ending, endingRunes := w.FirstSuffix("'s'", "'s", "'")
	found := ending != ""
	if found {
		w.RemoveLastNRunes(len(endingRunes))
	}
	return found
}
开发者ID:kljensen,项目名称:snowball,代码行数:10,代码来源:step0.go

示例7: step2a

// step2a removes verb suffixes beginning with y.
//
// It looks in RV for the first matching suffix from the list below and
// deletes it only when the rune immediately before it is 'u'. That 'u'
// is looked up in the whole word, not just in RV. The result reports
// whether a suffix was deleted.
func step2a(word *snowballword.SnowballWord) bool {
	ending, endingRunes := word.FirstSuffixIn(word.RVstart, len(word.RS), "ya", "ye", "yan", "yen", "yeron", "yendo", "yo", "yó", "yas", "yes", "yais", "yamos")
	if ending == "" {
		return false
	}

	// Position of the rune just before the suffix; negative when the
	// suffix makes up the entire word.
	before := len(word.RS) - len(endingRunes) - 1
	if before < 0 || word.RS[before] != 'u' {
		return false
	}

	word.RemoveLastNRunes(len(endingRunes))
	return true
}
开发者ID:kljensen,项目名称:snowball,代码行数:15,代码来源:step2a.go

示例8: step3

// step3 removes a derivational ending ("ост" or "ость") when the whole
// ending lies in R2, and reports whether one was removed.
func step3(word *snowballword.SnowballWord) bool {
	removed, _ := word.RemoveFirstSuffixIn(word.R2start, "ост", "ость")
	return removed != ""
}
开发者ID:kljensen,项目名称:snowball,代码行数:13,代码来源:step3.go

示例9: r1r2

// r1r2 returns the starting offsets of the regions R1 and R2.
//
// R1 is the region after the first non-vowel following a vowel, or the
// null region at the end of the word if there is no such non-vowel.
// Words beginning with "gener", "commun" or "arsen" are exceptions:
// for them R1 starts directly after that prefix.
//
// R2 is the region after the first non-vowel following a vowel in R1,
// or the null region at the end of the word if there is no such
// non-vowel.
//
// See http://snowball.tartarus.org/texts/r1r2.html
func r1r2(word *snowballword.SnowballWord) (r1start, r2start int) {
	if prefix, prefixRunes := word.FirstPrefix("gener", "commun", "arsen"); prefix != "" {
		// Exceptional prefix: R1 begins right after it.
		r1start = len(prefixRunes)
	} else {
		r1start = romance.VnvSuffix(word, isLowerVowel, 0)
	}

	r2start = romance.VnvSuffix(word, isLowerVowel, r1start)
	return r1start, r2start
}
开发者ID:kljensen,项目名称:snowball,代码行数:24,代码来源:common.go

示例10: trimLeftApostrophes

// trimLeftApostrophes removes every leading apostrophe from word and moves
// R1start and R2start left by the number of runes removed.
//
// (Slight variation from the NLTK implementation, in which only the first
// apostrophe is removed.)
func trimLeftApostrophes(word *snowballword.SnowballWord) {
	// Count the leading apostrophes explicitly. Reusing the range index
	// after the loop would be one short whenever the loop runs to
	// completion, i.e. when the word consists solely of apostrophes.
	numApostrophes := 0
	for _, r := range word.RS {
		if r != '\'' {
			break
		}
		numApostrophes++
	}
	if numApostrophes == 0 {
		return
	}

	word.RS = word.RS[numApostrophes:]

	// NOTE(review): the region starts are shifted without clamping, so they
	// become negative if they were smaller than the number of runes removed.
	// Presumably callers recompute the regions afterwards; confirm.
	word.R1start -= numApostrophes
	word.R2start -= numApostrophes
}
开发者ID:kljensen,项目名称:snowball,代码行数:22,代码来源:common.go

示例11: removePerfectiveGerundEnding

// removePerfectiveGerundEnding looks for the first perfective gerund ending
// from the list below that lies in RV and deletes it, reporting whether a
// deletion took place.
//
// The "Group 1" endings ("в", "вши", "вшись") are deleted only when they
// follow а (a) or я (ia) in RV, as decided by precededByARinRV.
func removePerfectiveGerundEnding(word *snowballword.SnowballWord) bool {
	ending, endingRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"ившись", "ывшись", "вшись", "ивши", "ывши", "вши", "ив", "ыв", "в",
	)
	if ending == "" {
		// No perfective gerund ending in RV.
		return false
	}

	switch ending {
	case "в", "вши", "вшись":
		// Group 1 ending: leave the word untouched unless the
		// а/я precondition holds.
		if !precededByARinRV(word, len(endingRunes)) {
			return false
		}
	}

	word.RemoveLastNRunes(len(endingRunes))
	return true
}
开发者ID:kljensen,项目名称:snowball,代码行数:23,代码来源:step1.go

示例12: removeAdjectivalEnding

// removeAdjectivalEnding removes an adjective ending found in RV and then,
// if one was removed, an optional participle ending that is now at the end
// of the word. It reports whether an adjective ending was removed; the
// participle ending does not affect the result.
//
// "Group 1" participle endings are removed only when they follow а (a) or
// я (ia) in RV, as decided by precededByARinRV.
func removeAdjectivalEnding(word *snowballword.SnowballWord) bool {
	adjective, _ := word.RemoveFirstSuffixIn(word.RVstart,
		"ими", "ыми", "его", "ого", "ему", "ому", "ее", "ие",
		"ые", "ое", "ей", "ий", "ый", "ой", "ем", "им", "ым",
		"ом", "их", "ых", "ую", "юю", "ая", "яя", "ою", "ею",
	)
	if adjective == "" {
		return false
	}

	// The adjective ending is gone; look for a participle ending behind it.
	participle, participleRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"ивш", "ывш", "ующ",
		"ем", "нн", "вш", "ющ", "щ",
	)
	switch participle {
	case "":
		// Nothing further to strip.
		return true
	case "ем", "нн", "вш", "ющ", "щ":
		// Group 1 participle ending: keep it unless the а/я
		// precondition holds.
		if !precededByARinRV(word, len(participleRunes)) {
			return true
		}
	}

	word.RemoveLastNRunes(len(participleRunes))
	return true
}
开发者ID:kljensen,项目名称:snowball,代码行数:37,代码来源:step1.go

示例13: step1

// step1 removes standard suffixes, all of which must occur in RV, and
// reports whether a perfective gerund, adjectival, verb or noun ending
// was removed.
//
// The order is: search for a PERFECTIVE GERUND ending and, if one is
// removed, stop. Otherwise remove a REFLEXIVE ending if present (this
// alone does not make the result true), then try in turn (1) an
// ADJECTIVAL, (2) a VERB and (3) a NOUN ending, stopping at the first
// one that is removed.
func step1(word *snowballword.SnowballWord) bool {
	if removePerfectiveGerundEnding(word) {
		return true
	}

	// Reflexive endings are stripped unconditionally; the outcome is
	// deliberately ignored.
	word.RemoveFirstSuffixIn(word.RVstart, "ся", "сь")

	// Adjectival first, then verb; || short-circuits so the verb endings
	// are only tried when no adjectival ending was removed.
	if removeAdjectivalEnding(word) || removeVerbEnding(word) {
		return true
	}

	// Finally, noun endings.
	nounEnding, _ := word.RemoveFirstSuffixIn(word.RVstart,
		"иями", "ями", "иях", "иям", "ием", "ией", "ами", "ях",
		"ям", "ья", "ью", "ье", "ом", "ой", "ов", "ия", "ию",
		"ий", "ии", "ие", "ем", "ей", "еи", "ев", "ах", "ам",
		"я", "ю", "ь", "ы", "у", "о", "й", "и", "е", "а",
	)
	return nounEnding != ""
}
开发者ID:kljensen,项目名称:snowball,代码行数:49,代码来源:step1.go

示例14: step2b

// step2b removes the remaining verb suffixes (those not handled by the
// y-suffix step).
//
// It looks in RV for the first matching suffix from the list below and
// deletes it. For "en", "es", "éis" and "emos" there is one extra rule:
// if the word then ends in "gu", the "u" is deleted as well (the "gu"
// need not be in RV). The result reports whether a suffix was deleted.
func step2b(word *snowballword.SnowballWord) bool {
	ending, endingRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"iésemos", "iéramos", "iríamos", "eríamos", "aríamos", "ásemos",
		"áramos", "ábamos", "isteis", "iríais", "iremos", "ieseis",
		"ierais", "eríais", "eremos", "asteis", "aríais", "aremos",
		"íamos", "irías", "irían", "iréis", "ieses", "iesen", "ieron",
		"ieras", "ieran", "iendo", "erías", "erían", "eréis", "aseis",
		"arías", "arían", "aréis", "arais", "abais", "íais", "iste",
		"iría", "irás", "irán", "imos", "iese", "iera", "idos", "idas",
		"ería", "erás", "erán", "aste", "ases", "asen", "aría", "arás",
		"arán", "aron", "aras", "aran", "ando", "amos", "ados", "adas",
		"abas", "aban", "ías", "ían", "éis", "áis", "iré", "irá", "ido",
		"ida", "eré", "erá", "emos", "ase", "aré", "ará", "ara", "ado",
		"ada", "aba", "ís", "ía", "ió", "ir", "id", "es", "er", "en",
		"ed", "as", "ar", "an", "ad",
	)
	if ending == "" {
		return false
	}

	// Every matched suffix is deleted.
	word.RemoveLastNRunes(len(endingRunes))

	switch ending {
	case "en", "es", "éis", "emos":
		// If what is left ends in "gu", drop the trailing "u" too.
		if gu, _ := word.FirstSuffix("gu"); gu != "" {
			word.RemoveLastNRunes(1)
		}
	}
	return true
}
开发者ID:kljensen,项目名称:snowball,代码行数:40,代码来源:step2b.go

示例15: step1a

// step1a normalizes various special "s"-endings and reports whether the
// word was changed.
//
// The first matching suffix among "sses", "ied", "ies", "us", "ss" and "s"
// decides what happens:
//
//   - "sses" is replaced by "ss";
//   - "ies"/"ied" is replaced by "i" if preceded by more than one letter,
//     otherwise by "ie" (so ties -> tie, cries -> cri);
//   - "us"/"ss" is left alone;
//   - "s" is deleted if the preceding word part contains a vowel not
//     immediately before the s (so gas and this retain the s, gaps and
//     kiwis lose it).
func step1a(w *snowballword.SnowballWord) bool {
	ending, endingRunes := w.FirstSuffix("sses", "ied", "ies", "us", "ss", "s")

	switch ending {
	case "sses":
		w.ReplaceSuffixRunes(endingRunes, []rune("ss"), true)
		return true

	case "ies", "ied":
		// A word longer than four runes has more than one letter in
		// front of the three-rune suffix.
		replacement := "ie"
		if len(w.RS) > 4 {
			replacement = "i"
		}
		w.ReplaceSuffixRunes(endingRunes, []rune(replacement), true)
		return true

	case "us", "ss":
		// Deliberately unchanged.
		return false

	case "s":
		// Scan everything except the final "s" and the rune right
		// before it; any vowel found there licenses the deletion.
		for i, r := range w.RS {
			if i >= len(w.RS)-2 {
				break
			}
			if isLowerVowel(r) {
				w.RemoveLastNRunes(len(endingRunes))
				return true
			}
		}
	}
	return false
}
开发者ID:kljensen,项目名称:snowball,代码行数:47,代码来源:step1a.go


注:本文中的github.com/kljensen/snowball/snowballword.SnowballWord类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。