本文整理匯總了Golang中github.com/blevesearch/bleve/analysis.BuildTermFromRunes函數的典型用法代碼示例。如果您正苦於以下問題:Golang BuildTermFromRunes函數的具體用法?Golang BuildTermFromRunes怎麽用?Golang BuildTermFromRunes使用的例子?那麽, 這裏精選的函數代碼示例或許可以為您提供幫助。
在下文中一共展示了BuildTermFromRunes函數的12個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Golang代碼示例。
示例1: Filter
// Filter normalizes CJK width variants in each token, in place:
// fullwidth ASCII forms (U+FF01..U+FF5E) are folded to their basic
// Latin equivalents, and halfwidth Katakana (U+FF65..U+FF9F) is mapped
// to standard-width Katakana via the kanaNorm table. The halfwidth
// voiced/semi-voiced sound marks (U+FF9E/U+FF9F) are merged into the
// preceding rune when combine succeeds (combine is defined elsewhere —
// presumably it rewrites runes[i-1] to the voiced form; confirm there).
// The input stream is mutated and returned.
func (s *CJKWidthFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, token := range input {
		runeCount := utf8.RuneCount(token.Term)
		runes := bytes.Runes(token.Term)
		for i := 0; i < runeCount; i++ {
			ch := runes[i]
			if ch >= 0xFF01 && ch <= 0xFF5E {
				// fullwidth ASCII variants: a constant offset maps them
				// onto the ASCII range (U+FF01 - 0xFEE0 = '!')
				runes[i] -= 0xFEE0
			} else if ch >= 0xFF65 && ch <= 0xFF9F {
				// halfwidth Katakana variants
				if (ch == 0xFF9E || ch == 0xFF9F) && i > 0 && combine(runes, i, ch) {
					// the sound mark was absorbed by the previous rune:
					// remove it, step back so the shifted-in rune is
					// examined next, and refresh the rune count
					runes = analysis.DeleteRune(runes, i)
					i--
					runeCount = len(runes)
				} else {
					runes[i] = kanaNorm[ch-0xFF65]
				}
			}
		}
		token.Term = analysis.BuildTermFromRunes(runes)
	}
	return input
}
示例2: Filter
// Filter emits, for every token in the stream, all character n-grams
// with lengths between s.minLength and s.maxLength (inclusive) that fit
// within the token. Each n-gram becomes a new token that inherits the
// source token's position, offsets, and type.
func (s *NgramFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	rv := make(analysis.TokenStream, 0, len(input))
	for _, tok := range input {
		total := utf8.RuneCount(tok.Term)
		termRunes := bytes.Runes(tok.Term)
		for start := 0; start < total; start++ {
			// emit every requested gram size anchored at start
			for size := s.minLength; size <= s.maxLength; size++ {
				end := start + size
				if end > total {
					// larger sizes cannot fit either
					break
				}
				gram := analysis.Token{
					Position: tok.Position,
					Start:    tok.Start,
					End:      tok.End,
					Type:     tok.Type,
					Term:     analysis.BuildTermFromRunes(termRunes[start:end]),
				}
				rv = append(rv, &gram)
			}
		}
	}
	return rv
}
示例3: Filter
// Filter stems every token's term in place using the French light
// stemmer and returns the same stream.
func (s *FrenchLightStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, token := range input {
		token.Term = analysis.BuildTermFromRunes(stem(bytes.Runes(token.Term)))
	}
	return input
}
示例4: Filter
// Filter rewrites every token's term in place with its Indic-normalized
// form and returns the same stream.
func (s *IndicNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, token := range input {
		token.Term = analysis.BuildTermFromRunes(normalize(bytes.Runes(token.Term)))
	}
	return input
}
示例5: Filter
// Filter applies the Porter stemming algorithm to each token's term in
// place, skipping tokens flagged as protected keywords, and returns the
// same stream.
func (s *PorterStemmer) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, token := range input {
		if token.KeyWord {
			// protected keyword: leave the term untouched
			continue
		}
		stemmed := porterstemmer.StemWithoutLowerCasing(bytes.Runes(token.Term))
		token.Term = analysis.BuildTermFromRunes(stemmed)
	}
	return input
}
示例6: stem
// stem removes at most one leading prefix (first match in the prefixes
// table wins) and then every matching trailing suffix, applied in the
// order they appear in the suffixes table, returning the stemmed term
// re-encoded as UTF-8 bytes.
func stem(input []byte) []byte {
	term := bytes.Runes(input)

	// Strip a single prefix: stop at the first one that applies.
	for _, pre := range prefixes {
		if canStemPrefix(term, pre) {
			term = term[len(pre):]
			break
		}
	}

	// Strip suffixes cumulatively, honoring the table order.
	for _, suf := range suffixes {
		if canStemSuffix(term, suf) {
			term = term[:len(term)-len(suf)]
		}
	}

	return analysis.BuildTermFromRunes(term)
}
示例7: normalize
// normalize folds Arabic-script orthographic variants in a UTF-8 term:
// madda/hamza alef forms become bare alef, dotless yeh becomes yeh, teh
// marbuta becomes heh, and tatweel plus the harakat diacritics are
// deleted outright. (The rune constants are defined elsewhere in the
// package.)
func normalize(input []byte) []byte {
	runes := bytes.Runes(input)
	for i := 0; i < len(runes); i++ {
		switch runes[i] {
		case AlefMadda, AlefHamzaAbove, AlefHamzaBelow:
			runes[i] = Alef
		case DotlessYeh:
			runes[i] = Yeh
		case TehMarbuta:
			runes[i] = Heh
		case Tatweel, Kasratan, Dammatan, Fathatan, Fatha, Damma, Kasra, Shadda, Sukun:
			// deletion shifts the tail left, so step back and
			// re-examine the rune now at index i
			runes = analysis.DeleteRune(runes, i)
			i--
		}
	}
	return analysis.BuildTermFromRunes(runes)
}
示例8: normalize
// normalize folds Arabic-script letter variants in a UTF-8 term
// (constant names such as YehBarree and HehGoal suggest Urdu-oriented
// normalization — confirm against the enclosing package): yeh variants
// become yeh, keheh becomes kaf, heh variants become heh, and hamza
// above is deleted.
func normalize(input []byte) []byte {
	runes := bytes.Runes(input)
	for i := 0; i < len(runes); i++ {
		switch runes[i] {
		case FarsiYeh, YehBarree:
			runes[i] = Yeh
		case Keheh:
			runes[i] = Kaf
		case HehYeh, HehGoal:
			runes[i] = Heh
		case HamzaAbove: // necessary for HEH + HAMZA
			// deletion shifts the tail left, so step back and
			// re-examine the rune now at index i
			runes = analysis.DeleteRune(runes, i)
			i--
		}
	}
	return analysis.BuildTermFromRunes(runes)
}
示例9: normalize
// normalize performs German-style vowel folding on a UTF-8 term using a
// small state machine over the preceding character (states N/U/V are
// defined elsewhere; from this code they read as N = neutral, U = after
// a lone a/o/u, V = after any other vowel context):
//   - an 'e' that follows state U is dropped, collapsing ae/oe/ue
//     digraphs to a/o/u (inferred from the transitions below — confirm
//     against the package's stemmer documentation);
//   - the umlauts 'ä', 'ö', 'ü' are folded to a/o/u;
//   - 'ß' is expanded to "ss".
func normalize(input []byte) []byte {
	state := N
	runes := bytes.Runes(input)
	for i := 0; i < len(runes); i++ {
		switch runes[i] {
		case 'a', 'o':
			state = U
		case 'u':
			// 'u' opens a foldable digraph only when nothing
			// vowel-like precedes it
			if state == N {
				state = U
			} else {
				state = V
			}
		case 'e':
			if state == U {
				// drop the digraph 'e'; deletion shifts the tail
				// left, so step back to re-examine index i
				runes = analysis.DeleteRune(runes, i)
				i--
			}
			state = V
		case 'i', 'q', 'y':
			state = V
		case 'ä':
			runes[i] = 'a'
			state = V
		case 'ö':
			runes[i] = 'o'
			state = V
		case 'ü':
			runes[i] = 'u'
			state = V
		case 'ß':
			// rewrite 'ß' as the first 's', then insert the second
			// 's' immediately after it
			runes[i] = 's'
			i++
			runes = analysis.InsertRune(runes, i, 's')
			state = N
		default:
			state = N
		}
	}
	return analysis.BuildTermFromRunes(runes)
}
示例10: normalize
// normalize folds Arabic-script variants in a UTF-8 term (the Rreh/Ae
// constants suggest Sorani Kurdish normalization — confirm against the
// enclosing package): yeh forms become Farsi yeh, kaf becomes keheh,
// zero-width non-joiners are removed (converting a preceding heh to ae),
// word-final heh and teh marbuta become ae, initial reh becomes rreh,
// harakat diacritics are deleted, and any other Unicode format (Cf)
// character is stripped.
func normalize(input []byte) []byte {
	runes := bytes.Runes(input)
	for i := 0; i < len(runes); i++ {
		switch runes[i] {
		case Yeh, DotlessYeh:
			runes[i] = FarsiYeh
		case Kaf:
			runes[i] = Keheh
		case Zwnj:
			// a heh cut off by ZWNJ is really word-final: fold it to ae
			if i > 0 && runes[i-1] == Heh {
				runes[i-1] = Ae
			}
			// deletion shifts the tail left, so step back and
			// re-examine the rune now at index i
			runes = analysis.DeleteRune(runes, i)
			i--
		case Heh:
			// heh only folds to ae at the end of the term
			if i == len(runes)-1 {
				runes[i] = Ae
			}
		case TehMarbuta:
			runes[i] = Ae
		case HehDoachashmee:
			runes[i] = Heh
		case Reh:
			// reh only folds to rreh at the start of the term
			if i == 0 {
				runes[i] = Rreh
			}
		case RrehAbove:
			runes[i] = Rreh
		case Tatweel, Kasratan, Dammatan, Fathatan, Fatha, Damma, Kasra, Shadda, Sukun:
			runes = analysis.DeleteRune(runes, i)
			i--
		default:
			// strip any remaining invisible format characters
			if unicode.In(runes[i], unicode.Cf) {
				runes = analysis.DeleteRune(runes, i)
				i--
			}
		}
	}
	return analysis.BuildTermFromRunes(runes)
}
示例11: normalize
// normalize applies Devanagari normalization to a UTF-8 term (all code
// points below are in the U+0900 Devanagari block): dead-consonant and
// candrabindu forms fold to bindu, nukta forms and precomposed
// nukta letters are simplified, joiners and virama are deleted, and
// chandra/short/long vowel variants collapse to a canonical short form.
func normalize(input []byte) []byte {
	runes := bytes.Runes(input)
	for i := 0; i < len(runes); i++ {
		switch runes[i] {
		// dead n (NA + virama) -> bindu
		case '\u0928':
			if i+1 < len(runes) && runes[i+1] == '\u094D' {
				runes[i] = '\u0902'
				runes = analysis.DeleteRune(runes, i+1)
			}
		// candrabindu -> bindu
		case '\u0901':
			runes[i] = '\u0902'
		// nukta deletions (deletion shifts the tail left, so step
		// back and re-examine index i)
		case '\u093C':
			runes = analysis.DeleteRune(runes, i)
			i--
		// precomposed nukta letters -> base letters
		case '\u0929':
			runes[i] = '\u0928'
		case '\u0931':
			runes[i] = '\u0930'
		case '\u0934':
			runes[i] = '\u0933'
		case '\u0958':
			runes[i] = '\u0915'
		case '\u0959':
			runes[i] = '\u0916'
		case '\u095A':
			runes[i] = '\u0917'
		case '\u095B':
			runes[i] = '\u091C'
		case '\u095C':
			runes[i] = '\u0921'
		case '\u095D':
			runes[i] = '\u0922'
		case '\u095E':
			runes[i] = '\u092B'
		case '\u095F':
			runes[i] = '\u092F'
		// zwj/zwnj -> delete
		case '\u200D', '\u200C':
			runes = analysis.DeleteRune(runes, i)
			i--
		// virama -> delete
		case '\u094D':
			runes = analysis.DeleteRune(runes, i)
			i--
		// chandra/short -> replace
		case '\u0945', '\u0946':
			runes[i] = '\u0947'
		case '\u0949', '\u094A':
			runes[i] = '\u094B'
		case '\u090D', '\u090E':
			runes[i] = '\u090F'
		case '\u0911', '\u0912':
			runes[i] = '\u0913'
		case '\u0972':
			runes[i] = '\u0905'
		// long -> short ind. vowels
		case '\u0906':
			runes[i] = '\u0905'
		case '\u0908':
			runes[i] = '\u0907'
		case '\u090A':
			runes[i] = '\u0909'
		case '\u0960':
			runes[i] = '\u090B'
		case '\u0961':
			runes[i] = '\u090C'
		case '\u0910':
			runes[i] = '\u090F'
		case '\u0914':
			runes[i] = '\u0913'
		// long -> short dep. vowels
		case '\u0940':
			runes[i] = '\u093F'
		case '\u0942':
			runes[i] = '\u0941'
		case '\u0944':
			runes[i] = '\u0943'
		case '\u0963':
			runes[i] = '\u0962'
		case '\u0948':
			runes[i] = '\u0947'
		case '\u094C':
			runes[i] = '\u094B'
		}
	}
	return analysis.BuildTermFromRunes(runes)
}
示例12: buildTokenFromTerm
// buildTokenFromTerm wraps a rune buffer in a fresh Token whose Term is
// the UTF-8 encoding of those runes; all other Token fields are left at
// their zero values.
func buildTokenFromTerm(buffer []rune) *analysis.Token {
	term := analysis.BuildTermFromRunes(buffer)
	return &analysis.Token{Term: term}
}