本文整理汇总了 Golang 中 regexp.Regexp.Split 方法的典型用法代码示例。如果您正苦于以下问题:Golang Regexp.Split 方法的具体用法?Golang Regexp.Split 怎么用?Golang Regexp.Split 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类型 regexp.Regexp 的用法示例。
在下文中一共展示了Regexp.Split方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Golang代码示例。
示例1: PerformTokenization
// PerformTokenization splits text at every match of splitToken and counts
// how many times each resulting piece occurs.
//
// Pieces are lower-cased before counting, so "The" and "THE" share one
// entry. Pieces whose length is two bytes or less are ignored; note that the
// length is measured in bytes, not runes. The returned map is never nil.
func PerformTokenization(text string, splitToken *regexp.Regexp) (words map[string]int64) {
	words = map[string]int64{}
	pieces := splitToken.Split(text, -1)
	for i := range pieces {
		if len(pieces[i]) <= 2 {
			// Too short to be counted as a word.
			continue
		}
		key := strings.ToLower(pieces[i])
		words[key] = words[key] + 1
	}
	return words
}
示例2: Cut
// Cut segments sentence into a slice of tokens.
//
// The sentence is first divided into blocks by RegexpSplit using a "han"
// pattern whose exact form depends on cut_all. Blocks matching that pattern
// are handed to a cutAction: cut_All when cut_all is set, otherwise cut_DAG
// when HMM is set, otherwise cut_DAG_NO_HMM. Every other block is split
// again with a "skip" pattern; pieces that match the skip pattern are emitted
// as they are, and the remaining pieces are emitted whole in cut_all mode or
// rune by rune otherwise.
//
// The returned slice is never nil.
func Cut(sentence string, cut_all bool, HMM bool) []string {
	// Pick the pattern pair for the requested mode; the han pattern is
	// compiled first, then the skip pattern.
	hanPattern, skipPattern := `([\p{Han}+[:alnum:]+#&\._]+)`, `(\r\n|\s)`
	if cut_all {
		hanPattern, skipPattern = `\p{Han}+`, `[^[:alnum:]+#\n]`
	}
	hanRe := regexp.MustCompile(hanPattern)
	skipRe := regexp.MustCompile(skipPattern)

	blocks := RegexpSplit(hanRe, sentence)

	// cut_all takes precedence over the HMM flag.
	var segment cutAction
	switch {
	case cut_all:
		segment = cut_All
	case HMM:
		segment = cut_DAG
	default:
		segment = cut_DAG_NO_HMM
	}

	// Non-han blocks are split with the plain regexp Split in cut_all mode
	// and with the RegexpSplit helper (defined elsewhere) otherwise.
	splitSkipped := func(s string) []string {
		return RegexpSplit(skipRe, s)
	}
	if cut_all {
		splitSkipped = func(s string) []string {
			return skipRe.Split(s, -1)
		}
	}

	tokens := []string{}
	for _, block := range blocks {
		if len(block) == 0 {
			continue
		}
		if hanRe.MatchString(block) {
			for _, word := range segment(block) {
				tokens = append(tokens, word)
			}
			continue
		}
		for _, piece := range splitSkipped(block) {
			if skipRe.MatchString(piece) || cut_all {
				tokens = append(tokens, piece)
				continue
			}
			// Outside cut_all mode, unmatched text is emitted one
			// rune at a time.
			for _, r := range piece {
				tokens = append(tokens, string(r))
			}
		}
	}
	return tokens
}
示例3: SplitTokenizer
// SplitTokenizer reads lines from the lines channel until it is closed,
// splits each line at every match of split_re, and sends each non-empty
// piece to tokens. Once lines is drained, tokens is closed.
func SplitTokenizer(split_re *regexp.Regexp, lines <-chan string, tokens chan<- string) {
	for {
		line, open := <-lines
		if !open {
			break
		}
		for _, piece := range split_re.Split(line, -1) {
			// Adjacent or leading/trailing separators yield empty
			// pieces; those are dropped.
			if piece != "" {
				tokens <- piece
			}
		}
	}
	close(tokens)
}
示例4: SeparateString
// SeparateString turns str into a Table: rowSep splits the text into lines
// and colSep splits every line into cells. Each cell has its surrounding
// whitespace trimmed. A row is kept only when HasNonEmpty (defined elsewhere)
// accepts it; all other rows are dropped.
func SeparateString(rowSep *regexp.Regexp, colSep *regexp.Regexp, str string) (output Table) {
	for _, line := range rowSep.Split(str, -1) {
		cells := colSep.Split(line, -1)
		row := make([]string, 0, len(cells))
		for i := range cells {
			row = append(row, strings.TrimSpace(cells[i]))
		}
		if len(row) == 0 || !HasNonEmpty(row) {
			continue
		}
		output = append(output, row)
	}
	return output
}
示例5: getColumnRegex
// getColumnRegex is the core of the logic. It determines which regex most
// accurately splits the data into columns by testing the deviation in the
// row lengths using different regexps.
//
// str is the raw table text and rowSep is the pattern that splits it into
// rows. The result is always one of the candidate patterns listed in
// initialColSeps below. Selection runs in three stages:
//
//  1. Candidates that match fewer than len(rows)/2 rows are dropped; if none
//     survive, a warning is logged and all candidates are used.
//  2. The survivors are tried in order and the first one for which
//     testRegexp reports a variance of at most .1 is returned.
//  3. Otherwise the candidate at the index reported by
//     extremaIndex(minFunc, variances) is returned.
func getColumnRegex(str string, rowSep *regexp.Regexp) *regexp.Regexp {
// matchesMost is used to ensure that our regexp actually is splitting the
// lines of a table, instead of just returning them whole.
// The threshold is len(rows)/2 using integer division, so it rounds down.
matchesMost := func(re *regexp.Regexp, rows []string) bool {
count := 0
for _, row := range rows {
if re.MatchString(row) {
count++
}
}
return count >= (len(rows) / 2)
}
// getRowLengths returns the number of cells in each row of table, in row
// order.
getRowLengths := func(table Table) (lengths []int) {
for _, row := range table {
lengths = append(lengths, len(row))
}
return lengths
}
// getVariance returns the variance of the split provided by a regexp,
// after discarding a number of outliers.
// It re-splits str with SeparateString on every call and applies chauvenet
// `outliers` times to the row lengths before calling variance. Both helpers
// are defined elsewhere; presumably chauvenet removes outliers using
// Chauvenet's criterion -- TODO confirm.
getVariance := func(colSep *regexp.Regexp, outliers int) float64 {
table := SeparateString(rowSep, colSep, str)
rowLengths := getRowLengths(table)
for i := 0; i < outliers; i++ {
rowLengths = chauvenet(rowLengths)
}
return variance(rowLengths)
}
// testRegexp determines whether or not a given regexp gives perfectly even
// line lengths, including discarding of a number of outliers.
// "Even" here means a variance of at most .1.
// NOTE(review): the loop body never runs when outliers == 0, so such a call
// always returns false; a call with outliers == k checks 0..k-1 discarded
// outliers. Confirm whether `i <= outliers` was intended.
testRegexp := func(colSep *regexp.Regexp, outliers int) bool {
for i := 0; i < outliers; i++ {
variance := getVariance(colSep, i)
if variance <= .1 {
return true
}
}
return false
}
// different column separators to try out; the order matters because the
// first candidate that passes testRegexp below is returned
initialColSeps := []*regexp.Regexp{
regexp.MustCompile(`\t+`), // tabs
regexp.MustCompile(`\s{4}`), // exactly four whitespaces
regexp.MustCompile(`\s{2,}`), // two+ whitespace (spaces in cols)
regexp.MustCompile(`\s+`), // any whitespace
}
// filter regexps that have no matches at all - they will always return
// rows of even length (length 1). More precisely, a candidate is kept only
// when matchesMost accepts it.
colSeps := []*regexp.Regexp{}
rows := rowSep.Split(str, -1)
for _, re := range initialColSeps {
if matchesMost(re, rows) {
colSeps = append(colSeps, re)
}
}
// nothing survived the filter: log it and fall back to every candidate
if len(colSeps) < 1 {
log.WithFields(log.Fields{
"attempted": initialColSeps,
"table": str,
}).Warn("ProbabalisticSplit couldn't find a column separator.")
colSeps = initialColSeps
}
// discarding up to passes outliers, test each regexp for row length
// consistency
// NOTE(review): testRegexp is called with 0, 1 and 2 here; given its loop
// bound, the largest outlier count actually evaluated is 1.
passes := 3
for i := 0; i < passes; i++ {
for _, re := range colSeps {
if testRegexp(re, i) {
return re
}
}
}
// if still not done, just pick the one with the lowest variance
log.WithFields(log.Fields{
"attempted": initialColSeps,
"outliers": passes,
}).Debug("ProbabalisticSplit couldn't find a consistent regexp")
// variances[i] belongs to colSeps[i]; each is computed with `passes`
// outlier-discarding rounds
var variances []float64
for _, colSep := range colSeps {
variances = append(variances, getVariance(colSep, passes))
}
// ensure that index can be found in tables
// extremaIndex and minFunc are defined elsewhere; presumably this yields the
// index of the smallest variance -- TODO confirm.
minVarianceIndex := extremaIndex(minFunc, variances)
// NOTE(review): only an index that is too large is rejected here; the code
// relies on Fatal not returning (otherwise the indexing below would panic)
// and does not check for a negative index -- confirm both.
if len(colSeps) <= minVarianceIndex {
msg := "Internal error: minVarianceIndex couldn't be found in colSeps"
log.WithFields(log.Fields{
"index": minVarianceIndex,
"colSeps": colSeps,
}).Fatal(msg)
}
return colSeps[minVarianceIndex]
}
示例6: main
// ......... part of the code is omitted here .........
// Main loop: read the input line by line, select and split each line into
// fields, then format and print the selected fields.
for scanner.Scan() {
// NOTE(review): if scanner is a *bufio.Scanner, Err stays nil while Scan
// keeps returning true, so this check likely never fires and a read error
// that ends the loop goes unreported -- confirm and consider checking Err
// after the loop instead.
if scanner.Err() != nil {
log.Fatal(scanner.Err())
}
line := scanner.Text()
lineno += 1
// skip the first *afterlinen lines
if *afterlinen >= lineno {
continue
}
// skip every line until one containing *afterline has been seen; the
// matching line itself is skipped as well, since the continue below runs
// either way
if len_afterline > 0 {
if strings.Contains(line, *afterline) {
len_afterline = 0
}
continue
}
// keep only the text after the first occurrence of *after; lines that
// do not contain it are dropped (presumably len_after == len(*after) --
// TODO confirm)
if len_after > 0 {
i := strings.Index(line, *after)
if i < 0 {
continue // no match
}
line = line[i+len_after:]
}
fields := []string{line} // $0 is the full line
// Exactly one splitting mode applies, in this order of precedence:
// grep_pattern, split_pattern, split_re, whitespace, literal separator.
if grep_pattern != nil {
// the submatches replace fields entirely; non-matching lines are
// dropped
if matches := grep_pattern.FindStringSubmatch(line); matches != nil {
fields = matches
} else {
continue
}
} else if split_pattern != nil {
// same as above, except that a non-matching line is kept with only
// the full line in fields
if matches := split_pattern.FindStringSubmatch(line); matches != nil {
fields = matches
}
} else if split_re != nil {
// split line according to input regular expression
fields = append(fields, split_re.Split(line, -1)...)
} else if *ifs == " " {
// split line on spaces (compact multiple spaces)
fields = append(fields, SPACES.Split(strings.TrimSpace(line), -1)...)
} else {
// split line according to input field separator
fields = append(fields, strings.Split(line, *ifs)...)
}
if *debug {
log.Printf("input fields: %q\n", fields)
if len(pos) > 0 {
log.Printf("output fields: %q\n", pos)
}
}
var result []string
// do some processing
if len(pos) > 0 {
// collect the fields selected by each entry of pos, in order; Slice
// is a helper defined elsewhere
result = make([]string, 0)
for _, p := range pos {
result = append(result, Slice(fields, p)...)
}
} else {
// no selection given: output everything except the first element
result = fields[1:]
}
// Unquote and Quote are helpers defined elsewhere; when both flags are
// set, Unquote runs first
if *unquote {
result = Unquote(result)
}
if *quote {
result = Quote(result)
}
if *printline {
fmt.Printf("%d: ", lineno)
}
if len(*format) > 0 {
Print(*format, result)
} else {
// join the result according to output field separator
fmt.Println(strings.Join(result, *ofs))
}
// remember that at least one printed line matched match_pattern; lines
// skipped by a continue above never reach this check
if match_pattern != nil && match_pattern.MatchString(line) {
status_code = MATCH_FOUND
}
}
os.Exit(status_code)
}
示例7: Parents
// Parents builds a list of directory paths from a starting directory by
// accumulating its path segments one at a time; presumably the result is the
// directory followed by each of its ancestors -- TODO confirm against the
// array package's Slice/Reverse semantics.
//
// All arguments are optional:
//   - args[0], when present, is used as the starting directory instead of
//     the current working directory;
//   - args[1], when present, selects Windows path handling if it starts
//     with "win", overriding the value derived from runtime.GOOS.
func Parents(args ...string) []string {
	isWindows := runtime.GOOS == "windows"

	var cwd string
	switch len(args) {
	case 0:
		// The error from os.Getwd is discarded; cwd is whatever it
		// returned.
		cwd, _ = os.Getwd()
	case 1:
		cwd = args[0]
	default:
		cwd = args[0]
		isWindows = strings.HasPrefix(args[1], "win")
	}

	// Separator pattern, join separator and reduction seed all depend on
	// the platform flavour only.
	pattern, sep, seed := `/`, "/", array.Array{"/"}
	if isWindows {
		pattern, sep, seed = `[\\\/]`, "\\", array.Array{""}
	}
	reg := regexp.MustCompile(pattern)

	// join combines the non-empty string operands with sep, cleans the
	// result and wraps it in a one-element array.
	join := func(x, y interface{}) array.Array {
		pair := array.Array{x, y}
		parts := pair.Filter(func(p interface{}, args ...interface{}) bool {
			s, isString := p.(string)
			return isString && s != ""
		})
		return array.Array{path.Clean(parts.Join(sep))}
	}

	cleaned := path.Clean(cwd)
	segments := array.Array{}
	for _, segment := range reg.Split(cleaned, -1) {
		segments.Push(segment)
	}

	// Each step appends join(accumulator[index], segment) to the
	// accumulator, where index is the first extra argument Reduce passes in.
	reduced := segments.Reduce(func(acc, dir interface{}, ix ...interface{}) interface{} {
		soFar := acc.(array.Array)
		at := ix[0].(int)
		soFar = soFar.Concat(join(soFar[at], dir))
		return soFar
	}, seed)

	// The array package's methods are not chained here; each intermediate
	// value is stored before the next call.
	chain := reduced.(array.Array)
	ancestors := chain.Slice(1, 0)
	ancestors.Reverse()

	// Two identical leading entries collapse to a single-element result.
	if len(ancestors) >= 2 && ancestors[0] == ancestors[1] {
		return []string{ancestors[0].(string)}
	}

	if !isWindows || !strings.HasPrefix(cwd, "\\") {
		return ancestors.ToString()
	}

	// Windows input that starts with a backslash: make every entry start
	// with a backslash as well.
	// NOTE(review): the value returned by Map is discarded, so this only has
	// an effect if Map modifies cut in place -- confirm.
	cut := ancestors.Slice(0, -1)
	cut.Map(func(d interface{}, args ...interface{}) interface{} {
		text := d.(string)
		switch text[0] {
		case '\\':
			return d
		case '.':
			return "\\" + text[1:]
		default:
			return "\\" + text
		}
	})
	return cut.ToString()
}