当前位置: 首页>>代码示例>>Golang>>正文


Golang Regexp.Split方法代码示例

本文整理汇总了Golang中regexp.Regexp.Split方法的典型用法代码示例。如果您正苦于以下问题：Golang Regexp.Split方法的具体用法？Golang Regexp.Split怎么用？Golang Regexp.Split使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在regexp.Regexp的用法示例。


在下文中一共展示了Regexp.Split方法的7个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Golang代码示例。

示例1: PerformTokenization

// PerformTokenization cuts text wherever splitToken matches and tallies the
// resulting pieces. Pieces whose length is two bytes or fewer are ignored;
// every other piece is lower-cased before it is counted, so the tally is
// case-insensitive. The returned map is never nil.
func PerformTokenization(text string, splitToken *regexp.Regexp) (words map[string]int64) {
	counts := map[string]int64{}
	pieces := splitToken.Split(text, -1)
	for i := 0; i < len(pieces); i++ {
		piece := pieces[i]
		// len counts bytes, not runes, so the cut-off is a byte length.
		if len(piece) <= 2 {
			continue
		}
		key := strings.ToLower(piece)
		counts[key] = counts[key] + 1
	}
	return counts
}
开发者ID:ruianderson,项目名称:shield,代码行数:9,代码来源:tokenizer.go

示例2: Cut

func Cut(sentence string, cut_all bool, HMM bool) []string {
	result := make([]string, 0)
	var re_han, re_skip *regexp.Regexp
	if cut_all {
		re_han = regexp.MustCompile(`\p{Han}+`)
		re_skip = regexp.MustCompile(`[^[:alnum:]+#\n]`)
	} else {
		re_han = regexp.MustCompile(`([\p{Han}+[:alnum:]+#&\._]+)`)
		re_skip = regexp.MustCompile(`(\r\n|\s)`)
	}
	blocks := RegexpSplit(re_han, sentence)
	var cut_block cutAction
	if HMM {
		cut_block = cut_DAG
	} else {
		cut_block = cut_DAG_NO_HMM
	}
	if cut_all {
		cut_block = cut_All
	}
	for _, blk := range blocks {
		if len(blk) == 0 {
			continue
		}
		if re_han.MatchString(blk) {
			for _, word := range cut_block(blk) {
				result = append(result, word)
			}
		} else {
			type skipSplitFunc func(sentence string) []string
			var ssf skipSplitFunc
			if cut_all {
				ssf = func(sentence string) []string {
					return re_skip.Split(sentence, -1)
				}
			} else {
				ssf = func(sentence string) []string {
					return RegexpSplit(re_skip, sentence)
				}
			}

			for _, x := range ssf(blk) {
				if re_skip.MatchString(x) {
					result = append(result, x)
				} else if !cut_all {
					for _, xx := range x {
						result = append(result, string(xx))
					}
				} else {
					result = append(result, x)
				}
			}
		}
	}
	return result
}
开发者ID:kennylixi,项目名称:jiebago,代码行数:56,代码来源:jieba.go

示例3: SplitTokenizer

// SplitTokenizer reads lines until the lines channel is closed, cuts each
// line wherever split_re matches, and sends every non-empty piece, in order,
// to tokens. Once the input is exhausted it closes tokens, so the function
// must be the only sender on that channel.
func SplitTokenizer(split_re *regexp.Regexp, lines <-chan string, tokens chan<- string) {
	for {
		line, open := <-lines
		if !open {
			break
		}
		pieces := split_re.Split(line, -1)
		for i := range pieces {
			// Adjacent or leading/trailing separators yield empty pieces.
			if piece := pieces[i]; piece != "" {
				tokens <- piece
			}
		}
	}
	close(tokens)
}
开发者ID:toma63,项目名称:parse,代码行数:14,代码来源:parse.go

示例4: SeparateString

// SeparateString turns str into a two-dimensional table: str is cut into
// lines wherever rowSep matches, each line is cut into cells wherever colSep
// matches, and surrounding whitespace is trimmed from every cell. A row is
// kept only when it has at least one cell and HasNonEmpty reports true for
// it; all other rows are dropped.
func SeparateString(rowSep *regexp.Regexp, colSep *regexp.Regexp, str string) (output Table) {
	for _, line := range rowSep.Split(str, -1) {
		cells := colSep.Split(line, -1)
		row := make([]string, 0, len(cells))
		for i := range cells {
			row = append(row, strings.TrimSpace(cells[i]))
		}
		if len(row) == 0 || !HasNonEmpty(row) {
			continue
		}
		output = append(output, row)
	}
	return output
}
开发者ID:allenbhuiyan,项目名称:distributive,代码行数:16,代码来源:tabular.go

示例5: getColumnRegex

// getColumnRegex is the core of the logic. It determines which regex most
// accurately splits the data into columns by testing the deviation in the
// row lengths using different regexps.
//
// str is the raw table text and rowSep the expression that divides it into
// rows. The function always returns one of the four candidate expressions
// listed in initialColSeps, chosen in three stages:
//
//  1. Candidates matching fewer than len(rows)/2 of the rows are discarded.
//     If none remain, a warning is logged and all candidates are used.
//  2. For i in [0, passes), each remaining candidate is checked with
//     testRegexp(re, i); the first one that passes is returned.
//  3. Otherwise the variance of every remaining candidate is computed with
//     `passes` outlier-removal rounds and the candidate at the index reported
//     by extremaIndex(minFunc, ...) is returned (presumably the one with the
//     smallest variance - confirm against extremaIndex).
func getColumnRegex(str string, rowSep *regexp.Regexp) *regexp.Regexp {
	// matchesMost is used to ensure that our regexp actually is splitting the
	// lines of a table, instead of just returning them whole. It reports
	// whether re matches at least len(rows)/2 rows (integer division).
	// NOTE(review): for zero or one row the threshold is 0, so this is
	// always true - confirm that is intended.
	matchesMost := func(re *regexp.Regexp, rows []string) bool {
		count := 0
		for _, row := range rows {
			if re.MatchString(row) {
				count++
			}
		}
		return count >= (len(rows) / 2)
	}
	// getRowLengths returns the number of cells in each row of the table, in
	// row order. The result is nil for an empty table.
	getRowLengths := func(table Table) (lengths []int) {
		for _, row := range table {
			lengths = append(lengths, len(row))
		}
		return lengths
	}
	// getVariance returns the variance of the split provided by a regexp,
	// after discarding a number of outliers. The table is rebuilt from str on
	// every call, and chauvenet is applied `outliers` times to the row
	// lengths (presumably removing one outlier per call - confirm).
	getVariance := func(colSep *regexp.Regexp, outliers int) float64 {
		table := SeparateString(rowSep, colSep, str)
		rowLengths := getRowLengths(table)
		for i := 0; i < outliers; i++ {
			rowLengths = chauvenet(rowLengths)
		}
		return variance(rowLengths)
	}
	// testRegexp determines whether or not a given regexp gives (nearly)
	// even line lengths - a variance of at most .1 - when discarding
	// 0, 1, ..., outliers-1 outliers.
	// NOTE(review): with outliers == 0 the loop body never runs and the
	// result is always false, so the i == 0 pass of the caller's loop below
	// can never succeed - confirm whether `i <= outliers` was intended.
	testRegexp := func(colSep *regexp.Regexp, outliers int) bool {
		for i := 0; i < outliers; i++ {
			variance := getVariance(colSep, i)
			if variance <= .1 {
				return true
			}
		}
		return false
	}
	// different column separators to try out, in order of preference: the
	// first candidate that passes testRegexp wins.
	initialColSeps := []*regexp.Regexp{
		regexp.MustCompile(`\t+`),    // tabs
		regexp.MustCompile(`\s{4}`),  // exactly four whitespaces
		regexp.MustCompile(`\s{2,}`), // two+ whitespace (spaces in cols)
		regexp.MustCompile(`\s+`),    // any whitespace
	}
	// filter regexps that have no matches at all - they will always return
	// rows of even length (length 1).
	colSeps := []*regexp.Regexp{}
	rows := rowSep.Split(str, -1)
	for _, re := range initialColSeps {
		if matchesMost(re, rows) {
			colSeps = append(colSeps, re)
		}
	}
	// Nothing survived the filter: warn and fall back to the full list so
	// that a separator can still be returned.
	if len(colSeps) < 1 {
		log.WithFields(log.Fields{
			"attempted": initialColSeps,
			"table":     str,
		}).Warn("ProbabalisticSplit couldn't find a column separator.")
		colSeps = initialColSeps
	}
	// discarding up to passes outliers, test each regexp for row length
	// consistency
	passes := 3
	for i := 0; i < passes; i++ {
		for _, re := range colSeps {
			if testRegexp(re, i) {
				return re
			}
		}
	}
	// if still not done, just pick the one with the lowest variance
	log.WithFields(log.Fields{
		"attempted": initialColSeps,
		"outliers":  passes,
	}).Debug("ProbabalisticSplit couldn't find a consistent regexp")
	var variances []float64
	for _, colSep := range colSeps {
		variances = append(variances, getVariance(colSep, passes))
	}
	// ensure that index can be found in tables; an out-of-range index is
	// reported through Fatal (presumably terminating the process, as with
	// logrus - confirm) before the slice is indexed.
	minVarianceIndex := extremaIndex(minFunc, variances)
	if len(colSeps) <= minVarianceIndex {
		msg := "Internal error: minVarianceIndex couldn't be found in colSeps"
		log.WithFields(log.Fields{
			"index":   minVarianceIndex,
			"colSeps": colSeps,
		}).Fatal(msg)
	}
	return colSeps[minVarianceIndex]
}
开发者ID:allenbhuiyan,项目名称:distributive,代码行数:96,代码来源:probabalisticSplit.go

示例6: main


//.........这里部分代码省略.........

	for scanner.Scan() {
		if scanner.Err() != nil {
			log.Fatal(scanner.Err())
		}

		line := scanner.Text()

		lineno += 1

		if *afterlinen >= lineno {
			continue
		}

		if len_afterline > 0 {
			if strings.Contains(line, *afterline) {
				len_afterline = 0
			}

			continue
		}

		if len_after > 0 {
			i := strings.Index(line, *after)
			if i < 0 {
				continue // no match
			}

			line = line[i+len_after:]
		}

		fields := []string{line} // $0 is the full line

		if grep_pattern != nil {
			if matches := grep_pattern.FindStringSubmatch(line); matches != nil {
				fields = matches
			} else {
				continue
			}
		} else if split_pattern != nil {
			if matches := split_pattern.FindStringSubmatch(line); matches != nil {
				fields = matches
			}
		} else if split_re != nil {
			// split line according to input regular expression
			fields = append(fields, split_re.Split(line, -1)...)
		} else if *ifs == " " {
			// split line on spaces (compact multiple spaces)
			fields = append(fields, SPACES.Split(strings.TrimSpace(line), -1)...)
		} else {
			// split line according to input field separator
			fields = append(fields, strings.Split(line, *ifs)...)
		}

		if *debug {
			log.Printf("input fields: %q\n", fields)
			if len(pos) > 0 {
				log.Printf("output fields: %q\n", pos)
			}
		}

		var result []string

		// do some processing
		if len(pos) > 0 {
			result = make([]string, 0)

			for _, p := range pos {
				result = append(result, Slice(fields, p)...)
			}
		} else {
			result = fields[1:]
		}

		if *unquote {
			result = Unquote(result)
		}

		if *quote {
			result = Quote(result)
		}

		if *printline {
			fmt.Printf("%d: ", lineno)
		}

		if len(*format) > 0 {
			Print(*format, result)
		} else {
			// join the result according to output field separator
			fmt.Println(strings.Join(result, *ofs))
		}

		if match_pattern != nil && match_pattern.MatchString(line) {
			status_code = MATCH_FOUND
		}
	}

	os.Exit(status_code)
}
开发者ID:raff,项目名称:glin,代码行数:101,代码来源:glin.go

示例7: Parents

// Parents is the package's public entry point. It builds a list of path
// strings from a directory path by accumulating the path's components one
// at a time (presumably the directory followed by each of its ancestors -
// confirm against the array package's Slice/Reverse semantics).
//
// Arguments are optional and positional:
//   - no arguments: the current working directory is used and the platform
//     is taken from runtime.GOOS;
//   - one argument: args[0] is the starting directory;
//   - two or more: args[0] is the starting directory and Windows path rules
//     are used exactly when args[1] starts with "win".
func Parents(args ...string) []string {
	var (
		isWindows = runtime.GOOS == "windows"
		reg       *regexp.Regexp
		init      array.Array
		cwd       string
		sep       string
		c         string
	)
	if n := len(args); n == 0 {
		// NOTE(review): the error from os.Getwd is discarded; on failure cwd
		// stays empty.
		cwd, _ = os.Getwd()
	} else if n == 1 {
		cwd = args[0]
	} else {
		cwd = args[0]
		isWindows = strings.HasPrefix(args[1], "win")
	}

	// c is the separator pattern used to split the path; init seeds the
	// reduction below.
	if isWindows {
		// Matches either a backslash or a forward slash.
		c = `[\\\/]`
		init = array.Array{""}
	} else {
		c = `/`
		init = array.Array{"/"}
	}

	reg = regexp.MustCompile(c)

	// join combines x and y into a single cleaned path: values that are not
	// non-empty strings are filtered out, the rest are joined with the
	// platform separator and passed through path.Clean. The result is
	// wrapped in a one-element array so it can be handed to Concat.
	var join = func(x, y interface{}) array.Array {
		tmpArray := array.Array{x, y}
		var ps = tmpArray.Filter(func(p interface{}, args ...interface{}) bool {
			switch p.(type) {
			case string:
				if p.(string) != "" {
					return true
				}
				return false
			default:
				return false
			}
			// Unreachable: every arm of the switch above returns.
			return false
		})
		if isWindows {
			sep = "\\"
		} else {
			sep = "/"
		}
		return array.Array{path.Clean(ps.Join(sep))}
	}

	// Split the cleaned path into its components.
	var res = path.Clean(cwd)
	arr := array.Array{}
	for _, v := range reg.Split(res, -1) {
		arr.Push(v)
	}

	// Each step joins the accumulator entry at the current index with the
	// next component and concatenates the result onto the accumulator.
	// ix[0] is asserted to be an int (presumably the element index supplied
	// by Reduce - confirm).
	arrReduce := arr.Reduce(func(acc, dir interface{}, ix ...interface{}) interface{} {
		tmpAcc := acc.(array.Array)
		index := ix[0].(int)
		tmpAcc = tmpAcc.Concat(join(tmpAcc[index], dir))
		return tmpAcc
	}, init)

	// Unlike JavaScript, these calls are not chained here; each intermediate
	// result is assigned to its own variable.
	arrSlice := arrReduce.(array.Array)
	// NOTE(review): the meaning of Slice(1, 0) depends on the array package
	// (presumably "from index 1 to the end", dropping the seed) - confirm.
	arrReverse := arrSlice.Slice(1, 0)
	arrReverse.Reverse()
	// When the first two entries are identical only one of them is returned.
	if len(arrReverse) >= 2 {
		if arrReverse[0] == arrReverse[1] {
			return []string{arrReverse[0].(string)}
		}
	}

	// Windows paths that start with a backslash: the callback gives every
	// entry a leading backslash, replacing a leading "." when present.
	// NOTE(review): the value returned by Map is discarded; this only has an
	// effect if Map modifies cut in place - confirm against the array
	// package. The callback also indexes d.(string)[0], which panics on an
	// empty string.
	if isWindows && strings.HasPrefix(cwd, "\\") {
		cut := arrReverse.Slice(0, -1)
		cut.Map(func(d interface{}, args ...interface{}) interface{} {
			var ch = d.(string)[0]
			if ch == '\\' {
				return d
			} else if ch == '.' {
				return "\\" + d.(string)[1:]
			} else {
				return "\\" + d.(string)
			}
		})
		return cut.ToString()
	}
	return arrReverse.ToString()
}
开发者ID:feiquanbifeng,项目名称:go-projects,代码行数:92,代码来源:parents.go


注:本文中的regexp.Regexp.Split方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。