This article collects typical usage examples of the Golang type github.com/shuLhan/tabula.DatasetInterface. If you are unsure what DatasetInterface is or how to use it, the selected examples below may help.
The following 15 code examples show how DatasetInterface is used; by default they are sorted by popularity.
Example 1: Compute
/*
Compute the frequency of all known words in the inserted text.
*/
func (ftr *WordsAllFrequency) Compute(dataset tabula.DatasetInterface) {
	allWords := GetAllWordList()
	col := dataset.GetColumnByName("additions")

	for _, rec := range col.Records {
		r := tabula.NewRecordReal(float64(0))

		s := rec.String()
		if len(s) == 0 {
			ftr.PushBack(r)
			continue
		}

		s = clean.WikiText(s)
		if len(s) == 0 {
			ftr.PushBack(r)
			continue
		}

		inWords := tekstus.StringSplitWords(s, true, false)
		freq := tekstus.WordsFrequenciesOf(inWords, allWords, false)

		r.SetFloat(Round(freq))
		ftr.PushBack(r)
	}
}
Example 2: Compute
/*
Compute the length of the longest word in the inserted text.
*/
func (ftr *LongestWord) Compute(dataset tabula.DatasetInterface) {
	adds := dataset.GetColumnByName("additions")
	addslen := adds.Len()

	for x, rec := range adds.Records {
		text := rec.String()
		textlen := len(text)
		if textlen == 0 {
			ftr.PushBack(tabula.NewRecordInt(int64(0)))
			continue
		}

		text = clean.WikiText(text)
		inWords := tekstus.StringSplitWords(text, true, true)
		slong, _ := tekstus.WordsFindLongest(inWords)

		if DEBUG >= 2 {
			fmt.Printf("[feature] %d/%d longest word: %q\n", x, addslen,
				slong)
		}

		slonglen := int64(len(slong))
		ftr.PushBack(tabula.NewRecordInt(slonglen))
	}
}
Example 3: Compute
/*
Compute describes what this feature does.
*/
func (ftr *Template) Compute(dataset tabula.DatasetInterface) {
	// Get the column from the dataset. This is a reference to `InputMetadata`
	// in `features.dsv`.
	// To see the list of columns that we can process, see `features.dsv`
	// for an example.
	col := dataset.GetColumnByName("editid")

	for _, rec := range col.Records {
		// This is where the computed value will be saved.
		r := &tabula.Record{}

		// Get the field value from the dataset.
		s := rec.String()

		// Process the field value `s` (e.g. cleaning, etc).
		// ...

		// Set the feature value after processing; fall back to zero if the
		// value cannot be converted to the feature type.
		e := r.SetValue(s, ftr.GetType())
		if e != nil {
			r.SetInteger(0)
		}

		// Save the record value.
		ftr.PushBack(r)
	}
}
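Every Compute implementation in this listing follows the same shape: fetch one column from the dataset by name, derive one record per row, and push the result onto the feature. The sketch below factors that shared pattern into a standalone helper; computeOnColumn is a hypothetical name, not part of the project, and it assumes (as the examples above suggest) that the feature's PushBack accepts a *tabula.Record.

// computeOnColumn is a hypothetical helper illustrating the pattern shared by
// the Compute methods in these examples: it fetches a single column by name,
// maps every record's string value to a new record, and pushes the result
// onto the feature.
func computeOnColumn(
	ftr interface{ PushBack(r *tabula.Record) },
	dataset tabula.DatasetInterface,
	colName string,
	derive func(s string) *tabula.Record,
) {
	col := dataset.GetColumnByName(colName)
	for _, rec := range col.Records {
		ftr.PushBack(derive(rec.String()))
	}
}

// Example 4 below could then be expressed as:
//
//	computeOnColumn(ftr, dataset, "additions", func(s string) *tabula.Record {
//		cnt := tekstus.StringCountTokens(s, tokens, false)
//		return tabula.NewRecordInt(int64(cnt))
//	})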
Example 4: Compute
/*
Compute the number of good tokens in the inserted text.
*/
func (ftr *GoodToken) Compute(dataset tabula.DatasetInterface) {
	col := dataset.GetColumnByName("additions")
	for _, rec := range col.Records {
		cnt := tekstus.StringCountTokens(rec.String(), tokens, false)
		ftr.PushBack(tabula.NewRecordInt(int64(cnt)))
	}
}
Example 5: Compute
/*
Compute the compression rate of the inserted text.
*/
func (ftr *CompressRate) Compute(dataset tabula.DatasetInterface) {
	adds := dataset.GetColumnByName("additions")
	for _, rec := range adds.Records {
		v, _ := compressRateLzw(rec.String())
		ftr.PushBack(tabula.NewRecordReal(Round(v)))
	}
}
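The compressRateLzw helper used above belongs to the project and is not shown in this listing. A rough sketch of the idea, under the assumption that it simply compares LZW-compressed size with original size using the standard compress/lzw package (an illustration, not the actual implementation):

import (
	"bytes"
	"compress/lzw"
)

// compressRateLZW is a hypothetical stand-in for the compressRateLzw helper
// above: it LZW-compresses the text and returns compressed size divided by
// original size. Highly repetitive text compresses well and yields a low rate.
func compressRateLZW(text string) (float64, error) {
	if len(text) == 0 {
		return 0, nil
	}

	var buf bytes.Buffer
	w := lzw.NewWriter(&buf, lzw.LSB, 8)

	if _, err := w.Write([]byte(text)); err != nil {
		return 0, err
	}
	if err := w.Close(); err != nil {
		return 0, err
	}

	return float64(buf.Len()) / float64(len(text)), nil
}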
Example 6: Compute
/*
Compute the ratio of non-alphanumeric characters to all characters in the
inserted text.
*/
func (ftr *NonAlnumRatio) Compute(dataset tabula.DatasetInterface) {
	adds := dataset.GetColumnByName("additions")
	for _, rec := range adds.Records {
		ratio := tekstus.RatioNonAlnumChar(rec.String(), false)
		ftr.PushBack(tabula.NewRecordReal(Round(ratio)))
	}
}
Example 7: Compute
/*
Compute the maximum character sequence in the inserted text.
*/
func (ftr *LongestCharSeq) Compute(dataset tabula.DatasetInterface) {
	col := dataset.GetColumnByName("additions")
	for _, rec := range col.Records {
		text := rec.String()
		_, v := tekstus.GetMaxCharSequence(text)
		ftr.PushBack(tabula.NewRecordInt(int64(v)))
	}
}
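tekstus.GetMaxCharSequence comes from the tekstus library and is not shown here. Assuming, as its name and usage suggest, that it returns the most-repeated consecutive character together with the length of that run, the idea can be sketched as follows (maxCharSequence is a hypothetical illustration, not the library's code):

// maxCharSequence returns the character that repeats the most times in a row
// and the length of that run, e.g. "aaah!!!!" -> '!', 4.
func maxCharSequence(text string) (rune, int) {
	var (
		maxChar, curChar rune
		maxLen, curLen   int
	)
	for _, c := range text {
		if c == curChar {
			curLen++
		} else {
			curChar, curLen = c, 1
		}
		if curLen > maxLen {
			maxChar, maxLen = curChar, curLen
		}
	}
	return maxChar, maxLen
}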
Example 8: Compute
/*
Compute the frequency of vulgar words in the inserted text.
*/
func (ftr *WordsVulgarFrequency) Compute(dataset tabula.DatasetInterface) {
	col := dataset.GetColumnByName("additions")
	for _, rec := range col.Records {
		s := clean.WikiText(rec.String())
		freq := tekstus.StringFrequenciesOf(s, tekstus.VulgarWords,
			false)
		ftr.PushBack(tabula.NewRecordReal(Round(freq)))
	}
}
Example 9: Compute
/*
Compute the character diversity of the inserted text: the text length raised
to the power 1/(1 + number of unique characters).
*/
func (ftr *CharDiversity) Compute(dataset tabula.DatasetInterface) {
	adds := dataset.GetColumnByName("additions")
	for _, rec := range adds.Records {
		intext := rec.String()
		textlen := float64(len(intext))
		nuniq := tekstus.CountUniqChar(intext)
		v := math.Pow(textlen, 1/float64(1+nuniq))
		ftr.PushBack(tabula.NewRecordReal(Round(v)))
	}
}
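Following the formula above, repetitive text scores higher: a 100-character insert that uses only 10 distinct characters yields 100^(1/11) ≈ 1.52, while the same length with 50 distinct characters yields 100^(1/51) ≈ 1.09.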
Example 10: Compute
// Compute counts the number of bytes used in the comment, NOT including
// the header content "/* ... */".
func (ftr *CommentLength) Compute(dataset tabula.DatasetInterface) {
	col := dataset.GetColumnByName("editcomment")
	leftcap := []byte("/*")
	rightcap := []byte("*/")

	for _, rec := range col.Records {
		cmt := rec.Bytes()
		cmt, _ = tekstus.BytesRemoveUntil(cmt, leftcap, rightcap)
		ftr.PushBack(tabula.NewRecordInt(int64(len(cmt))))
	}
}
Example 11: Compute
/*
Compute changes the classification from text to numeric: a "regular" edit
becomes 0 and "vandalism" becomes 1.
*/
func (ftr *Class) Compute(dataset tabula.DatasetInterface) {
	col := dataset.GetColumnByName("class")
	for _, rec := range col.Records {
		r := tabula.NewRecordInt(0)
		if rec.String() == "vandalism" {
			r.SetInteger(1)
		}
		ftr.PushBack(r)
	}
}
Example 12: Compute
/*
Compute sets the value to 1 if the record in the column is an IP address,
which marks the editor as anonymous; otherwise the value is 0.
*/
func (anon *Anonim) Compute(dataset tabula.DatasetInterface) {
	col := dataset.GetColumnByName("editor")
	for _, rec := range col.Records {
		r := tabula.NewRecordReal(0)
		IP := net.ParseIP(rec.String())
		if IP != nil {
			r.SetFloat(1.0)
		}
		anon.PushBack(r)
	}
}
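Here net.ParseIP returns nil for anything that is not a valid IPv4 or IPv6 literal, so a registered username such as "JohnDoe" leaves the feature at 0, while an anonymous edit recorded under an address such as "192.0.2.7" sets it to 1.0.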
Example 13: Compute
/*
Compute the size ratio between the new and old revisions.
*/
func (ftr *SizeRatio) Compute(dataset tabula.DatasetInterface) {
	oldid := dataset.GetColumnByName("oldrevisionid")
	newid := dataset.GetColumnByName("newrevisionid")
	oldidlen := oldid.Len()

	for x, rec := range newid.Records {
		if x >= oldidlen {
			// Just in case there are more new revision IDs than old ones.
			break
		}

		newlen := revision.GetSize(rec.String())
		oldlen := revision.GetSize(oldid.Records[x].String())
		difflen := float64(1+newlen) / float64(1+oldlen)

		ftr.PushBack(tabula.NewRecordReal(Round(difflen)))
	}
}
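With the +1 smoothing above, a 2000-byte new revision replacing a 1000-byte old one scores roughly 2.0, a revision that blanks a 1000-byte page scores about 0.001, and the ratio stays defined even when the old revision is empty.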
Example 14: doDiff
/*
doDiff reads the old and new revisions of each edit and compares them to get
the deletions in the old revision and the additions in the new revision.
The deletions and additions are then combined into strings and appended to
the dataset as two new columns.
*/
func doDiff(readset dsv.ReaderInterface, ds tabula.DatasetInterface) {
	oldids := ds.GetColumnByName("oldrevisionid").ToStringSlice()
	newids := ds.GetColumnByName("newrevisionid").ToStringSlice()

	revision.SetDir(dRevisions)

	diffset, e := revision.Diff(oldids, newids, ".txt")
	if e != nil {
		panic(e)
	}

	// Create input metadata for the diff columns.
	md := dsv.NewMetadata("deletions", "string", ",", "\"", "\"", nil)
	readset.AddInputMetadata(md)

	md = dsv.NewMetadata("additions", "string", ",", "\"", "\"", nil)
	readset.AddInputMetadata(md)

	ds.MergeColumns(diffset)
}
Example 15: Compute
/*
Compute the frequency of the inserted words in the content of the new revision.
*/
func (ftr *TermFrequency) Compute(dataset tabula.DatasetInterface) {
	newrevidx := dataset.GetColumnByName("newrevisionid")
	adds := dataset.GetColumnByName("additions")
	recordslen := len(adds.Records)

	for x, rec := range adds.Records {
		r := tabula.NewRecordReal(float64(0))

		// Get inserted words.
		intext := rec.String()
		if len(intext) == 0 {
			ftr.PushBack(r)
			continue
		}

		intext = clean.WikiText(intext)
		inWords := tekstus.StringSplitWords(intext, true, true)

		// Get content of the new revision.
		revid := newrevidx.Records[x].String()
		if DEBUG >= 2 {
			fmt.Printf("[feature] term_frequency: %d/%d processing %q\n",
				x, recordslen, revid)
		}

		newtext, e := revision.GetContentClean(revid)
		if e != nil {
			ftr.PushBack(r)
			continue
		}

		newWords := tekstus.StringSplitWords(newtext, true, false)
		freq := tekstus.WordsFrequenciesOf(newWords, inWords, false)

		r.SetFloat(Round(freq))
		ftr.PushBack(r)
	}
}