This article collects typical usage examples of the Golang type github.com/shuLhan/tabula.DatasetInterface. If you are unsure what DatasetInterface is or how to use it, the selected examples below may help.
The following 15 code examples show how DatasetInterface is used; by default they are sorted by popularity.
Example 1: Compute
/*
Compute the frequency of all known words in the inserted text.
*/
func (ftr *WordsAllFrequency) Compute(dataset tabula.DatasetInterface) {
	allWords := GetAllWordList()
	col := dataset.GetColumnByName("additions")

	for _, rec := range col.Records {
		r := tabula.NewRecordReal(float64(0))

		s := rec.String()
		if len(s) == 0 {
			ftr.PushBack(r)
			continue
		}

		s = clean.WikiText(s)
		if len(s) == 0 {
			ftr.PushBack(r)
			continue
		}

		inWords := tekstus.StringSplitWords(s, true, false)
		freq := tekstus.WordsFrequenciesOf(inWords, allWords, false)

		r.SetFloat(Round(freq))
		ftr.PushBack(r)
	}
}
Example 2: Compute
/*
Compute the length of the longest word in the inserted text.
*/
func (ftr *LongestWord) Compute(dataset tabula.DatasetInterface) {
	adds := dataset.GetColumnByName("additions")
	addslen := adds.Len()

	for x, rec := range adds.Records {
		text := rec.String()
		textlen := len(text)
		if textlen == 0 {
			ftr.PushBack(tabula.NewRecordInt(int64(0)))
			continue
		}

		text = clean.WikiText(text)
		inWords := tekstus.StringSplitWords(text, true, true)
		slong, _ := tekstus.WordsFindLongest(inWords)

		if DEBUG >= 2 {
			fmt.Printf("[feature] %d/%d longest word: %q\n", x, addslen,
				slong)
		}

		slonglen := int64(len(slong))
		ftr.PushBack(tabula.NewRecordInt(slonglen))
	}
}
Example 3: Compute
/*
Compute describes what this feature does.
*/
func (ftr *Template) Compute(dataset tabula.DatasetInterface) {
	// Get the column from the dataset. This is a reference to `InputMetadata`
	// in `features.dsv`.
	// To see the list of columns that we can process, see `features.dsv`
	// for an example.
	col := dataset.GetColumnByName("editid")

	for _, rec := range col.Records {
		// This is where the computed value will be saved.
		r := &tabula.Record{}

		// Get the field value from the dataset.
		s := rec.String()

		// Process the field value `s` (e.g. cleaning, etc).
		// ...

		// Set the feature value after processing; fall back to zero if the
		// value cannot be converted to the feature type.
		e := r.SetValue(s, ftr.GetType())
		if e != nil {
			r.SetInteger(0)
		}

		// Save the record value.
		ftr.PushBack(r)
	}
}
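Every Compute implementation in this listing follows the same shape: fetch one column from the dataset by name, derive one record per row, and push the result onto the feature. The sketch below factors that shared pattern into a standalone helper; computeOnColumn is a hypothetical name, not part of the project, and it assumes (as the examples above suggest) that the feature's PushBack accepts a *tabula.Record.

// computeOnColumn is a hypothetical helper illustrating the pattern shared by
// the Compute methods in these examples: it fetches a single column by name,
// maps every record's string value to a new record, and pushes the result
// onto the feature.
func computeOnColumn(
	ftr interface{ PushBack(r *tabula.Record) },
	dataset tabula.DatasetInterface,
	colName string,
	derive func(s string) *tabula.Record,
) {
	col := dataset.GetColumnByName(colName)
	for _, rec := range col.Records {
		ftr.PushBack(derive(rec.String()))
	}
}

// Example 4 below could then be expressed as:
//
//	computeOnColumn(ftr, dataset, "additions", func(s string) *tabula.Record {
//		cnt := tekstus.StringCountTokens(s, tokens, false)
//		return tabula.NewRecordInt(int64(cnt))
//	})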
Example 4: Compute
/*
Compute the number of good tokens in the inserted text.
*/
func (ftr *GoodToken) Compute(dataset tabula.DatasetInterface) {
	col := dataset.GetColumnByName("additions")
	for _, rec := range col.Records {
		cnt := tekstus.StringCountTokens(rec.String(), tokens, false)
		ftr.PushBack(tabula.NewRecordInt(int64(cnt)))
	}
}
Example 5: Compute
/*
Compute the compression rate of the inserted text.
*/
func (ftr *CompressRate) Compute(dataset tabula.DatasetInterface) {
	adds := dataset.GetColumnByName("additions")
	for _, rec := range adds.Records {
		v, _ := compressRateLzw(rec.String())
		ftr.PushBack(tabula.NewRecordReal(Round(v)))
	}
}
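The compressRateLzw helper used above belongs to the project and is not shown in this listing. A rough sketch of the idea, under the assumption that it simply compares LZW-compressed size with original size using the standard compress/lzw package (an illustration, not the actual implementation):

import (
	"bytes"
	"compress/lzw"
)

// compressRateLZW is a hypothetical stand-in for the compressRateLzw helper
// above: it LZW-compresses the text and returns compressed size divided by
// original size. Highly repetitive text compresses well and yields a low rate.
func compressRateLZW(text string) (float64, error) {
	if len(text) == 0 {
		return 0, nil
	}

	var buf bytes.Buffer
	w := lzw.NewWriter(&buf, lzw.LSB, 8)

	if _, err := w.Write([]byte(text)); err != nil {
		return 0, err
	}
	if err := w.Close(); err != nil {
		return 0, err
	}

	return float64(buf.Len()) / float64(len(text)), nil
}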
Example 6: Compute
/*
Compute the ratio of non-alphanumeric characters to all characters in the
inserted text.
*/
func (ftr *NonAlnumRatio) Compute(dataset tabula.DatasetInterface) {
	adds := dataset.GetColumnByName("additions")
	for _, rec := range adds.Records {
		ratio := tekstus.RatioNonAlnumChar(rec.String(), false)
		ftr.PushBack(tabula.NewRecordReal(Round(ratio)))
	}
}
Example 7: Compute
/*
Compute the maximum character sequence in the inserted text.
*/
func (ftr *LongestCharSeq) Compute(dataset tabula.DatasetInterface) {
	col := dataset.GetColumnByName("additions")
	for _, rec := range col.Records {
		text := rec.String()
		_, v := tekstus.GetMaxCharSequence(text)
		ftr.PushBack(tabula.NewRecordInt(int64(v)))
	}
}
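tekstus.GetMaxCharSequence comes from the tekstus library and is not shown here. Assuming, as its name and usage suggest, that it returns the most-repeated consecutive character together with the length of that run, the idea can be sketched as follows (maxCharSequence is a hypothetical illustration, not the library's code):

// maxCharSequence returns the character that repeats the most times in a row
// and the length of that run, e.g. "aaah!!!!" -> '!', 4.
func maxCharSequence(text string) (rune, int) {
	var (
		maxChar, curChar rune
		maxLen, curLen   int
	)
	for _, c := range text {
		if c == curChar {
			curLen++
		} else {
			curChar, curLen = c, 1
		}
		if curLen > maxLen {
			maxChar, maxLen = curChar, curLen
		}
	}
	return maxChar, maxLen
}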
Example 8: Compute
/*
Compute the frequency of vulgar words in the inserted text.
*/
func (ftr *WordsVulgarFrequency) Compute(dataset tabula.DatasetInterface) {
	col := dataset.GetColumnByName("additions")
	for _, rec := range col.Records {
		s := clean.WikiText(rec.String())
		freq := tekstus.StringFrequenciesOf(s, tekstus.VulgarWords,
			false)
		ftr.PushBack(tabula.NewRecordReal(Round(freq)))
	}
}
Example 9: Compute
/*
Compute the character diversity of the inserted text: the text length raised
to the power 1/(1 + number of unique characters).
*/
func (ftr *CharDiversity) Compute(dataset tabula.DatasetInterface) {
	adds := dataset.GetColumnByName("additions")
	for _, rec := range adds.Records {
		intext := rec.String()
		textlen := float64(len(intext))
		nuniq := tekstus.CountUniqChar(intext)
		v := math.Pow(textlen, 1/float64(1+nuniq))
		ftr.PushBack(tabula.NewRecordReal(Round(v)))
	}
}
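Following the formula above, repetitive text scores higher: a 100-character insert that uses only 10 distinct characters yields 100^(1/11) ≈ 1.52, while the same length with 50 distinct characters yields 100^(1/51) ≈ 1.09.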
Example 10: Compute
// Compute counts the number of bytes used in the comment, NOT including
// the header content "/* ... */".
func (ftr *CommentLength) Compute(dataset tabula.DatasetInterface) {
	col := dataset.GetColumnByName("editcomment")
	leftcap := []byte("/*")
	rightcap := []byte("*/")

	for _, rec := range col.Records {
		cmt := rec.Bytes()
		cmt, _ = tekstus.BytesRemoveUntil(cmt, leftcap, rightcap)
		ftr.PushBack(tabula.NewRecordInt(int64(len(cmt))))
	}
}
Example 11: Compute
/*
Compute changes the classification from text to numeric: a "regular" edit
becomes 0 and "vandalism" becomes 1.
*/
func (ftr *Class) Compute(dataset tabula.DatasetInterface) {
	col := dataset.GetColumnByName("class")
	for _, rec := range col.Records {
		r := tabula.NewRecordInt(0)
		if rec.String() == "vandalism" {
			r.SetInteger(1)
		}
		ftr.PushBack(r)
	}
}
Example 12: Compute
/*
Compute sets the value to 1 if the record in the column is an IP address,
which marks the editor as anonymous; otherwise the value is 0.
*/
func (anon *Anonim) Compute(dataset tabula.DatasetInterface) {
	col := dataset.GetColumnByName("editor")
	for _, rec := range col.Records {
		r := tabula.NewRecordReal(0)
		IP := net.ParseIP(rec.String())
		if IP != nil {
			r.SetFloat(1.0)
		}
		anon.PushBack(r)
	}
}
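Here net.ParseIP returns nil for anything that is not a valid IPv4 or IPv6 literal, so a registered username such as "JohnDoe" leaves the feature at 0, while an anonymous edit recorded under an address such as "192.0.2.7" sets it to 1.0.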
Example 13: Compute
/*
Compute the size ratio between the new and old revisions.
*/
func (ftr *SizeRatio) Compute(dataset tabula.DatasetInterface) {
	oldid := dataset.GetColumnByName("oldrevisionid")
	newid := dataset.GetColumnByName("newrevisionid")
	oldidlen := oldid.Len()

	for x, rec := range newid.Records {
		if x >= oldidlen {
			// Just in case there are more new revision IDs than old ones.
			break
		}

		newlen := revision.GetSize(rec.String())
		oldlen := revision.GetSize(oldid.Records[x].String())
		difflen := float64(1+newlen) / float64(1+oldlen)

		ftr.PushBack(tabula.NewRecordReal(Round(difflen)))
	}
}
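With the +1 smoothing above, a 2000-byte new revision replacing a 1000-byte old one scores roughly 2.0, a revision that blanks a 1000-byte page scores about 0.001, and the ratio stays defined even when the old revision is empty.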
Example 14: doDiff
/*
doDiff reads the old and new revisions of each edit and compares them to get
the deletions in the old revision and the additions in the new revision.
The deletions and additions are then combined into strings and appended to
the dataset as two new columns.
*/
func doDiff(readset dsv.ReaderInterface, ds tabula.DatasetInterface) {
	oldids := ds.GetColumnByName("oldrevisionid").ToStringSlice()
	newids := ds.GetColumnByName("newrevisionid").ToStringSlice()

	revision.SetDir(dRevisions)

	diffset, e := revision.Diff(oldids, newids, ".txt")
	if e != nil {
		panic(e)
	}

	// Create input metadata for the diff columns.
	md := dsv.NewMetadata("deletions", "string", ",", "\"", "\"", nil)
	readset.AddInputMetadata(md)

	md = dsv.NewMetadata("additions", "string", ",", "\"", "\"", nil)
	readset.AddInputMetadata(md)

	ds.MergeColumns(diffset)
}
Example 15: Compute
/*
Compute the frequency of the inserted words in the content of the new revision.
*/
func (ftr *TermFrequency) Compute(dataset tabula.DatasetInterface) {
	newrevidx := dataset.GetColumnByName("newrevisionid")
	adds := dataset.GetColumnByName("additions")
	recordslen := len(adds.Records)

	for x, rec := range adds.Records {
		r := tabula.NewRecordReal(float64(0))

		// Get inserted words.
		intext := rec.String()
		if len(intext) == 0 {
			ftr.PushBack(r)
			continue
		}

		intext = clean.WikiText(intext)
		inWords := tekstus.StringSplitWords(intext, true, true)

		// Get content of the new revision.
		revid := newrevidx.Records[x].String()
		if DEBUG >= 2 {
			fmt.Printf("[feature] term_frequency: %d/%d processing %q\n",
				x, recordslen, revid)
		}

		newtext, e := revision.GetContentClean(revid)
		if e != nil {
			ftr.PushBack(r)
			continue
		}

		newWords := tekstus.StringSplitWords(newtext, true, false)
		freq := tekstus.WordsFrequenciesOf(newWords, inWords, false)

		r.SetFloat(Round(freq))
		ftr.PushBack(r)
	}
}