本文整理匯總了Golang中github.com/shuLhan/tabula.DatasetInterface類的典型用法代碼示例。如果您正苦於以下問題:Golang DatasetInterface類的具體用法?Golang DatasetInterface怎麼用?Golang DatasetInterface使用的例子?那麼,這裡精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了DatasetInterface類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Golang代碼示例。
示例1: Compute
/*
Compute pushes one real-valued record for every row of the "additions" column:
the rounded result of tekstus.WordsFrequenciesOf for the row's words, measured
against the list returned by GetAllWordList. A row whose text is empty, either
as stored or after clean.WikiText, yields 0.
*/
func (ftr *WordsAllFrequency) Compute(dataset tabula.DatasetInterface) {
	dictionary := GetAllWordList()
	additions := dataset.GetColumnByName("additions")

	for _, row := range additions.Records {
		// Starts at zero; only overwritten when there is text to measure.
		out := tabula.NewRecordReal(float64(0))

		text := row.String()
		if text != "" {
			text = clean.WikiText(text)
		}

		// Covers both "empty as stored" and "empty after cleaning".
		if text != "" {
			words := tekstus.StringSplitWords(text, true, false)
			out.SetFloat(Round(tekstus.WordsFrequenciesOf(words, dictionary, false)))
		}

		ftr.PushBack(out)
	}
}
示例2: Compute
/*
Compute pushes, for each row of the "additions" column, the byte length of the
longest word that tekstus.WordsFindLongest reports for the cleaned text. A row
with empty text yields 0 without any further processing.
*/
func (ftr *LongestWord) Compute(dataset tabula.DatasetInterface) {
	additions := dataset.GetColumnByName("additions")
	total := additions.Len()

	for i, row := range additions.Records {
		raw := row.String()
		if raw == "" {
			ftr.PushBack(tabula.NewRecordInt(int64(0)))
			continue
		}

		words := tekstus.StringSplitWords(clean.WikiText(raw), true, true)
		// The second return value is not needed here.
		longest, _ := tekstus.WordsFindLongest(words)

		if DEBUG >= 2 {
			fmt.Printf("[feature] %d/%d longest word: %q\n", i, total, longest)
		}

		ftr.PushBack(tabula.NewRecordInt(int64(len(longest))))
	}
}
示例3: Compute
/*
Compute is a skeleton showing how a feature is written: it reads every record
of the "editid" column, converts the record's string value to the feature's
type and pushes the result. Replace the processing step with the real
computation for a new feature.
*/
func (ftr *Template) Compute(dataset tabula.DatasetInterface) {
	// Get the column from dataset. The name refers to an `InputMetadata`
	// entry in `features.dsv`; see that file for the list of columns that
	// can be processed.
	col := dataset.GetColumnByName("editid")

	for _, rec := range col.Records {
		// This is where the computed value will be saved.
		r := &tabula.Record{}

		// Get the field value from dataset.
		s := rec.String()

		// Process the field value `s` (e.g. cleaning, etc).
		// ...

		// Set the feature value after processing. Fall back to 0 only when
		// the value could not be set; a successfully converted value must
		// be kept, not overwritten.
		if e := r.SetValue(s, ftr.GetType()); e != nil {
			r.SetInteger(0)
		}

		// Save the record value.
		ftr.PushBack(r)
	}
}
示例4: Compute
/*
Compute pushes, for each row of the "additions" column, the count returned by
tekstus.StringCountTokens for the row's text against the package-level
`tokens` list.
*/
func (ftr *GoodToken) Compute(dataset tabula.DatasetInterface) {
	additions := dataset.GetColumnByName("additions")

	for _, row := range additions.Records {
		text := row.String()
		found := tekstus.StringCountTokens(text, tokens, false)
		ftr.PushBack(tabula.NewRecordInt(int64(found)))
	}
}
示例5: Compute
/*
Compute pushes, for each row of the "additions" column, the rounded first
value returned by compressRateLzw for the row's text.
*/
func (ftr *CompressRate) Compute(dataset tabula.DatasetInterface) {
	additions := dataset.GetColumnByName("additions")

	for _, row := range additions.Records {
		text := row.String()
		// The second return value of compressRateLzw is deliberately
		// discarded; whatever rate comes back is recorded as-is.
		rate, _ := compressRateLzw(text)
		rounded := Round(rate)
		ftr.PushBack(tabula.NewRecordReal(rounded))
	}
}
示例6: Compute
/*
Compute pushes, for each row of the "additions" column, the rounded ratio
returned by tekstus.RatioNonAlnumChar for the row's text.
*/
func (ftr *NonAlnumRatio) Compute(dataset tabula.DatasetInterface) {
	additions := dataset.GetColumnByName("additions")

	for _, row := range additions.Records {
		text := row.String()
		nonAlnum := tekstus.RatioNonAlnumChar(text, false)
		rounded := Round(nonAlnum)
		ftr.PushBack(tabula.NewRecordReal(rounded))
	}
}
示例7: Compute
/*
Compute pushes, for each row of the "additions" column, the second value
returned by tekstus.GetMaxCharSequence for the row's text, stored as an
integer record.
*/
func (ftr *LongestCharSeq) Compute(dataset tabula.DatasetInterface) {
	additions := dataset.GetColumnByName("additions")

	for _, row := range additions.Records {
		// Only the second return value is recorded; the first is not needed.
		_, longest := tekstus.GetMaxCharSequence(row.String())
		out := tabula.NewRecordInt(int64(longest))
		ftr.PushBack(out)
	}
}
示例8: Compute
/*
Compute pushes, for each row of the "additions" column, the rounded result of
tekstus.StringFrequenciesOf for the cleaned text against tekstus.VulgarWords.
*/
func (ftr *WordsVulgarFrequency) Compute(dataset tabula.DatasetInterface) {
	additions := dataset.GetColumnByName("additions")

	for _, row := range additions.Records {
		raw := row.String()
		cleaned := clean.WikiText(raw)
		vulgarity := tekstus.StringFrequenciesOf(cleaned, tekstus.VulgarWords, false)
		ftr.PushBack(tabula.NewRecordReal(Round(vulgarity)))
	}
}
示例9: Compute
/*
Compute pushes, for each row of the "additions" column, the rounded value of

	len(text) ^ (1 / (1 + uniq))

where len(text) is the byte length of the row's text and uniq is the result of
tekstus.CountUniqChar on it.
*/
func (ftr *CharDiversity) Compute(dataset tabula.DatasetInterface) {
	additions := dataset.GetColumnByName("additions")

	for _, row := range additions.Records {
		text := row.String()
		size := float64(len(text))
		uniq := tekstus.CountUniqChar(text)

		// The +1 keeps the divisor non-zero when no character is counted.
		exponent := 1 / float64(1+uniq)
		diversity := math.Pow(size, exponent)

		ftr.PushBack(tabula.NewRecordReal(Round(diversity)))
	}
}
示例10: Compute
// Compute pushes, for each row of the "editcomment" column, the number of
// bytes left in the comment after tekstus.BytesRemoveUntil has been applied
// with the "/*" and "*/" delimiters, i.e. the header content is NOT counted.
func (ftr *CommentLength) Compute(dataset tabula.DatasetInterface) {
	comments := dataset.GetColumnByName("editcomment")

	// Delimiters are built once and shared by every row.
	opening := []byte("/*")
	closing := []byte("*/")

	for _, row := range comments.Records {
		// The second return value of BytesRemoveUntil is not needed.
		stripped, _ := tekstus.BytesRemoveUntil(row.Bytes(), opening, closing)
		size := int64(len(stripped))
		ftr.PushBack(tabula.NewRecordInt(size))
	}
}
示例11: Compute
/*
Compute converts the textual "class" column into a numeric one: a row whose
text is exactly "vandalism" becomes 1, every other row becomes 0.
*/
func (ftr *Class) Compute(dataset tabula.DatasetInterface) {
	labels := dataset.GetColumnByName("class")

	for _, row := range labels.Records {
		out := tabula.NewRecordInt(0)

		isVandalism := row.String() == "vandalism"
		if isVandalism {
			out.SetInteger(1)
		}

		ftr.PushBack(out)
	}
}
示例12: Compute
/*
Compute marks anonymous editors: for each row of the "editor" column it pushes
1 when net.ParseIP accepts the row's text as an IP address, and 0 otherwise.
*/
func (anon *Anonim) Compute(dataset tabula.DatasetInterface) {
	editors := dataset.GetColumnByName("editor")

	for _, row := range editors.Records {
		out := tabula.NewRecordReal(0)

		// net.ParseIP returns nil when the text is not a valid IP address.
		if net.ParseIP(row.String()) != nil {
			out.SetFloat(1.0)
		}

		anon.PushBack(out)
	}
}
示例13: Compute
/*
Compute pushes, for each pair of rows in the "oldrevisionid" and
"newrevisionid" columns, the rounded ratio

	(1 + size of new revision) / (1 + size of old revision)

where sizes come from revision.GetSize. Both sizes are offset by one so that
an empty old revision does not cause a division by zero. Processing stops at
the end of the shorter column.
*/
func (ftr *SizeRatio) Compute(dataset tabula.DatasetInterface) {
	oldid := dataset.GetColumnByName("oldrevisionid")
	newid := dataset.GetColumnByName("newrevisionid")

	// Bound the loop by the OLD column: it is the one indexed by position
	// below, so its length is what must not be exceeded.
	oldidlen := len(oldid.Records)

	for x, rec := range newid.Records {
		if x >= oldidlen {
			// Just in case there are more new revision IDs than old ones.
			break
		}

		newlen := revision.GetSize(rec.String())
		oldlen := revision.GetSize(oldid.Records[x].String())
		difflen := float64(1+newlen) / float64(1+oldlen)

		ftr.PushBack(tabula.NewRecordReal(Round(difflen)))
	}
}
示例14: doDiff
/*
doDiff compares the old and new revision of every edit in the dataset and
merges the columns returned by revision.Diff into it. It also registers two
string input metadata on the reader, "deletions" and "additions", in that
order.

It panics when revision.Diff returns an error.
*/
func doDiff(readset dsv.ReaderInterface, ds tabula.DatasetInterface) {
	oldRevs := ds.GetColumnByName("oldrevisionid").ToStringSlice()
	newRevs := ds.GetColumnByName("newrevisionid").ToStringSlice()

	revision.SetDir(dRevisions)

	diffs, err := revision.Diff(oldRevs, newRevs, ".txt")
	if err != nil {
		panic(err)
	}

	// Create input metadata for diff; registered only after the diff
	// succeeded.
	readset.AddInputMetadata(
		dsv.NewMetadata("deletions", "string", ",", "\"", "\"", nil))
	readset.AddInputMetadata(
		dsv.NewMetadata("additions", "string", ",", "\"", "\"", nil))

	ds.MergeColumns(diffs)
}
示例15: Compute
/*
Compute pushes, for each row of the "additions" column, the rounded result of
tekstus.WordsFrequenciesOf for the words of the new revision's content
measured against the words of the inserted text. The revision is looked up by
the ID at the same position in the "newrevisionid" column.

A row yields 0 when its inserted text is empty or when
revision.GetContentClean returns an error.
*/
func (ftr *TermFrequency) Compute(dataset tabula.DatasetInterface) {
	revisionIDs := dataset.GetColumnByName("newrevisionid")
	additions := dataset.GetColumnByName("additions")
	total := len(additions.Records)

	for i, row := range additions.Records {
		// Starts at zero; only overwritten when a frequency is computed.
		out := tabula.NewRecordReal(float64(0))

		// Get inserted words.
		inserted := row.String()
		if inserted == "" {
			ftr.PushBack(out)
			continue
		}
		insertedWords := tekstus.StringSplitWords(clean.WikiText(inserted), true, true)

		// Get content of new revision.
		id := revisionIDs.Records[i].String()

		if DEBUG >= 2 {
			fmt.Printf("[feature] term_frequency: %d/%d processing %q\n",
				i, total, id)
		}

		if content, err := revision.GetContentClean(id); err == nil {
			contentWords := tekstus.StringSplitWords(content, true, false)
			ratio := tekstus.WordsFrequenciesOf(contentWords, insertedWords, false)
			out.SetFloat(Round(ratio))
		}

		ftr.PushBack(out)
	}
}