本文整理汇总了Python中Features.wordCountsSkLearn方法的典型用法代码示例。如果您正苦于以下问题:Python Features.wordCountsSkLearn方法的具体用法?Python Features.wordCountsSkLearn怎么用?Python Features.wordCountsSkLearn使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Features的用法示例。
在下文中一共展示了Features.wordCountsSkLearn方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: extra_features
# 需要导入模块: import Features [as 别名]
# 或者: from Features import wordCountsSkLearn [as 别名]
def extra_features(train, test):
    """Assemble train/test feature matrices from n-gram counts plus ``feat5`` output.

    Word counts over unigrams and bigrams (English stop words removed) are
    computed for ``train``; the state object returned by that call is then
    passed as ``vectorizer`` when counting ``test``. The pair returned by
    ``feat5(train, test)`` is appended to the matching n-gram matrix via
    ``Features.append_features``.

    Args:
        train: Training data, forwarded unchanged to
            ``Features.wordCountsSkLearn`` and ``feat5``.
        test: Test data, forwarded the same way.

    Returns:
        A ``(train_matrix, test_matrix)`` tuple as produced by
        ``Features.append_features``.
    """
    # Unigrams and bigrams.
    unigrams_and_bigrams = (1, 2)
    fitted_state, ngrams_for_train = Features.wordCountsSkLearn(
        train,
        ngram_range=unigrams_and_bigrams,
        stop_words='english',
    )
    # Reuse the state from the training call; presumably this keeps the test
    # columns aligned with the training vocabulary -- confirm in Features.
    _ignored, ngrams_for_test = Features.wordCountsSkLearn(
        test,
        vectorizer=fitted_state,
        ngram_range=unigrams_and_bigrams,
        stop_words='english',
    )

    # Valence and punctuation features (per the original comment; feat5 is
    # defined elsewhere).
    extras_for_train, extras_for_test = feat5(train, test)

    # Combine the feature groups, train first and then test.
    combined_train = Features.append_features([ngrams_for_train, extras_for_train])
    combined_test = Features.append_features([ngrams_for_test, extras_for_test])
    return combined_train, combined_test
示例2: count_unigrams
# 需要导入模块: import Features [as 别名]
# 或者: from Features import wordCountsSkLearn [as 别名]
def count_unigrams(outpath):
    """Report the distinct-unigram count and write unigram histograms to CSV.

    Word counts (English stop words removed) are computed separately for the
    module-level ``tw`` and ``blog`` datasets. The number of distinct
    vocabulary entries across both is printed, and one histogram per dataset
    is written to ``outpath`` as ``"%s,%d,%f"`` rows.

    NOTE(review): ``tw``, ``blog``, ``histogram_cts`` and ``histo_to_tuples``
    are defined outside this block; their exact shapes are not visible here.

    Args:
        outpath: Path of the CSV file to (over)write.

    Returns:
        Always ``0``.
    """
    tw_cter, twitter_cts = Features.wordCountsSkLearn(
        Features.getX(tw), stop_words='english')
    blog_cter, blog_cts = Features.wordCountsSkLearn(
        Features.getX(blog), stop_words='english')

    # Total number of non-stop-word unigrams.
    # A set union is used instead of ``keys() + keys()``: on Python 3 the key
    # views do not support ``+``, whereas this form works on both 2 and 3.
    unigrams = set(tw_cter.vocabulary_.keys()).union(blog_cter.vocabulary_.keys())
    # Single pre-formatted argument so the call prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print("Data has %d distinct unigrams" % len(unigrams))

    # Distribution of unigram counts.
    twitter_unigram_histo = histogram_cts(twitter_cts)
    blog_unigram_histo = histogram_cts(blog_cts)
    unigram_histo = (histo_to_tuples(twitter_unigram_histo, 'twitter+wiki') +
                     histo_to_tuples(blog_unigram_histo, 'blog'))

    # Write out to csv; each element must be a 3-tuple matching "%s,%d,%f".
    with open(outpath, 'w') as unigram_histo_file:
        for elem in unigram_histo:
            unigram_histo_file.write("%s,%d,%f\n" % elem)
    return 0
示例3: feat3
# 需要导入模块: import Features [as 别名]
# 或者: from Features import wordCountsSkLearn [as 别名]
def feat3(train, test):
    """Build train/test matrices using state from ``Features.tfIdfSkLearn``.

    ``Features.tfIdfSkLearn`` is run on ``train``; the state object it returns
    is passed as ``vectorizer`` to ``Features.wordCountsSkLearn`` for ``test``.

    NOTE(review): the train side uses the tf-idf helper while the test side
    uses the word-count helper with the tf-idf state. Whether the two
    matrices end up on the same scale depends on how ``wordCountsSkLearn``
    handles a supplied vectorizer -- confirm in Features.

    Args:
        train: Training data passed to ``Features.tfIdfSkLearn``.
        test: Test data passed to ``Features.wordCountsSkLearn``.

    Returns:
        A ``(train_matrix, test_matrix)`` tuple.
    """
    tfidf_result = Features.tfIdfSkLearn(train)
    fitted_state, matrix_for_train = tfidf_result
    _ignored, matrix_for_test = Features.wordCountsSkLearn(
        test, vectorizer=fitted_state)
    return matrix_for_train, matrix_for_test
示例4: feat2
# 需要导入模块: import Features [as 别名]
# 或者: from Features import wordCountsSkLearn [as 别名]
def feat2(train, test):
    """Build train/test matrices, requesting English stop words on the test call.

    ``Features.tfIdfSkLearn`` is run on ``train``; the state object it returns
    is passed as ``vectorizer`` to ``Features.wordCountsSkLearn`` for ``test``,
    together with ``stop_words='english'``.

    NOTE(review): ``stop_words`` is given only on the test call, not on the
    training call; whether it has any effect once a vectorizer is supplied
    cannot be determined from this block -- confirm in Features.

    Args:
        train: Training data passed to ``Features.tfIdfSkLearn``.
        test: Test data passed to ``Features.wordCountsSkLearn``.

    Returns:
        A ``(train_matrix, test_matrix)`` tuple.
    """
    tfidf_result = Features.tfIdfSkLearn(train)
    fitted_state, matrix_for_train = tfidf_result
    _ignored, matrix_for_test = Features.wordCountsSkLearn(
        test,
        vectorizer=fitted_state,
        stop_words='english',
    )
    return matrix_for_train, matrix_for_test