本文整理汇总了 Python 中 classifier.Classifier.prune_features_topfrequency 方法的典型用法代码示例。如果您正苦于以下问题:Python Classifier.prune_features_topfrequency 方法的具体用法是什么?Python Classifier.prune_features_topfrequency 怎么用?有哪些 Python Classifier.prune_features_topfrequency 的使用例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 classifier.Classifier 的用法示例。
下文共展示了 Classifier.prune_features_topfrequency 方法的 1 个代码示例,这些示例默认按受欢迎程度排序。您可以为喜欢或觉得有用的代码点赞,您的评价将有助于系统推荐出更好的 Python 代码示例。
示例1: classify
# 需要导入模块: from classifier import Classifier [as 别名]
# 或者: from classifier.Classifier import prune_features_topfrequency [as 别名]
#.........这里部分代码省略.........
if not re.search(r"timex_",feature_new[featureo]):
extra_reg = int(feature.split("_")[-1])
new_feature = str(int(feature_new[featureo].split("_")[0]) + extra_reg) + "_days"
new_features.append(new_feature)
else:
new_features.append(feature)
instance["features"] = new_features
# Rewrite the "timex_" features of every test instance.
#
# A feature containing "timex_" is split at its last underscore into a base
# name and a numeric suffix.  The base name is looked up in feature_new
# (built earlier in this function, outside this excerpt).  When the looked-up
# value is no longer a "timex_" string, its leading number plus the suffix
# becomes a "<n>_days" feature.  Every other timex feature is dropped, and
# features without "timex_" are kept unchanged.
for ev in test_events:
    for instance in event_instances[0][ev]:
        new_features = []
        for feature in instance["features"]:
            if "timex_" not in feature:
                new_features.append(feature)
                continue
            base, _sep, offset = feature.rpartition("_")
            try:
                mapped = feature_new[base]
                if "timex_" in mapped:
                    # The replacement is itself a time expression: the
                    # original code emitted nothing for this feature.
                    continue
                days = int(mapped.split("_")[0]) + int(offset)
            except (KeyError, ValueError):
                # Best effort, as before: a base name missing from
                # feature_new or a non-numeric part drops the feature.
                # NOTE(review): assumes feature_new is a mapping of strings;
                # the original bare `except:` also hid every other error.
                continue
            new_features.append(str(days) + "_days")
        instance["features"] = new_features
# Training data: the instances of all training events, flattened into one
# list (a single pass instead of repeated list concatenation via sum()).
train = [
    train_instance
    for train_event in train_events
    for train_instance in event_instances[0][train_event]
]

# Test data: one dict per test event, holding its instances ("instances")
# and the path its predictions are written to ("out").
test = []
for event in test_events:
    print(event)
    # Output directory: args.d followed by the event's path components and
    # the scaling setting, each terminated by "/".  args.d is concatenated
    # as-is, so it is expected to already end with a path separator.
    eventdir = args.d + "/".join(event.split("/") + [args.scaling]) + "/"
    if not os.path.exists(eventdir):
        # os.makedirs creates all missing levels without going through a
        # shell, so names with spaces or shell metacharacters are safe and
        # a failure raises OSError instead of being silently ignored.
        os.makedirs(eventdir)
    print(eventdir)
    if args.majority:
        eventout = eventdir + "tweet.txt"
    else:
        eventout = eventdir + str(args.window) + "_" + str(args.step) + ".txt"
    test.append({"out": eventout, "instances": event_instances[0][event]})
if args.c == "median_baseline":
    # Baseline: for every test instance, write "<label> <n>", where <n> is
    # the number taken from the instance's most frequent "<n>_days" feature,
    # or "during" when the instance has no such feature.
    day_estimate = re.compile(r"(-?\d+)_days")
    for td in test:
        # `with` guarantees the file is closed even if an instance fails.
        with open(td["out"], "w") as outfile:
            for instance in td["instances"]:
                votes = defaultdict(int)
                for feature in instance["features"]:
                    # Count only features the number can be parsed from.
                    # The original filtered on the looser pattern "days" and
                    # then crashed if the winning feature did not match.
                    if day_estimate.search(feature):
                        votes[feature] += 1
                if votes:
                    top = max(votes, key=votes.get)
                    num = day_estimate.search(top).group(1)
                else:
                    num = "during"
                outfile.write(instance["label"] + " " + str(num) + "\n")
else:
    # Set up the classifier object; `jobs` is only passed when given.
    classifier_options = {"scaling": args.scaling}
    if args.jobs:
        classifier_options["jobs"] = args.jobs
    cl = Classifier(train, test, **classifier_options)

    # Optional preprocessing steps, each switched on by its own flag.
    if args.stdev:
        cl.filter_stdev(args.stdev, "timex_")
    if args.balance:
        print("balancing...")
        cl.balance_data()
    print("counting...")
    cl.count_feature_frequency()
    if args.f:
        print("pruning...")
        cl.prune_features_topfrequency(args.f)

    # Generate sparse input.
    print("indexing...")
    cl.index_features()

    # Generate classifiers; class weighting is requested only with args.cw.
    print("classifying...")
    svm_options = {"classweight": "auto"} if args.cw else {}
    if args.c == "svm":
        cl.classify_svm(**svm_options)
    elif args.c == "svr":
        print("svr")
        cl.classify_svm(t="continuous", **svm_options)