本文整理汇总了Python中classifier.Classifier方法的典型用法代码示例。如果您正苦于以下问题：Python classifier.Classifier方法的具体用法？Python classifier.Classifier怎么用？Python classifier.Classifier使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类classifier的用法示例。
在下文中一共展示了classifier.Classifier方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: import classifier [as 别名]
# 或者: from classifier import Classifier [as 别名]
def __init__(self, use_clf=False):
    """Set up the classifier handle and the score weights.

    Args:
        use_clf: When truthy, a ``'field_score'`` weight of 0.2 is added
            and the ``'coauthor_score'`` weight is 0.7 instead of 0.9.
            The ``'pubyear_score'`` weight is 0.1 in both cases.
    """
    self.clf = Classifier()
    self.use_clf = use_clf

    # Spell out both weight tables explicitly; key order matches the
    # order in which the entries were originally inserted.
    if use_clf:
        score_weights = {
            'coauthor_score': 0.7,
            'pubyear_score': 0.1,
            'field_score': 0.2,
        }
    else:
        score_weights = {
            'coauthor_score': 0.9,
            'pubyear_score': 0.1,
        }
    self.weight = score_weights

    print(self.weight)
示例2: create_predict
# 需要导入模块: import classifier [as 别名]
# 或者: from classifier import Classifier [as 别名]
def create_predict(HudongItem_csv):
    """Predict a label for each distinct title and append it to a file.

    Titles are read from the ``'title'`` column of ``HudongItem_csv`` via
    ``readCSVbyColumn``. Each title is looked up in Neo4j, wrapped in a
    ``HudongItem`` and passed to ``Classifier.KNN_predict``. One
    ``"<title> <label>"`` line per distinct item title is appended to
    ``predict_labels2.txt``, and a progress line is printed for it.

    Args:
        HudongItem_csv: Forwarded unchanged to ``readCSVbyColumn`` as the
            CSV source (presumably a file path; confirm against that helper).
    """
    # Read the contents from Neo4j
    db = Neo4j()
    db.connectDB()
    data_set = db.getLabeledHudongItem('labels.txt')

    classifier = Classifier('wiki.zh.bin')
    classifier.load_trainSet(data_set)
    classifier.set_parameter(weight=[1.0, 3.0, 0.2, 4.0, 0], k=10)

    predict_List = readCSVbyColumn(HudongItem_csv, 'title')
    total = len(predict_List)
    vis = set()  # titles already written during this run

    # The context manager closes the output file even if a lookup or a
    # prediction raises part-way through the loop.
    with open('predict_labels2.txt', 'a') as file_object:
        # count is the 1-based position in predict_List; it advances for
        # duplicates too, so the progress display tracks the input list.
        for count, p in enumerate(predict_List, start=1):
            cur = HudongItem(db.matchHudongItembyTitle(p))
            title = cur.title
            if title in vis:
                continue
            vis.add(title)
            label = classifier.KNN_predict(cur)
            print(str(title)+" "+str(label)+": "+str(count)+"/"+str(total))
            file_object.write(str(title)+" "+str(label)+"\n")
#create_predict('hudong_pedia2.csv')
示例3: create_predict
# 需要导入模块: import classifier [as 别名]
# 或者: from classifier import Classifier [as 别名]
def create_predict(HudongItem_csv):
    """Predict labels for a fixed slice of the titles and append them to a file.

    Titles are read from the ``'title'`` column of ``HudongItem_csv`` via
    ``readCSVbyColumn``. Only the entries at 1-based positions 140 through
    201 of that list are processed. Each is looked up in Neo4j, wrapped in
    a ``HudongItem`` and passed to ``Classifier.KNN_predict``. One
    ``"<title> <label>"`` line per distinct item title is appended to
    ``predict_labels2.txt``, and a progress line is printed for it.

    Args:
        HudongItem_csv: Forwarded unchanged to ``readCSVbyColumn`` as the
            CSV source (presumably a file path; confirm against that helper).
    """
    # Read the contents from Neo4j
    db = Neo4j()
    db.connectDB()
    data_set = db.getLabeledHudongItem('labels.txt')

    classifier = Classifier('wiki.zh.bin')
    classifier.load_trainSet(data_set)
    classifier.set_parameter(weight=[1.0, 3.0, 0.2, 4.0, 0], k=10)

    predict_List = readCSVbyColumn(HudongItem_csv, 'title')
    total = len(predict_List)
    vis = set()  # titles already written during this run

    # The context manager closes the output file even if a lookup or a
    # prediction raises part-way through the loop.
    with open('predict_labels2.txt', 'a') as file_object:
        count = 0
        for p in predict_List:
            # Stop once 201 entries have been counted.
            if count > 200:
                break
            count += 1
            # Skip the first 139 entries.
            if count < 140:
                continue
            # Query Neo4j only for entries inside the processed window;
            # skipped entries and the one that triggers the break never
            # needed a lookup.
            cur = HudongItem(db.matchHudongItembyTitle(p))
            title = cur.title
            if title in vis:
                continue
            vis.add(title)
            label = classifier.KNN_predict(cur)
            print(str(title)+" "+str(label)+": "+str(count)+"/"+str(total))
            file_object.write(str(title)+" "+str(label)+"\n")
示例4: train
# 需要导入模块: import classifier [as 别名]
# 或者: from classifier import Classifier [as 别名]
def train():
    """Fit a ``Classifier`` on the selected dataset and optionally build curves.

    All configuration is read from the module-level ``args`` object
    (``dataset``, ``modality``, ``thread``, ``model_name``,
    ``if_grid_search``, ``model_kernel``, ``grid_search_sample_rate``,
    ``create_curves``, ``save_curves``).

    Steps:
      1. Load the train and val splits through ``AsyncReader``.
      2. Optionally run a grid search on a sample of the training data.
      3. Fit the model on the training data and log ``METRICS`` computed on
         a sample of the validation data.
      4. If ``args.create_curves`` is set, predict on every entry of the val
         and test readers' ``data_dic`` and, if ``args.save_curves`` is set,
         dump the result with ``joblib``.

    Returns:
        ``{'val': {...}, 'test': {...}}`` mapping each ``data_dic`` key to
        the model's prediction when ``args.create_curves`` is truthy,
        otherwise ``None``.

    Raises:
        NotImplementedError: If ``args.dataset`` is ``'summe'``, which has
            no dataset loader wired in yet.
        ValueError: If ``args.dataset`` is any other unknown name.
    """
    if args.dataset == 'baidu_VH':
        dataset = baidu_VH(PROJECT_METAROOT)
    elif args.dataset == 'summe':
        # This branch was a placeholder that left `dataset` unbound and
        # crashed with an UnboundLocalError on the next statement; fail
        # with an explicit message instead.
        raise NotImplementedError("dataset 'summe' is not supported yet")
    else:
        raise ValueError('No such dataset')
    log.l.info(dataset.print_info())

    train_data = AsyncReader(dataset, root_path=BAIDU_VH_ROOT, mode='train', modality=args.modality)
    train_data.set_params({'limitedfiles': None,
                           'sample_rate': 100,
                           'save_path': 'tmp_results/train_{}_sampled.pkl'.format(args.modality)})
    X_train, Y_train = train_data.read_data(k=args.thread)

    val_data = AsyncReader(dataset, root_path=BAIDU_VH_ROOT, mode='val', modality=args.modality)
    val_data.set_params({'limitedfiles': None,
                         'sample_rate': 1,
                         'save_path': 'tmp_results/val_{}_sampled.pkl'.format(args.modality)})
    X_val, Y_val = val_data.read_data(k=args.thread)

    model = Classifier(model_name=args.model_name,
                       if_grid_search=args.if_grid_search,
                       model_kernel=args.model_kernel)
    if args.if_grid_search:
        # The grid search runs on a sample of the training data taken at
        # args.grid_search_sample_rate.
        model.set_grid_search_params(grid_search_params[args.model_name])
        X_train_grid_search, Y_train_grid_search = Sample_data(
            X_train, Y_train, args.grid_search_sample_rate)
        model.grid_search(X_train_grid_search, Y_train_grid_search)
    model.fit(X_train, Y_train)

    # Metrics are computed on a sample of the validation data (rate 0.1).
    X_val_metric, Y_val_metric = Sample_data(X_val, Y_val, 0.1)
    predict_val = model.predict(X_val_metric)
    metrics = get_metrics(predict_val, Y_val_metric, metrics=METRICS)
    log.l.info('the metrics of {} is :{}'.format(METRICS, metrics))

    # Drop the references to the training arrays; they are not used below.
    del X_train, Y_train

    if args.create_curves:
        # Predictions for every entry of the validation reader.
        val_curves_dic = dict()
        for k, v in val_data.data_dic.items():
            val_curves_dic[k] = model.predict(v)

        # Predictions for every entry of the test reader. read_data() is
        # called for its effect on the reader; its return values are unused.
        test_data = AsyncReader(dataset, root_path=BAIDU_VH_ROOT, mode='test', modality=args.modality)
        test_data.set_params({'limitedfiles': None,
                              'sample_rate': 1,
                              'save_path': 'tmp_results/test_{}_sampled.pkl'.format(args.modality)})
        _, _ = test_data.read_data(k=args.thread)
        test_curves_dic = dict()
        for k, v in test_data.data_dic.items():
            test_curves_dic[k] = model.predict(v)

        return_info = {'val': val_curves_dic,
                       'test': test_curves_dic}
        if args.save_curves:
            joblib.dump(return_info, 'tmp_results/val_test_{}_curves.pkl'.format(args.modality))
        return return_info
    return None