本文整理汇总了Python中tgrocery.Grocery类的典型用法代码示例。如果您正苦于以下问题:Python Grocery类的具体用法?Python Grocery怎么用?Python Grocery使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Grocery类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: train
def train():
print 'train start '+'.'*30
#grocery=Grocery('sample')
grocery=Grocery('version1.0')
grocery.train(trainlist)
grocery.save()
print 'train end '+'.'*30
示例2: __init__
def __init__(self, keyword):
print '进行新闻分类'
(db, cursor) = connectdb()
cursor.execute("update task set status=1 where keyword=%s", [keyword])
cursor.execute("select id, title from news where keyword=%s",[keyword])
news = cursor.fetchall()
new_grocery = Grocery('static/paris')
new_grocery.load()
for item in news:
tag = new_grocery.predict(item['title'])
if tag == '新闻背景':
tag = 1
elif tag == '事实陈述':
tag = 2
elif tag == '事件演化':
tag = 3
elif tag == '各方态度':
tag = 4
elif tag == '直接关联':
tag = 6
elif tag == '暂无关联':
tag = 7
cursor.execute("update news set tag=%s where id=%s", [tag, item['id']])
closedb(db, cursor)
return
示例3: GET
def GET(self, name):
    """web.py handler: classify *name* (UTF-8 encoded) with the saved
    'sample' model and return the predicted label."""
    title = name.encode('utf-8')
    classifier = Grocery('sample')
    classifier.load()
    return classifier.predict(title)
示例4: __train__model__
def __train__model__():
    """Build the shared classifier from the Excel training sheet and cache
    it on the Classify class (columns: 类型 = label, 释义 = text)."""
    frame = pd.read_excel(Classify.__FILE_PATH__)
    pairs = frame[[u'类型', u'释义']]
    samples = [(row[0], row[1]) for row in pairs.values]
    model = Grocery('Classify')
    model.train(samples)
    model.save()
    Classify.__MODEL__ = model
示例5: tGrocery
def tGrocery():
outFile = open('testResult.tmp', 'w')
[trainingSet, benchmark] = pickle.load(open('SampleSeg.pk'))
testingSet = []
correctLabel = []
for i in xrange(len(benchmark)):
print '%d out of %d' % (i, len(benchmark))
testingSet.append(benchmark[i][1])
correctLabel.append(benchmark[i][0])
grocery = Grocery('test')
grocery.train(trainingSet)
grocery.save()
# load
new_grocery = Grocery('test')
new_grocery.load()
Prediction = []
for i in xrange(len(testingSet)):
print '%d out of %d' % (i, len(testingSet))
prediction = new_grocery.predict(testingSet[i])
Prediction.append(prediction)
temp = correctLabel[i] + '<-->' + prediction + ' /x01' + testingSet[i] + '\n'
outFile.write(temp)
correct = 0
for i in xrange(len(Prediction)):
print Prediction[i], correctLabel[i],
if Prediction[i] == correctLabel[i]:
correct += 1
print 'Correct'
else:
print 'False'
print 'Correct Count:', correct
print 'Accuracy: %f' % (1.0 * correct / len(Prediction))
示例6: GroceryModel
class GroceryModel(object):
    """Thin wrapper around tgrocery's Grocery for tab-separated
    '<label>\\t<text...>' train/test files.

    Fixes vs. original: the identical readline-loop parser duplicated in
    train() and test() is extracted into _read_dataset(); files are closed
    via 'with'; per-line .decode('utf8') is replaced by an encoding-aware
    io.open.
    """

    def __init__(self):
        self.grocery = Grocery('TextClassify')

    def _read_dataset(self, src):
        """Parse a UTF-8 TSV file into a list of (label, text) pairs.

        Extra tab characters are folded back into the text, matching the
        original ``''.join(tmp[1:])`` behavior.
        """
        import io
        dataset = []
        with io.open(src, 'r', encoding='utf8') as f:
            for line in f:
                parts = line.split('\t')
                dataset.append((parts[0], ''.join(parts[1:])))
        return dataset

    def train(self, train_file):
        """Train on a TSV file and persist the model to disk."""
        self.grocery.train(self._read_dataset(train_file))
        self.grocery.save()

    def load_model(self):
        """Load the previously saved model."""
        self.grocery.load()

    def test(self, test_src):
        """Evaluate the saved model on a TSV file and print the result."""
        self.load_model()
        result = self.grocery.test(self._read_dataset(test_src))
        print(result)

    def predict(self, text):
        """Print the predicted label for *text*."""
        print(self.grocery.predict(text))
示例7: test_main
def test_main(self):
    """End-to-end smoke test: train, save, reload, predict, clean up."""
    trained = Grocery(self.grocery_name)
    trained.train(self.train_src)
    trained.save()
    reloaded = Grocery('test')
    reloaded.load()
    # NOTE(review): the assertions exercise the *trained* instance rather
    # than the reloaded one -- behavior kept exactly as-is; confirm intent.
    assert trained.get_load_status()
    assert trained.predict('考生必读:新托福写作考试评分标准') == 'education'
    # Remove the saved model directory so reruns start from a clean slate.
    if self.grocery_name and os.path.exists(self.grocery_name):
        shutil.rmtree(self.grocery_name)
示例8: test_grocery
def test_grocery():
grocery = Grocery('model_redian')
grocery.train('trdata_4.txt')
grocery.save()
new_grocery = Grocery('model_redian')
new_grocery.load()
test_result = new_grocery.test('tedata_4.txt')
print test_result.accuracy_labels
print test_result.recall_labels
test_result.show_result()
示例9: predict_corpus
def predict_corpus(input_file, output_csv):
    """Classify column 2 of 'Sheet1' in *input_file* (an Excel workbook)
    and write (sentence, prediction) rows to *output_csv*.

    Fixes vs. original:
    - ``row[2].strip()`` was computed and its result discarded, so the
      unstripped text was appended; the stripped text is now used.
    - the deprecated ``file()`` builtin is replaced by ``open()`` and the
      csv handle is closed via ``with`` even on error.
    """
    import csv
    corpus = []
    book = xlrd.open_workbook(input_file)
    sheet = book.sheet_by_name('Sheet1')
    for rownum in range(sheet.nrows):  # every row of the sheet
        row = sheet.row_values(rownum)
        corpus.append(row[2].strip())
    corpus_grocery = Grocery(project_name)
    corpus_grocery.load()
    output = [(sentence, corpus_grocery.predict(sentence)) for sentence in corpus]
    with open(output_csv, 'wb') as csvfile:
        csv.writer(csvfile).writerows(output)
    print('Done!')
开发者ID:frederic89,项目名称:Event_Classification_and_Domain_Recognition,代码行数:21,代码来源:domain_predict_py2.py
示例10: jdParser
class jdParser(object):
    """Split a job-description string into lines and route each line to the
    'demand' (requirements) or 'duty' (responsibilities) bucket using a
    pre-trained Grocery line classifier.

    Fixes vs. original:
    - ``len(line.strip()>4)`` called len() on a bool (TypeError at runtime);
      the intended filter ``len(line.strip()) > 4`` is restored.
    - ``get_demand_and_duty`` built ``result`` but never returned it; the
      missing ``return`` is added.
    """

    def __init__(self):
        self.clf = Grocery("./jdclf")
        self.clf.load()
        # Split on Chinese/ASCII semicolons, the Chinese full stop, or newlines.
        self.LINE_SPLIT = re.compile(u"[;。;\n]")

    def get_demand_and_duty(self, jdstr):
        """Return {'demand': ..., 'duty': ...} with matching lines of *jdstr*
        joined by newlines; lines of 4 characters or fewer are skipped."""
        linelist = [line.strip() for line in self.LINE_SPLIT.split(jdstr)
                    if len(line.strip()) > 4]
        demand = []
        duty = []
        for line in linelist:
            pred = str(self.clf.predict(line))
            if pred == "demand":
                demand.append(line)
            elif pred == "duty":
                duty.append(line)
        result = {'demand': '\n'.join(demand), 'duty': '\n'.join(duty)}
        return result
示例11: sentiment_train
def sentiment_train(gro_name, train_set):
    """Train a Grocery model named *gro_name* on *train_set*, report its
    load status, then persist it to disk.

    :param gro_name: name under which the model is saved
    :param train_set: training samples accepted by Grocery.train()
    """
    classifier = Grocery(gro_name)
    classifier.train(train_set)
    print("Is trained? ", classifier.get_load_status())
    classifier.save()
示例12: MyGrocery
class MyGrocery(object):
    """Wrapper around Grocery that trains/tests from two-column CSV files
    (label, UTF-8 text) and reports accuracy as a JSON summary.

    Fixes vs. original: CSV file handles are closed via 'with'; an empty
    test file no longer raises ZeroDivisionError.
    """

    def __init__(self, name):
        super(MyGrocery, self).__init__()
        self.grocery = Grocery(name)
        self.loaded = False   # lazy-load flag for the saved model
        self.correct = 1.0    # last measured accuracy (1.0 until test() runs)

    def train(self, src):
        """Train on a CSV of (label, text) rows; text is decoded as UTF-8."""
        lines = []
        with open(src) as f:
            for line in csv.reader(f):
                label, s = line[0], line[1]
                lines.append((label, s.decode('utf8')))
        self.grocery.train(lines)

    def save_model(self):
        self.grocery.save()

    def train_and_save(self, src):
        self.train(src)
        self.save_model()

    def load_model(self):
        """Load the saved model at most once per instance."""
        if not self.loaded:
            self.grocery.load()
            self.loaded = True

    def predict(self, text):
        self.load_model()
        return self.grocery.predict(text)

    def test(self, src):
        """Evaluate on a CSV of (label, text) rows; return a JSON summary
        with total, wrong_num and accuracy, and cache accuracy on self."""
        self.load_model()
        total, wrong_num = 0.0, 0.0
        with open(src) as f:
            for line in csv.reader(f):
                total += 1
                if line[0] != self.predict(line[1]):
                    wrong_num += 1
        print("load test file from " + src)
        # Guard the empty-file case (original raised ZeroDivisionError).
        correct = (total - wrong_num) / total if total else 0.0
        self.correct = correct
        print("total: %d , wrong_num: %d, success percentage: %f" % (total, wrong_num, correct))
        result = dict(type="test", total=total, wrong_num=wrong_num, correct=correct)
        return json.dumps(result)
示例13: __init__
def __init__(self):
    """Compile the regex toolkit and load the dictionaries/classifier used
    to parse job-description (JD) text."""
    # Characters to strip: anything not CJK, word/digit, or listed punctuation.
    # NOTE(review): the "[email protected]" fragment inside the pattern looks like
    # e-mail obfuscation introduced by the page scraper -- verify against the
    # original source before relying on this pattern.
    self.CLEAN_TEXT = re.compile(u"[^\u4e00-\u9fa5\w\d;::;,。、\.,。![email protected]()\r\n\(\)\-\+ - ]")
    # Pre-trained line classifier saved under <base_dir>/jdclf.
    self.clf = Grocery(base_dir+"/jdclf")
    self.clf.load()
    # Split JD text on newlines / sentence punctuation or long whitespace runs.
    self.SPLIT_LINE = re.compile(u"[\r\n;::。!?;]|[ \s \xa0\u724b]{4,}")
    # Strip list bullets, numbering prefixes and dangling punctuation from a line.
    self.CLEAN_LINE = re.compile(u"^[\u2022(【\[\s\t\r\n\(\- ]?[\da-z12345789]{1,2}[\.,。、,::)】\]\)\s]|^[!@#¥%……&×()\(\){}:“|、-\-,。::\.]|^[一二三四五六七八九123456789\d]{0,2}[\.、\s:: ]|[,;。、\s \.]$|^[\s \u2022 \uff0d \u25cf]")
    # Drop marketing noise ("urgent hire", "high pay", ...) from job titles.
    self.CLEAN_JOBNAME = re.compile(u"急聘|诚聘|高薪|包[食住宿餐]|.险一金|待遇|^急?招|职位编号\s?[\s\d::]")
    # Field detectors applied to individual lines.
    self.PAY = re.compile("(\d{3,}\-)?\d{3,}元")
    self.SEX = re.compile(u"性别|男|女")
    self.AGE = re.compile(u"\d+周?岁|年龄")
    self.JOB_TAG = re.compile(u"全职|实习")
    self.DEGREE = re.compile(u"小学|初中|高中|职技|本科|研究生|硕士|博士|教授|专科|大专|中专|无要求|不限|无限")
    # Section headers that introduce requirements / duties / benefits blocks.
    self.START_DEMAND = re.compile(u"(任职资格|岗位要求|工作要求|任职条件|任职要求|职位要求)[::\s】\n ]?")
    self.START_DUTY = re.compile(u"(工作内容|岗位职责|工作职责|职位描述|工作描述|职位介绍|职位职责|岗位描述)[::\s 】\n ]")
    self.START_BENEFIT = re.compile(u"(福利待遇|待遇|福利)[::\s\n】]")
    self.INC_URL = re.compile(u"(主页|网站|网址|官网).{0,3}[\w\d_/\.:\-]+")
    # Phrase-level cues for requirement / duty / benefit sentences.
    self.DEMAND = re.compile(u"精通|熟悉|熟练|有.+经验")
    self.DUTY = re.compile(u"负责|促成|为客户|安排的其.工作")
    self.BENEFIT = re.compile(u".险一金|福利|晋身|休假|带薪|补助|补贴")
    # Certificates: language levels (CET, IELTS, TOEFL, ...) plus
    # medical/accounting/legal certificates.
    self.CERT = re.compile(u"(\S{2,8}证书|CET-\d|普通话|英语|口语|.语|日文|雅思|托福|托业)(至少)?(通过)?[\d一二三四五六七八九]级[及或]?(以上)?|(英语)?CET-\d级?(以上)?|\
医学.{0,3}证|会计.{0,3}证|律师.{0,3}证|有.{1,8}证书")
    # Lookup dictionaries: one UTF-8 entry per line under <base_dir>/data.
    self.degreedic = set([line.strip() for line in codecs.open(base_dir+'/data/degrees.txt','rb','utf-8')])
    self.majordic = set([line.strip() for line in codecs.open(base_dir+'/data/majordic.txt','rb','utf-8')])
    self.skilldic = set([line.strip() for line in codecs.open(base_dir+'/data/skills.txt','rb','utf-8')])
    self.jobdic = set([line.strip() for line in codecs.open(base_dir+'/data/jobnames.txt','rb','utf-8')])
    # Teach jieba the domain vocabulary so segmentation keeps these terms intact.
    jieba.load_userdict(base_dir+'/data/majordic.txt')
    jieba.load_userdict(base_dir+'/data/skills.txt')
    jieba.load_userdict(base_dir+'/data/firm.txt')
    jieba.load_userdict(base_dir+'/data/degrees.txt')
    jieba.load_userdict(base_dir+'/data/benefits.txt')
示例14: __init__
def __init__(self):
    """Compile the regex set used to extract structured fields (sex, age,
    degree, job name, pay, sections, ...) from scraped job postings."""
    self.data = []
    # Pre-trained line classifier saved under ./jdclf.
    self.clf = Grocery("jdclf")
    self.clf.load()
    # Candidate-profile fields.
    self.SEX = re.compile(u"性别不限|性别|男|女")
    self.AGE = re.compile(u"\d+周?岁|年龄")
    self.DEGREE = re.compile(u"(全日制)?(初中|高中|中专|大专|专科|大学专科|中职|本科|大学本科|硕士|研究生|博士|博士后)(.?以上)?")
    self.MAJOR = re.compile(u"\S+(相关专业|专业优先|及其.专业|[类等]专业[优先]?)")
    self.EXP = re.compile(u"工作经验:|工作经[历验]|工作年限|年.{0,4}经[历验]|经[历验].{1,6}年")
    self.PUB_TIME = re.compile(u"(\d+)(天前发布)")
    # Company name / company-description heuristics.
    self.INCNAME = re.compile(u"\S+(有限公司|酒店|银行|集团|研究中心|研究所|学校|旅行社|分?公司|研发中心|技术部|事.部|招聘)")
    self.NOT_INC = re.compile(u"职位|描述|收藏|推荐|地址|邮箱|主页|介绍|欢迎|加入|要求|简介|险一金|奖金|包吃住|社区|厂房|人员|职责")
    self.INCTAG = re.compile(u"大公司|五百强|全球500强|小公司|成长型公司|创业公司|私有经济|集体经济|集团|外企|已上市|稳定性高|平均年龄\d岁|妹纸多|学历高|福利待遇好|晋升机会大|民营公司|民营企业\
|互联网|创业型|国企|央企")
    # Job-title patterns (role suffixes plus a few tech-stack names).
    self.JOBNAME = re.compile(u'\S*(研发工程师|工程师|经理|助理|顾问|前台|秘书|主管|研究员|实习生|操作员|专员|教学人员|技术人员|管理员|业务员|公关|程序员|教师|老师|培训生|\
文员|研究员|策划|主任|总监|设计师|分析师|架构师|摄影师|编辑|BD|游戏UI|Android(开发)?|PHP(开发)?|Python(开发)?|.?(急招|急聘|初级|中级|高级|方向).?[\s)】\)])|\
|行政人事|网店设计|客服|会计|电话销售|外贸跟单|web前端|游戏UI|后.开发|产品运营|商业数据分析')
    # Section headers and phrase cues for the requirements block ...
    self.START_DEMAND = re.compile(u"(岗位要求|应聘条件|任职要求|岗位资格|任职资格|岗位条件|工作要求|任职条件|人员条件|职位.求|职位条件|职位描述|岗位资格|职位资格|具备条件)[::\s]\
|如果你.{0,10}[::\s]|我们希望你.{0,12}[::\s]|(要求|条件)[::\s]|你需要?具备什么.+[?\?::\s]|任职资格[::\s]")
    self.DEMAND = re.compile(u"熟悉|熟练|具有|善于|懂得|掌握|具备|能够|优先|不少于|不超过|至少|团队.作|良好的|工作经验|开发经验|实习经历|能力强|富有|以上学历|经验|喜欢|\
较强的.{2,8}能力|相关专业|相关学历|者优先|精通|了解|及以上|技术全面|.强的责任心|[能有]独立|英文流利")
    # ... and for the duties block.
    self.DUTY = re.compile(u"跟进|协助|负责|配合|其他工作|领导交办的|对.+提供|审核|参与|提出|跟踪|报告|为.+提出|日常.+工作|指导|跟进|拓展|运营|用户|客户|协调|拟写|通过|协同\
|完成|沟通|需求|秘书.{2,5}翻译")
    self.START_DUTY = re.compile(u"(岗位职责|岗位描述|职位描述|职责描述|任职描述|职位职责|工作职责|工作职能|职位职能|工作内容|实习内容|职位内容)[::\s]|做这样的事[::\s]|职责.{0,5}[::\s]")
    # Compensation and benefit cues.
    self.PAY = re.compile(u"薪酬|待遇|月薪|薪资|年薪|底薪|\d+k|\d+万|\d+元|工资|报酬|薪水|福利")
    self.BENEFIT = re.compile(u"周休|补助|补贴|假日|餐补|提成|交通补助|食宿|加班工资|期权|年假|领导|扁平化|管理|氛围|空间|休假|月假|带薪|全休|晋升|培训|舒适的|旅游|奖励|过节费|五险一金|奖金|\
|弹性工作|氛围|成长空间|实训|培训|高薪|前景|旅游|活动|分红")
示例15: reload
# -*- coding: utf-8 -*-
import sys
# Py2 idiom: re-import sys to restore hidden attributes.
# NOTE(review): usually paired with sys.setdefaultencoding(...), which is
# absent here -- confirm whether this reload is still needed.
reload(sys)
sys.path.append('../../')
from config import *
from tgrocery import Grocery
STOP_WORDS_FILE = 'stopwords.txt'
USER_DICT_FILE = 'user_dict.txt'
# Load the pre-trained financial-text classifier once at import time.
model_fintext = Grocery('model_fintext')
model_fintext.load()
sys.path.append('../')
from get_es import *
# Elasticsearch connection; ES_HOST / ES_PORT come from config (star import).
es = Elasticsearch([{'host':ES_HOST,'port':ES_PORT}])
def search(index_name):
    """Run the configured Elasticsearch query against *index_name* and
    return the raw search result (hits are not unwrapped here)."""
    options = set_search_optional()
    return get_search_result(options, index=index_name)
def get_result_list(es_result):
    """Extract the '_source' payload from each Elasticsearch hit."""
    return [hit['_source'] for hit in es_result]