本文整理汇总了Python中tgrocery.Grocery.load方法的典型用法代码示例。如果您正苦于以下问题:Python Grocery.load方法的具体用法?Python Grocery.load怎么用?Python Grocery.load使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类tgrocery.Grocery的用法示例。
在下文中一共展示了Grocery.load方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: tGrocery
# 需要导入模块: from tgrocery import Grocery [as 别名]
# 或者: from tgrocery.Grocery import load [as 别名]
def tGrocery():
    """Train a Grocery model on a pickled sample set and report its accuracy.

    Reads ``[trainingSet, benchmark]`` from ``SampleSeg.pk``, trains and saves
    a model named ``'test'``, reloads it into a fresh ``Grocery`` instance,
    predicts every benchmark text, writes one line per prediction to
    ``testResult.tmp`` and prints per-item results plus the overall accuracy.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only feed it a
    # SampleSeg.pk that you produced yourself.
    # Pickles are binary data, hence 'rb'; the handle is closed immediately.
    with open('SampleSeg.pk', 'rb') as sample_file:
        trainingSet, benchmark = pickle.load(sample_file)

    # Each benchmark entry is indexed as (label, text).
    testingSet = []
    correctLabel = []
    for i, sample in enumerate(benchmark):
        print('%d out of %d' % (i, len(benchmark)))
        testingSet.append(sample[1])
        correctLabel.append(sample[0])

    grocery = Grocery('test')
    grocery.train(trainingSet)
    grocery.save()

    # Reload from disk so the persisted model is the one being evaluated.
    new_grocery = Grocery('test')
    new_grocery.load()

    Prediction = []
    with open('testResult.tmp', 'w') as outFile:
        for i, text in enumerate(testingSet):
            print('%d out of %d' % (i, len(testingSet)))
            prediction = new_grocery.predict(text)
            Prediction.append(prediction)
            # NOTE(review): ' /x01' is written literally; it looks like a typo
            # for the '\x01' separator -- confirm before changing the format.
            temp = correctLabel[i] + '<-->' + prediction + ' /x01' + text + '\n'
            outFile.write(temp)

    correct = 0
    for predicted, expected in zip(Prediction, correctLabel):
        matched = predicted == expected
        if matched:
            correct += 1
        print('%s %s %s' % (predicted, expected, 'Correct' if matched else 'False'))
    print('Correct Count: %s' % correct)
    # Guard against an empty benchmark instead of dividing by zero.
    if Prediction:
        print('Accuracy: %f' % (1.0 * correct / len(Prediction)))
    else:
        print('Accuracy: n/a (no test samples)')
示例2: __init__
# 需要导入模块: from tgrocery import Grocery [as 别名]
# 或者: from tgrocery.Grocery import load [as 别名]
def __init__(self, keyword):
    """Classify every stored news title for *keyword* and save its tag.

    Marks the task for *keyword* as started (``status=1``), loads the saved
    ``static/paris`` model, predicts a label for each news title and writes
    the corresponding numeric tag back to the ``news`` table.

    Args:
        keyword: value matched against the ``keyword`` column of the ``task``
            and ``news`` tables.
    """
    print('进行新闻分类')
    # Predicted label -> numeric tag stored in news.tag.
    # NOTE(review): the mapping skips 5, exactly as in the original -- confirm
    # that this is intentional.
    tag_ids = (
        ('新闻背景', 1),
        ('事实陈述', 2),
        ('事件演化', 3),
        ('各方态度', 4),
        ('直接关联', 6),
        ('暂无关联', 7),
    )
    (db, cursor) = connectdb()
    cursor.execute("update task set status=1 where keyword=%s", [keyword])
    cursor.execute("select id, title from news where keyword=%s", [keyword])
    news = cursor.fetchall()
    new_grocery = Grocery('static/paris')
    new_grocery.load()
    for item in news:
        tag = new_grocery.predict(item['title'])
        # Compare with == (not a dict lookup) so that the prediction object's
        # own equality semantics are kept.
        for label, tag_id in tag_ids:
            if tag == label:
                tag = tag_id
                break
        # A prediction that matches no known label is stored as returned.
        cursor.execute("update news set tag=%s where id=%s", [tag, item['id']])
    # NOTE(review): closedb is skipped if anything above raises; whether it
    # should run in a ``finally`` depends on whether it commits -- confirm.
    closedb(db, cursor)
示例3: GroceryModel
# 需要导入模块: from tgrocery import Grocery [as 别名]
# 或者: from tgrocery.Grocery import load [as 别名]
class GroceryModel(object):
    """Small wrapper around a ``Grocery`` classifier named ``'TextClassify'``."""

    def __init__(self):
        self.grocery = Grocery('TextClassify')

    @staticmethod
    def _read_dataset(path):
        """Parse a UTF-8, tab-separated file into ``(label, text)`` tuples.

        The first field of every line is the label; the remaining fields are
        concatenated (without the tabs) to form the text. The trailing newline
        is kept as part of the text, as in the original implementation.
        """
        import io  # local import so the block does not rely on file-level imports

        dataset = []
        # newline='\n' disables newline translation; the ``with`` block closes
        # the file even when parsing fails.
        with io.open(path, 'r', encoding='utf8', newline='\n') as f:
            for line in f:
                tmp = line.split('\t')
                dataset.append((tmp[0], ''.join(tmp[1:])))
        return dataset

    def train(self, train_file):
        """Train on the labelled file *train_file* and save the model."""
        self.grocery.train(self._read_dataset(train_file))
        self.grocery.save()

    def load_model(self):
        """Load the previously saved model from disk."""
        self.grocery.load()

    def test(self, test_src):
        """Load the saved model, evaluate it on *test_src* and print the result."""
        self.load_model()
        result = self.grocery.test(self._read_dataset(test_src))
        print(result)

    def predict(self, text):
        """Print the prediction for *text* (nothing is returned)."""
        print(self.grocery.predict(text))
示例4: test_grocery
# 需要导入模块: from tgrocery import Grocery [as 别名]
# 或者: from tgrocery.Grocery import load [as 别名]
def test_grocery():
    """Train on ``trdata_4.txt``, save, reload and evaluate on ``tedata_4.txt``.

    Prints the per-label accuracy and recall of the reloaded model and then
    calls ``show_result()`` on the test result.
    """
    trainer = Grocery('model_redian')
    trainer.train('trdata_4.txt')
    trainer.save()

    # Evaluate through a fresh instance so the saved model is what is tested.
    evaluator = Grocery('model_redian')
    evaluator.load()
    report = evaluator.test('tedata_4.txt')
    print(report.accuracy_labels)
    print(report.recall_labels)
    report.show_result()
示例5: GET
# 需要导入模块: from tgrocery import Grocery [as 别名]
# 或者: from tgrocery.Grocery import load [as 别名]
def GET(self, name):
    """Return the prediction of the saved ``'sample'`` model for *name*.

    Args:
        name: text to classify; it is UTF-8 encoded before prediction.

    Returns:
        Whatever ``Grocery.predict`` returns for the encoded title.
    """
    title = name.encode('utf-8')
    # NOTE(review): the model is read from disk on every call; consider
    # loading it once if this handler turns out to be hot.
    classifier = Grocery('sample')
    classifier.load()
    return classifier.predict(title)
示例6: test_main
# 需要导入模块: from tgrocery import Grocery [as 别名]
# 或者: from tgrocery.Grocery import load [as 别名]
def test_main(self):
    """Train, save, reload and sanity-check a model end to end.

    The reloaded instance (not the freshly trained one) is what gets
    asserted on, so the save/load round trip is actually exercised.
    """
    try:
        grocery = Grocery(self.grocery_name)
        grocery.train(self.train_src)
        grocery.save()
        # Load the model that was just saved, under the same name.
        new_grocery = Grocery(self.grocery_name)
        new_grocery.load()
        assert new_grocery.get_load_status()
        assert new_grocery.predict('考生必读:新托福写作考试评分标准') == 'education'
    finally:
        # cleanup -- runs even when an assertion above fails
        if self.grocery_name and os.path.exists(self.grocery_name):
            shutil.rmtree(self.grocery_name)
示例7: MyGrocery
# 需要导入模块: from tgrocery import Grocery [as 别名]
# 或者: from tgrocery.Grocery import load [as 别名]
class MyGrocery(object):
    """Convenience wrapper around ``Grocery`` with lazy model loading."""

    def __init__(self, name):
        super(MyGrocery, self).__init__()
        self.grocery = Grocery(name)
        # Set to True once load_model() has read the saved model.
        self.loaded = False
        # Accuracy of the most recent test() run; 1.0 until one has run.
        self.correct = 1.0

    def train(self, src):
        """Train on the CSV file *src* (column 0: label, column 1: text)."""
        lines = []
        with open(src) as f:
            for row in csv.reader(f):
                label, s = row[0], row[1]
                # Decode byte strings; text that is already decoded passes through.
                text = s.decode('utf8') if isinstance(s, bytes) else s
                lines.append((label, text))
        self.grocery.train(lines)

    def save_model(self):
        """Persist the current model."""
        self.grocery.save()

    def train_and_save(self, src):
        """Train on *src* and save the resulting model."""
        self.train(src)
        self.save_model()

    def load_model(self):
        """Load the saved model the first time it is needed."""
        # NOTE(review): train() does not set ``loaded``, so predicting right
        # after training still reads the model from disk -- confirm intent.
        if not self.loaded:
            self.grocery.load()
            self.loaded = True

    def predict(self, text):
        """Return the model's prediction for *text*."""
        self.load_model()
        return self.grocery.predict(text)

    def test(self, src):
        """Evaluate the model on the CSV file *src*.

        Returns:
            A JSON string with the keys ``type``, ``total``, ``wrong_num`` and
            ``correct`` (the fraction of correct predictions). ``correct`` is
            0.0 when the file has no rows.
        """
        self.load_model()
        total, wrong_num = 0.0, 0.0
        with open(src) as f:
            for row in csv.reader(f):
                total += 1
                if row[0] != self.predict(row[1]):
                    wrong_num += 1
        print("load test file from " + src)
        # An empty file used to raise ZeroDivisionError here.
        correct = (total - wrong_num) / total if total else 0.0
        self.correct = correct
        print("total: %d , wrong_num: %d, success percentage: %f" % (total, wrong_num, correct))
        result = dict(type="test", total=total, wrong_num=wrong_num, correct=correct)
        return json.dumps(result)
示例8: predict_corpus
# 需要导入模块: from tgrocery import Grocery [as 别名]
# 或者: from tgrocery.Grocery import load [as 别名]
def predict_corpus(input_file, output_csv):
    """Classify column 2 of every row in an Excel sheet and write a CSV.

    Reads ``Sheet1`` of the workbook *input_file*, predicts a label for the
    stripped text in the third column of each row with the saved
    ``project_name`` model, and writes ``(sentence, prediction)`` rows to
    *output_csv*.

    Args:
        input_file: path of the workbook opened with ``xlrd``.
        output_csv: path of the CSV file to create.
    """
    import csv

    workbook = xlrd.open_workbook(input_file)
    table = workbook.sheet_by_name('Sheet1')
    nrows = table.nrows  # number of rows in the sheet
    # The original called row[2].strip() but threw the result away; keep the
    # stripped text so surrounding whitespace does not reach the classifier.
    corpus = [table.row_values(rownum)[2].strip() for rownum in range(0, nrows)]

    corpus_grocery = Grocery(project_name)
    corpus_grocery.load()
    output = [(sentence, corpus_grocery.predict(sentence)) for sentence in corpus]

    # 'wb' is what the Python 2 csv module expects; ``with`` closes the file
    # even if writing fails.
    with open(output_csv, 'wb') as csvfile:
        csv.writer(csvfile).writerows(output)
    print('Done!')
开发者ID:frederic89,项目名称:Event_Classification_and_Domain_Recognition,代码行数:23,代码来源:domain_predict_py2.py
示例9: jdParser
# 需要导入模块: from tgrocery import Grocery [as 别名]
# 或者: from tgrocery.Grocery import load [as 别名]
class jdParser(object):
    """Split a job description into "demand" and "duty" sentences."""

    def __init__(self):
        self.clf = Grocery("./jdclf")
        self.clf.load()
        # Sentence delimiters used to cut a description into lines.
        self.LINE_SPLIT = re.compile(u"[;。;\n]")

    def get_demand_and_duty(self, jdstr):
        """Classify each sentence of *jdstr* and group the results.

        Sentences whose stripped length is 4 characters or fewer are ignored.

        Args:
            jdstr: the job-description text.

        Returns:
            A dict with the keys ``'demand'`` and ``'duty'``; each value is
            the newline-joined sentences predicted with that label.
        """
        # The original wrote len(line.strip()>4): the comparison ended up
        # inside len(), which raises TypeError instead of filtering by length.
        linelist = [line.strip() for line in self.LINE_SPLIT.split(jdstr)
                    if len(line.strip()) > 4]
        demand = []
        duty = []
        for line in linelist:
            pred = str(self.clf.predict(line))
            if pred == "demand":
                demand.append(line)
            elif pred == "duty":
                duty.append(line)
        result = {}
        result['demand'] = '\n'.join(demand)
        result['duty'] = '\n'.join(duty)
        # The original built ``result`` but never handed it back to the caller.
        return result
示例10: reload
# 需要导入模块: from tgrocery import Grocery [as 别名]
# 或者: from tgrocery.Grocery import load [as 别名]
# -*- coding: utf-8 -*-
import sys
reload(sys)
sys.path.append('../../')
from config import *
from tgrocery import Grocery
# File names for a stop-word list and a user dictionary; neither constant is
# referenced by the code visible in this excerpt.
STOP_WORDS_FILE = 'stopwords.txt'
USER_DICT_FILE = 'user_dict.txt'
# Load the previously saved 'model_fintext' classifier at import time.
model_fintext = Grocery('model_fintext')
model_fintext.load()
# Extend the import path with the parent directory before importing get_es.
sys.path.append('../')
from get_es import *
# presumably Elasticsearch, ES_HOST and ES_PORT are provided by the star
# imports (config / get_es) -- TODO confirm
es = Elasticsearch([{'host':ES_HOST,'port':ES_PORT}])
def search(index_name):
    """Run the configured search against *index_name*.

    Args:
        index_name: passed as ``index`` to ``get_search_result``.

    Returns:
        The value returned by ``get_search_result``, unmodified.
    """
    es_search_options = set_search_optional()
    return get_search_result(es_search_options, index=index_name)
def get_result_list(es_result):
    """Return the ``'_source'`` value of every item in *es_result*.

    Args:
        es_result: iterable of mappings that each contain a ``'_source'`` key.

    Returns:
        A list with one ``'_source'`` value per item, in iteration order.

    Raises:
        KeyError: if an item has no ``'_source'`` key.
    """
    return [item['_source'] for item in es_result]
示例11: delete_stop_words
# 需要导入模块: from tgrocery import Grocery [as 别名]
# 或者: from tgrocery.Grocery import load [as 别名]
##########################################
# init
model_choose = "svm"  # svm, lda, rnn
grocery_name = "./SVM_models/svm_for_news"
corpus_path = "./Corpus/NewsClassCorpus/"
file_path = "./"
file_name = "post.txt"
# Read the post and strip stop words; ``with`` closes the file afterwards.
with codecs.open(file_path + file_name, encoding="UTF-8") as post_file:
    t_text = delete_stop_words(post_file.read())

###########################################
# Classify with the SVM model
if model_choose == "svm":
    tic = time.time()
    grocery = Grocery(grocery_name)
    grocery.load()
    # NOTE(review): stop words were already removed when t_text was built;
    # the second pass is kept as in the original -- confirm it is needed.
    t_pre_result = grocery.predict(delete_stop_words(t_text))
    toc = time.time()
    t_label = t_pre_result.predicted_y
    print("Sentiment: ", t_label)
    print("How much: ", t_pre_result.dec_values[t_label])
    # The measured interval also covers loading the model from disk.
    print("Elapsed time of predict is: %s s" % (toc - tic))
elif model_choose == "lda":
    pass
elif model_choose == "rnn":
    pass
else:
    # Report the unsupported choice instead of printing an empty line.
    print("Unknown model_choose: %r" % (model_choose,))
示例12: JdCRF
# 需要导入模块: from tgrocery import Grocery [as 别名]
# 或者: from tgrocery.Grocery import load [as 别名]
class JdCRF(object):
    """Feature and training-data helpers for tagging job-description lines.

    Only part of this class is present in the source excerpt; the remainder
    (after ``word2features``) is not shown there.
    """

    def __init__(self):
        self.data = []
        self.clf = Grocery("jdclf")
        self.clf.load()
        self.SEX = re.compile(u"性别不限|性别|男|女")
        self.AGE = re.compile(u"\d+周?岁|年龄")
        self.DEGREE = re.compile(u"(全日制)?(初中|高中|中专|大专|专科|大学专科|中职|本科|大学本科|硕士|研究生|博士|博士后)(.?以上)?")
        self.MAJOR = re.compile(u"\S+(相关专业|专业优先|及其.专业|[类等]专业[优先]?)")
        self.EXP = re.compile(u"工作经验:|工作经[历验]|工作年限|年.{0,4}经[历验]|经[历验].{1,6}年")
        self.PUB_TIME = re.compile(u"(\d+)(天前发布)")
        self.INCNAME = re.compile(u"\S+(有限公司|酒店|银行|集团|研究中心|研究所|学校|旅行社|分?公司|研发中心|技术部|事.部|招聘)")
        self.NOT_INC = re.compile(u"职位|描述|收藏|推荐|地址|邮箱|主页|介绍|欢迎|加入|要求|简介|险一金|奖金|包吃住|社区|厂房|人员|职责")
        # Long patterns are split into adjacent literals, which Python joins
        # into one string without embedding any whitespace.
        self.INCTAG = re.compile(
            u"大公司|五百强|全球500强|小公司|成长型公司|创业公司|私有经济|集体经济|集团|外企|已上市|稳定性高|平均年龄\d岁|妹纸多|学历高|福利待遇好|晋升机会大|民营公司|民营企业"
            u"|互联网|创业型|国企|央企")
        # The original joined the last two pieces with '||'. That empty
        # alternative matches the empty string, so search() always succeeded.
        self.JOBNAME = re.compile(
            u'\S*(研发工程师|工程师|经理|助理|顾问|前台|秘书|主管|研究员|实习生|操作员|专员|教学人员|技术人员|管理员|业务员|公关|程序员|教师|老师|培训生|'
            u'文员|研究员|策划|主任|总监|设计师|分析师|架构师|摄影师|编辑|BD|游戏UI|Android(开发)?|PHP(开发)?|Python(开发)?|.?(急招|急聘|初级|中级|高级|方向).?[\s)】\)])|'
            u'行政人事|网店设计|客服|会计|电话销售|外贸跟单|web前端|游戏UI|后.开发|产品运营|商业数据分析')
        self.START_DEMAND = re.compile(
            u"(岗位要求|应聘条件|任职要求|岗位资格|任职资格|岗位条件|工作要求|任职条件|人员条件|职位.求|职位条件|职位描述|岗位资格|职位资格|具备条件)[::\s]"
            u"|如果你.{0,10}[::\s]|我们希望你.{0,12}[::\s]|(要求|条件)[::\s]|你需要?具备什么.+[?\?::\s]|任职资格[::\s]")
        self.DEMAND = re.compile(
            u"熟悉|熟练|具有|善于|懂得|掌握|具备|能够|优先|不少于|不超过|至少|团队.作|良好的|工作经验|开发经验|实习经历|能力强|富有|以上学历|经验|喜欢|"
            u"较强的.{2,8}能力|相关专业|相关学历|者优先|精通|了解|及以上|技术全面|.强的责任心|[能有]独立|英文流利")
        self.DUTY = re.compile(
            u"跟进|协助|负责|配合|其他工作|领导交办的|对.+提供|审核|参与|提出|跟踪|报告|为.+提出|日常.+工作|指导|跟进|拓展|运营|用户|客户|协调|拟写|通过|协同"
            u"|完成|沟通|需求|秘书.{2,5}翻译")
        self.START_DUTY = re.compile(u"(岗位职责|岗位描述|职位描述|职责描述|任职描述|职位职责|工作职责|工作职能|职位职能|工作内容|实习内容|职位内容)[::\s]|做这样的事[::\s]|职责.{0,5}[::\s]")
        self.PAY = re.compile(u"薪酬|待遇|月薪|薪资|年薪|底薪|\d+k|\d+万|\d+元|工资|报酬|薪水|福利")
        # Same '||' fix as for JOBNAME above.
        self.BENEFIT = re.compile(
            u"周休|补助|补贴|假日|餐补|提成|交通补助|食宿|加班工资|期权|年假|领导|扁平化|管理|氛围|空间|休假|月假|带薪|全休|晋升|培训|舒适的|旅游|奖励|过节费|五险一金|奖金|"
            u"弹性工作|氛围|成长空间|实训|培训|高薪|前景|旅游|活动|分红")

    def gen_data(self, fname='./data/lagou_train.txt'):
        """Label every line of *fname* and write ``./data/jd_train_crf.txt``.

        Lines starting with ``=====`` are copied through unchanged. Every
        other line longer than one character (after stripping) is written as
        ``prediction<TAB><TAB>text<TAB><TAB>length``.
        """
        cnt = 1
        # ``with`` makes sure the output is flushed and both files are closed.
        with codecs.open('./data/jd_train_crf.txt', 'wb', 'utf-8') as fw:
            with codecs.open(fname, 'rb', 'utf-8') as source:
                for line in source:
                    if line.startswith(u"====="):
                        fw.write(line)
                        continue
                    cnt += 1
                    if len(line.strip()) > 1:
                        pred = self.clf.predict(line)
                        # NOTE(review): assumes predict() returns a string
                        # that can be concatenated -- confirm.
                        newline = pred + '\t\t' + line.strip() + '\t\t' + str(len(line)) + "\n"
                        fw.write(newline)
                    print(cnt)
        print('done')

    def load_data(self, fname="./data/jd_train_crf.txt"):
        """Read the file written by ``gen_data`` and split it in two.

        Lines are collected into groups; a line starting with ``====`` closes
        the current group. Lines that do not split into exactly three
        ``\\t\\t``-separated fields are printed and skipped. Lines after the
        last separator are not returned.

        Returns:
            ``(data[n:], data[:n])`` where ``n`` is half the number of groups
            (rounded down).
        """
        data = []
        tmp = []
        with codecs.open(fname, 'rb', 'utf-8') as source:
            for line in source:
                if line.startswith(u"===="):
                    data.append(tmp)
                    tmp = []
                    continue
                tag_data = line.strip().split('\t\t')
                if len(tag_data) == 3:
                    tmp.append(tuple(tag_data))
                else:
                    print('\t '.join(tag_data))
        # Floor division keeps n an integer, which slicing requires.
        n = len(data) // 2
        print('train data %s' % n)
        print('test data %s' % (len(data) - n))
        # NOTE(review): the first returned part has len(data) - n groups
        # although n is printed as the "train" size -- confirm the order.
        return data[n:], data[:n]

    def word2features(self, sent, i):
        """Build the feature strings for item *i* of *sent*.

        ``sent[i][0]`` is used as the word and ``sent[i][1]`` as its tag.
        """
        word = sent[i][0]
        postag = sent[i][1]

        def flag(pattern):
            # '1' when the pattern matches somewhere in the word, else '0'.
            return '1' if pattern.search(word) else '0'

        # The original wrote 'name=%s' % '1' if cond else '0'. That parses as
        # ('name=%s' % '1') if cond else '0', so every negative feature
        # collapsed to the bare string '0'.
        features = [
            'bias',
            'word.lower=' + word.lower(),
            'word[:2]=' + word[:2],
            'word.isdigit=%s' % word.isdigit(),
            'postag=' + postag,
            'demand=%s' % flag(self.DEMAND),
            'start_demand=%s' % flag(self.START_DEMAND),
            'start_duty=%s' % flag(self.START_DUTY),
            'duty=%s' % flag(self.DUTY),
            'jobname=%s' % flag(self.JOBNAME),
            'incname=%s' % flag(self.INCNAME),
            'benefit = %s' % flag(self.BENEFIT),
            'pred=%s' % self.clf.predict(word)
        ]
        # The rest of this method is not included in the source excerpt.
#.........这里部分代码省略.........
示例13: reload
# 需要导入模块: from tgrocery import Grocery [as 别名]
# 或者: from tgrocery.Grocery import load [as 别名]
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import MySQLdb
from tgrocery import Grocery
import sys
reload(sys)
sys.setdefaultencoding('utf8')
grocery = Grocery('sample')

# Collect (type id, title) rows from the database as training samples.
# NOTE(review): the connection settings, including the password, are
# hard-coded; consider moving them to configuration.
conn = MySQLdb.connect(host = 'localhost', db = 'newsdata', user = 'root', passwd = 'root', charset = 'utf8', use_unicode = False)
try:
    cur = conn.cursor()
    try:
        cur.execute('select com_new_type_id, com_new_name from tbl_new where com_new_type_id is not null')
        dict_list = list(cur.fetchall())
    finally:
        cur.close()
finally:
    # Release the connection even if the query fails.
    conn.close()

grocery.train(dict_list)
grocery.save()

# Reload the saved model and classify titles typed in by the user.
news_grocery = Grocery('sample')
news_grocery.load()
while True:
    try:
        title = raw_input('please input title:' )
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C ends the session without a traceback.
        break
    result = news_grocery.predict(title)
    print(result)
示例14: JdParserTop
# 需要导入模块: from tgrocery import Grocery [as 别名]
# 或者: from tgrocery.Grocery import load [as 别名]
class JdParserTop(object):
    """Cleaning and normalisation helpers for job-description text."""

    def __init__(self):
        # NOTE(review): the '[email protected]' text inside CLEAN_TEXT looks
        # like an e-mail-obfuscation artifact of the page this code was
        # copied from; recover the original characters from the real source.
        self.CLEAN_TEXT = re.compile(u"[^\u4e00-\u9fa5\w\d;::;,。、\.,。![email protected]()\r\n\(\)\-\+ - ]")
        self.clf = Grocery(base_dir+"/jdclf")
        self.clf.load()
        self.SPLIT_LINE = re.compile(u"[\r\n;::。!?;]|[ \s \xa0\u724b]{4,}")
        # NOTE(review): '、-\-' in the second alternative parses as a
        # character range; verify that this pattern compiles as intended.
        self.CLEAN_LINE = re.compile(u"^[\u2022(【\[\s\t\r\n\(\- ]?[\da-z12345789]{1,2}[\.,。、,::)】\]\)\s]|^[!@#¥%……&×()\(\){}:“|、-\-,。::\.]|^[一二三四五六七八九123456789\d]{0,2}[\.、\s:: ]|[,;。、\s \.]$|^[\s \u2022 \uff0d \u25cf]")
        self.CLEAN_JOBNAME = re.compile(u"急聘|诚聘|高薪|包[食住宿餐]|.险一金|待遇|^急?招|职位编号\s?[\s\d::]")
        # u-prefix added: the pattern contains a CJK character and must be a
        # unicode pattern like all the others.
        self.PAY = re.compile(u"(\d{3,}\-)?\d{3,}元")
        self.SEX = re.compile(u"性别|男|女")
        self.AGE = re.compile(u"\d+周?岁|年龄")
        self.JOB_TAG = re.compile(u"全职|实习")
        self.DEGREE = re.compile(u"小学|初中|高中|职技|本科|研究生|硕士|博士|教授|专科|大专|中专|无要求|不限|无限")
        self.START_DEMAND = re.compile(u"(任职资格|岗位要求|工作要求|任职条件|任职要求|职位要求)[::\s】\n ]?")
        self.START_DUTY = re.compile(u"(工作内容|岗位职责|工作职责|职位描述|工作描述|职位介绍|职位职责|岗位描述)[::\s 】\n ]")
        self.START_BENEFIT = re.compile(u"(福利待遇|待遇|福利)[::\s\n】]")
        self.INC_URL = re.compile(u"(主页|网站|网址|官网).{0,3}[\w\d_/\.:\-]+")
        self.DEMAND = re.compile(u"精通|熟悉|熟练|有.+经验")
        self.DUTY = re.compile(u"负责|促成|为客户|安排的其.工作")
        self.BENEFIT = re.compile(u".险一金|福利|晋身|休假|带薪|补助|补贴")
        # Adjacent literals are joined by Python without adding whitespace.
        self.CERT = re.compile(
            u"(\S{2,8}证书|CET-\d|普通话|英语|口语|.语|日文|雅思|托福|托业)(至少)?(通过)?[\d一二三四五六七八九]级[及或]?(以上)?|(英语)?CET-\d级?(以上)?|"
            u"医学.{0,3}证|会计.{0,3}证|律师.{0,3}证|有.{1,8}证书")
        self.degreedic = self._load_wordset(base_dir+'/data/degrees.txt')
        self.majordic = self._load_wordset(base_dir+'/data/majordic.txt')
        self.skilldic = self._load_wordset(base_dir+'/data/skills.txt')
        self.jobdic = self._load_wordset(base_dir+'/data/jobnames.txt')
        jieba.load_userdict(base_dir+'/data/majordic.txt')
        jieba.load_userdict(base_dir+'/data/skills.txt')
        jieba.load_userdict(base_dir+'/data/firm.txt')
        jieba.load_userdict(base_dir+'/data/degrees.txt')
        jieba.load_userdict(base_dir+'/data/benefits.txt')

    @staticmethod
    def _load_wordset(path):
        """Return the stripped lines of the UTF-8 file *path* as a set."""
        with codecs.open(path, 'rb', 'utf-8') as f:
            return set(line.strip() for line in f)

    def clean_line(self, line):
        """
        Remove leading/trailing punctuation and numbering from a sentence,
        then drop all whitespace and a leading "<digits><punctuation>" prefix.
        """
        line = self.CLEAN_LINE.sub("", line).strip()
        # The original pattern said '/d+' (a literal slash followed by the
        # letter d) where the digit class '\d+' was meant, and it was a byte
        # string although it contains CJK punctuation.
        line = re.sub(u"\s+|^\d+[;’、,/。\.]", "", line)
        return line

    def clean_cnNum(self, line):
        """
        Replace Chinese numerals (一..十, 两) with Arabic digits, for
        extracting years of experience.

        The replacement is done character by character, so 十 becomes "10"
        and compound numerals such as 二十 are not combined.
        """
        text_type = type(u"")  # ``unicode`` on Python 2, ``str`` on Python 3
        line = text_type(line)
        a = [u"一", u"二", u"三", u"四", u"五", u"六", u"七", u"八", u"九", u"十", u"两"]
        b = list(range(1, 11)) + [2]
        # translate() treats integer values as code points, so mapping to the
        # ints 1..10 produced control characters; map to digit strings instead.
        table = dict((ord(aa), text_type(bb)) for aa, bb in zip(a, b))
        return line.translate(table)

    def line2vec(self, line):
        """
        Convert a sentence to a vector by summing the vectors of its words.
        """
        # NOTE(review): self.w2v is not assigned anywhere in the visible part
        # of this class; it must be set before this method is called.
        vec = np.zeros(50)
        for word in jieba.cut(line):
            if word in self.w2v.vocab:
                vec += self.w2v[word]
        return vec

    def clean_jobname(self, jobname):
        """
        Normalise a job name: return the lower-cased name when it is a known
        job name, otherwise the known job name with the largest ``lcs_len``
        score against it.
        """
        lowered = jobname.lower()
        if lowered in self.jobdic or not self.jobdic:
            # Known name, or no dictionary to pick a closest match from.
            return lowered
        # max() selects the same element that sorting and taking the last did.
        return max((lcs_len(jobname, job), job) for job in self.jobdic)[1]
示例15: DataFrame
# 需要导入模块: from tgrocery import Grocery [as 别名]
# 或者: from tgrocery.Grocery import load [as 别名]
tdic['id'].append(_id)
tdic['type'].append(_type)
tdic['contents'].append(contents)
i +=1
#train = pd.read_csv( train_file, header = 0, delimiter = "\t", quoting = 3 )
#test = pd.read_csv( test_file, header = 1, delimiter = "\t", quoting = 3 )
# Build the train/test frames from the dicts filled in earlier in the script
# (``dic`` is defined outside this excerpt).
train = DataFrame(dic)
test = DataFrame(tdic)
#
# classfynews_instance is the path the model is saved under (original author's note)
grocery = Grocery('classfynews_instance')
# NOTE(review): this passes a two-element list of columns; tgrocery's README
# shows train() taking (label, text) tuples or a file path -- confirm.
train_in = [train['contents'],train['type']]
grocery.train(train_in)
print grocery.get_load_status()
# NOTE(review): save() is commented out, so the load() below depends on a
# model that already exists on disk from an earlier run -- confirm.
#grocery.save()
copy_grocery = Grocery('classfynews_instance')
copy_grocery.load()
#copy_grocery = grocery
# test_in is only used by the commented-out test() call at the end.
test_in = [test['contents'],test['type']]
# input looks like ['我是中国人','台北*****'] (original author's note)
# output: [11,12] (original author's note)
# NOTE(review): the whole 'contents' column is passed in a single predict()
# call -- verify that predict() accepts more than one text.
test_result = copy_grocery.predict(test['contents'])
print test_result.predicted_y
#test_result = copy_grocery.test(test_in)
#print test_result.show_result()