本文整理匯總了 Python 中 jieba.cut_for_search 方法的典型用法代碼示例。如果您正苦於以下問題：Python jieba.cut_for_search 方法的具體用法？Python jieba.cut_for_search 怎麼用？Python jieba.cut_for_search 使用的例子？那麼，這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊 jieba 的用法示例。
在下文中一共展示了 jieba.cut_for_search 方法的 10 個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的 Python 代碼示例。
示例1: testCutForSearch
# 需要導入模塊: import jieba [as 別名]
# 或者: from jieba import cut_for_search [as 別名]
def testCutForSearch(self):
    """Smoke-test ``jieba.cut_for_search`` on every entry of ``test_contents``.

    For each sample the call must return a generator; the generator is then
    drained into a list and the tokens are echoed to stderr, joined by
    `` , ``. The test name is written to stderr once all samples are done.
    """
    for sample in test_contents:
        segments = jieba.cut_for_search(sample)
        # The segmenter is expected to be lazy, i.e. to return a generator.
        assert isinstance(segments, types.GeneratorType), "Test CutForSearch Generator error"
        tokens = list(segments)
        assert isinstance(tokens, list), "Test CutForSearch error on content: %s" % sample
        print(" , ".join(tokens), file=sys.stderr)
    print("testCutForSearch", file=sys.stderr)
示例2: testCutForSearch_NOHMM
# 需要導入模塊: import jieba [as 別名]
# 或者: from jieba import cut_for_search [as 別名]
def testCutForSearch_NOHMM(self):
    """Smoke-test ``jieba.cut_for_search`` with ``HMM=False`` on ``test_contents``.

    Same checks as the HMM-enabled variant: the call must return a generator,
    which is drained into a list and echoed to stderr joined by `` , ``. The
    test name is written to stderr after the last sample.
    """
    for sample in test_contents:
        segments = jieba.cut_for_search(sample, HMM=False)
        # Even with the HMM switched off the result must still be a generator.
        assert isinstance(segments, types.GeneratorType), "Test CutForSearch Generator error"
        tokens = list(segments)
        assert isinstance(tokens, list), "Test CutForSearch error on content: %s" % sample
        print(" , ".join(tokens), file=sys.stderr)
    print("testCutForSearch_NOHMM", file=sys.stderr)
示例3: cuttest
# 需要導入模塊: import jieba [as 別名]
# 或者: from jieba import cut_for_search [as 別名]
def cuttest(test_sent):
    """Print the search-mode segmentation of ``test_sent`` on one line.

    Each token is followed by `` / `` (token, space, slash, space); a newline
    terminates the line after the last token.
    """
    for token in jieba.cut_for_search(test_sent):
        print(token, "/", end=' ')
    # Finish the line that the loop above left open.
    print("")
示例4: run
# 需要導入模塊: import jieba [as 別名]
# 或者: from jieba import cut_for_search [as 別名]
def run(self):
    """Print sample sentences segmented with jieba's different cutting modes.

    Demonstrates, in order: ``cut`` with ``cut_all=True``, ``cut`` with
    ``cut_all=False``, ``cut`` with default arguments, and
    ``cut_for_search``.
    """
    university_sentence = "我來到北京清華大學"

    # Full mode.
    full_mode_tokens = jieba.cut(university_sentence, cut_all=True)
    print("Full Mode:" + "/ ".join(full_mode_tokens))

    # Default mode.
    default_mode_tokens = jieba.cut(university_sentence, cut_all=False)
    print("Default Mode:" + "/ ".join(default_mode_tokens))

    # No explicit mode argument.
    implicit_mode_tokens = jieba.cut("他來到了網易杭研大廈")
    print(", ".join(implicit_mode_tokens))

    # Search-engine mode.
    search_mode_tokens = jieba.cut_for_search("小明碩士畢業於中國科學院計算所,後在日本京都大學深造")
    print(", ".join(search_mode_tokens))
示例5: cache_raw_seg
# 需要導入模塊: import jieba [as 別名]
# 或者: from jieba import cut_for_search [as 別名]
def cache_raw_seg(self):
    """Store the search-mode tokens of every sentence in Redis sets.

    The Redis connection settings are read from the ``redis`` section
    (``host``, ``port``, ``db``) of ``./application.yml``. For the sentence at
    position ``i`` in ``self.sentences``, every token produced by
    ``jieba.cut_for_search`` over its ``raw_sentence`` is added to the set
    stored under the key ``article:<article_id>:raw_seg:<i>``.
    """
    # Use a context manager so the config file handle is always closed;
    # the bare ``open(...)`` used previously leaked it.
    with open("./application.yml") as config_file:
        config = yaml.safe_load(config_file)

    redis_settings = config['redis']
    r = redis.StrictRedis(
        host=redis_settings['host'],
        port=redis_settings['port'],
        db=redis_settings['db'],
    )

    for index, sentence in enumerate(self.sentences):
        sentence_seg_id = 'article:' + self.article_id + ':raw_seg:' + str(index)
        for raw_word_seg in jieba.cut_for_search(sentence.raw_sentence):
            r.sadd(sentence_seg_id, raw_word_seg)
示例6: cut_for_search
# 需要導入模塊: import jieba [as 別名]
# 或者: from jieba import cut_for_search [as 別名]
def cut_for_search(self, sentence, stopword=True):
    """Segment ``sentence`` in search mode, optionally dropping stop words.

    Args:
        sentence: Text passed to ``jieba.cut_for_search``.
        stopword: When truthy, tokens found in ``self.stopwords`` are skipped.

    Returns:
        A list of the remaining tokens in the order jieba produced them.
    """
    return [
        token
        for token in jieba.cut_for_search(sentence)
        # ``self.stopwords`` is only consulted when filtering is enabled.
        if not (stopword and token in self.stopwords)
    ]
示例7: search
# 需要導入模塊: import jieba [as 別名]
# 或者: from jieba import cut_for_search [as 別名]
def search(self, keywords, start=0, length=20):
    """Search by keywords.

    ``keywords`` is segmented with ``jieba.cut_for_search`` and the resulting
    token list is handed to ``self.search_by_words`` together with ``start``
    and ``length``; whatever that call returns is returned unchanged.
    """
    tokens = list(jieba.cut_for_search(keywords))
    return self.search_by_words(tokens, start, length)
示例8: add_content
# 需要導入模塊: import jieba [as 別名]
# 或者: from jieba import cut_for_search [as 別名]
def add_content(self, content, obj_key):
    """Add a document to the index.

    ``content`` is segmented with ``jieba.cut_for_search``, the tokens are
    passed through ``min_nlp.get_weight``, and the result is registered under
    ``obj_key`` via ``self.add_word_index``.
    """
    tokens = jieba.cut_for_search(content)
    weighted_tokens = min_nlp.get_weight(tokens)
    self.add_word_index(weighted_tokens, obj_key)
示例9: GET
# 需要導入模塊: import jieba [as 別名]
# 或者: from jieba import cut_for_search [as 別名]
def GET(self):
    """Handle a search request and render the index page.

    Reads ``searchword`` from the request input. When it is non-empty, the
    word is segmented with ``jieba.cut_for_search``, tokens found in ``punct``
    or ``Letters_and_numbers`` are discarded, and the rest (UTF-8 encoded) is
    passed to ``query.calculate``. Each returned id is resolved through
    ``id_index.get_data`` into an ``id``/``content``/``title``/``url`` dict.
    Related topics are then collected from ``word2vec``.

    Returns:
        The result of ``render.index(searchword, news_list, topic)``.

    NOTE(review): the indentation of this listing was lost; the nesting below
    is a reconstruction and should be confirmed against the original project.
    """
    data = web.input()
    if data:
        searchword = data.searchword
    else:
        searchword = ''
    news_list = list()
    topic = list()
    if searchword:
        word_list = [
            word.encode("utf-8")
            for word in jieba.cut_for_search(searchword)
            if word not in punct and word not in Letters_and_numbers
        ]
        topK = query.calculate(word_list, config.query_return_numbers)
        for k in topK:
            title, content, url = id_index.get_data(k)
            # Build each entry under its own name instead of rebinding the
            # request ``data`` object.
            news_list.append({
                'id': k,
                'content': content.decode("utf-8")[:config.query_return_snipper_size],
                'title': title.decode("utf-8"),
                'url': url.decode("utf-8"),
            })
        # The former ``del data, cut, ..., title, content, url`` was dropped:
        # it raised UnboundLocalError whenever ``topK`` was empty, because the
        # loop variables had never been bound.

        # Recommend similar topics with word2vec.
        word2vec.cal(searchword.encode('utf-8'))
        # Single-argument call form prints identically on Python 2 and 3.
        print(word2vec.result.length)
        # NOTE(review): the original comment said an unknown word yields
        # length 1, yet the code tests for 0 - confirm which is right.
        if word2vec.result.length != 0:
            for i in range(config.recommand_topic_numbers):
                topic.append(word2vec.result.word[i].char)
    return render.index(searchword, news_list, topic)
示例10: calculate
# 需要導入模塊: import jieba [as 別名]
# 或者: from jieba import cut_for_search [as 別名]
def calculate(self, doc_id, Top_numbers=10, multiple=10):
    """Compute similar documents for ``doc_id`` from its segmented content.

    The document content is fetched via ``self.index.get_data``, segmented
    with ``jieba.cut_for_search``, and filtered: tokens in ``self.punct`` or
    ``self.Letters_and_numbers`` are dropped, as are tokens whose UTF-8
    encoding is present in ``self.stopword``.

    Args:
        doc_id: Identifier passed to ``self.index.get_data``.
        Top_numbers: Forwarded unchanged to ``self.FastCos.calculate``.
        multiple: Forwarded unchanged to ``self.FastCos.calculate``.

    Returns:
        Whatever ``self.FastCos.calculate(word_list, Top_numbers, multiple)``
        returns.
    """
    title, content, url = self.index.get_data(doc_id)
    word_list = []
    for word in jieba.cut_for_search(content):
        if word in self.punct or word in self.Letters_and_numbers:
            continue
        encoded_word = word.encode("utf-8")
        # Stop words must be removed when computing inter-document
        # similarity, otherwise it is too slow.
        # ``in`` replaces ``dict.has_key``, which no longer exists on
        # Python 3 and behaves the same on Python 2.
        if encoded_word not in self.stopword:
            word_list.append(encoded_word)
    return self.FastCos.calculate(word_list, Top_numbers, multiple)