本文整理汇总了Python中jieba.cut_for_search方法的典型用法代码示例。如果您正苦于以下问题:Python jieba.cut_for_search方法的具体用法?Python jieba.cut_for_search怎么用?Python jieba.cut_for_search使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类jieba
的用法示例。
在下文中一共展示了jieba.cut_for_search方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: testCutForSearch
# 需要导入模块: import jieba [as 别名]
# 或者: from jieba import cut_for_search [as 别名]
def testCutForSearch(self):
    """Check that jieba.cut_for_search returns a generator of tokens for
    every sentence in test_contents, printing each segmentation to stderr."""
    for sentence in test_contents:
        tokens = jieba.cut_for_search(sentence)
        # cut_for_search must be lazy: it yields a generator, not a list.
        assert isinstance(tokens, types.GeneratorType), "Test CutForSearch Generator error"
        tokens = list(tokens)
        assert isinstance(tokens, list), "Test CutForSearch error on content: %s" % sentence
        print(" , ".join(tokens), file=sys.stderr)
    print("testCutForSearch", file=sys.stderr)
示例2: testCutForSearch_NOHMM
# 需要导入模块: import jieba [as 别名]
# 或者: from jieba import cut_for_search [as 别名]
def testCutForSearch_NOHMM(self):
    """Same as testCutForSearch, but with the HMM new-word discovery
    disabled (HMM=False)."""
    for sentence in test_contents:
        tokens = jieba.cut_for_search(sentence, HMM=False)
        # Result must still be a lazy generator even without the HMM pass.
        assert isinstance(tokens, types.GeneratorType), "Test CutForSearch Generator error"
        tokens = list(tokens)
        assert isinstance(tokens, list), "Test CutForSearch error on content: %s" % sentence
        print(" , ".join(tokens), file=sys.stderr)
    print("testCutForSearch_NOHMM", file=sys.stderr)
示例3: cuttest
# 需要导入模块: import jieba [as 别名]
# 或者: from jieba import cut_for_search [as 别名]
def cuttest(test_sent):
    """Segment test_sent in search-engine mode and print the tokens
    separated by '/', followed by a newline."""
    for token in jieba.cut_for_search(test_sent):
        print(token, "/", end=' ')
    print("")
示例4: run
# 需要导入模块: import jieba [as 别名]
# 或者: from jieba import cut_for_search [as 别名]
def run(self):
    """Demo of jieba's three segmentation modes: full, default (accurate),
    and search-engine mode."""
    words = jieba.cut("我来到北京清华大学", cut_all=True)
    print("Full Mode:" + "/ ".join(words))  # full mode: all possible words
    words = jieba.cut("我来到北京清华大学", cut_all=False)
    print("Default Mode:" + "/ ".join(words))  # default (accurate) mode
    # HMM-based new-word discovery handles out-of-vocabulary names.
    words = jieba.cut("他来到了网易杭研大厦")
    print(", ".join(words))
    # Search-engine mode: further splits long words for index recall.
    words = jieba.cut_for_search("小明硕士毕业于中国科学院计算所,后在日本京都大学深造")
    print(", ".join(words))
示例5: cache_raw_seg
# 需要导入模块: import jieba [as 别名]
# 或者: from jieba import cut_for_search [as 别名]
def cache_raw_seg(self):
    """Tokenize each sentence's raw text with jieba (search-engine mode)
    and cache the tokens in Redis as one set per sentence.

    Redis connection parameters are read from ./application.yml.
    Keys are of the form 'article:<article_id>:raw_seg:<sentence_index>'.
    """
    # Use a context manager so the config file handle is closed promptly
    # (the original left it open until garbage collection).
    with open("./application.yml") as config_file:
        config = yaml.safe_load(config_file)
    r = redis.StrictRedis(host=config['redis']['host'], port=config['redis']['port'], db=config['redis']['db'])
    for i, sentence in enumerate(self.sentences):
        sentence_seg_id = 'article:' + self.article_id + ':raw_seg:' + str(i)
        for raw_word_seg in jieba.cut_for_search(sentence.raw_sentence):
            r.sadd(sentence_seg_id, raw_word_seg)
示例6: cut_for_search
# 需要导入模块: import jieba [as 别名]
# 或者: from jieba import cut_for_search [as 别名]
def cut_for_search(self, sentence, stopword=True):
    """Segment *sentence* in search-engine mode and return a list of tokens.

    When *stopword* is true (the default), tokens found in self.stopwords
    are dropped from the result.
    """
    tokens = jieba.cut_for_search(sentence)
    if stopword:
        return [tok for tok in tokens if tok not in self.stopwords]
    return list(tokens)
示例7: search
# 需要导入模块: import jieba [as 别名]
# 或者: from jieba import cut_for_search [as 别名]
def search(self, keywords, start=0, length=20):
    """Search the index for *keywords*.

    The query is split into terms with jieba's search-engine mode, then
    the terms are looked up via search_by_words with the given paging
    window (start offset, result length).
    """
    terms = list(jieba.cut_for_search(keywords))
    return self.search_by_words(terms, start, length)
示例8: add_content
# 需要导入模块: import jieba [as 别名]
# 或者: from jieba import cut_for_search [as 别名]
def add_content(self, content, obj_key):
    """Add a document to the index.

    *content* is segmented in search-engine mode, the terms are weighted
    by min_nlp.get_weight, and the weighted terms are indexed under
    *obj_key*.
    """
    terms = jieba.cut_for_search(content)
    weighted_terms = min_nlp.get_weight(terms)
    self.add_word_index(weighted_terms, obj_key)
示例9: GET
# 需要导入模块: import jieba [as 别名]
# 或者: from jieba import cut_for_search [as 别名]
def GET(self):
    # web.py GET handler: segment the query, rank matching documents,
    # and recommend related topics via word2vec, then render the page.
    # NOTE(review): this is Python 2 code (bare `print` statement below);
    # it will not run under Python 3.
    data=web.input()
    if data:
        searchword=data.searchword
    else:
        searchword=''
    news_list=list()
    topic=list()
    if searchword:
        # Split the query into search terms (search-engine granularity).
        cut = jieba.cut_for_search(searchword)
        word_list = []
        for word in cut:
            # Drop punctuation and Latin/number tokens before ranking.
            if word not in punct and word not in Letters_and_numbers:
                word_list.append(word.encode("utf-8"))
        # Rank documents for the query terms; returns top document ids.
        topK=query.calculate(word_list,config.query_return_numbers)
        for k in topK:
            data = dict()
            title, content, url= id_index.get_data(k)
            data['id'] = k
            # Truncate the stored content to a short display snippet.
            data['content'] = content.decode("utf-8")[:config.query_return_snipper_size]
            data['title']=title.decode("utf-8")
            data['url'] = url.decode("utf-8")
            news_list.append(data)
        del data,cut,word_list,word,topK,title,content,url
        # word2vec: recommend similar topics for the query
        word2vec.cal(searchword.encode('utf-8'))
        print word2vec.result.length
        if word2vec.result.length==0:  # word not in vocabulary (original note says length is 1)
            pass
        else:
            for i in range(config.recommand_topic_numbers):
                topic.append(word2vec.result.word[i].char)
    return render.index(searchword,news_list,topic)
示例10: calculate
# 需要导入模块: import jieba [as 别名]
# 或者: from jieba import cut_for_search [as 别名]
def calculate(self, doc_id, Top_numbers=10, multiple=10):
    """Return the Top_numbers documents most similar to *doc_id*.

    The document body is fetched from the index, segmented with jieba in
    search-engine mode, filtered (punctuation, Latin/number tokens, and
    stopwords removed), and handed to FastCos for cosine-similarity
    ranking.
    """
    title, content, url = self.index.get_data(doc_id)
    word_list = []
    for word in jieba.cut_for_search(content):
        if word in self.punct or word in self.Letters_and_numbers:
            continue
        encoded = word.encode("utf-8")
        # Stopwords must be removed for document-to-document similarity,
        # otherwise the computation is far too slow.
        # Fix: `dict.has_key` was removed in Python 3; the `in` operator
        # is equivalent and works on both Python 2 and 3.
        if encoded in self.stopword:
            continue
        word_list.append(encoded)
    return self.FastCos.calculate(word_list, Top_numbers, multiple)