当前位置: 首页>>代码示例>>Python>>正文


Python jieba.cut_for_search方法代码示例

本文整理汇总了Python中jieba.cut_for_search方法的典型用法代码示例。如果您正苦于以下问题:Python jieba.cut_for_search方法的具体用法?Python jieba.cut_for_search怎么用?Python jieba.cut_for_search使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在jieba的用法示例。


在下文中一共展示了jieba.cut_for_search方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: testCutForSearch

# 需要导入模块: import jieba [as 别名]
# 或者: from jieba import cut_for_search [as 别名]
def testCutForSearch(self):
    """Check that cut_for_search returns a generator whose items form a list.

    Each test sentence is segmented in search-engine mode; the tokens are
    printed to stderr for manual inspection.
    """
    for sentence in test_contents:
        tokens = jieba.cut_for_search(sentence)
        # The API contract: a lazy generator is returned, not a list.
        assert isinstance(tokens, types.GeneratorType), "Test CutForSearch Generator error"
        tokens = list(tokens)
        assert isinstance(tokens, list), "Test CutForSearch error on content: %s" % sentence
        print(" , ".join(tokens), file=sys.stderr)
    print("testCutForSearch", file=sys.stderr)
开发者ID:deepcs233,项目名称:jieba_fast,代码行数:10,代码来源:jieba_test.py

示例2: testCutForSearch_NOHMM

# 需要导入模块: import jieba [as 别名]
# 或者: from jieba import cut_for_search [as 别名]
def testCutForSearch_NOHMM(self):
    """Same as testCutForSearch, but with the HMM new-word discovery disabled.

    Verifies the generator contract of cut_for_search(HMM=False) and prints
    the resulting tokens to stderr.
    """
    for sentence in test_contents:
        tokens = jieba.cut_for_search(sentence, HMM=False)
        # Even with HMM off, the return type must stay a generator.
        assert isinstance(tokens, types.GeneratorType), "Test CutForSearch Generator error"
        tokens = list(tokens)
        assert isinstance(tokens, list), "Test CutForSearch error on content: %s" % sentence
        print(" , ".join(tokens), file=sys.stderr)
    print("testCutForSearch_NOHMM", file=sys.stderr)
开发者ID:deepcs233,项目名称:jieba_fast,代码行数:10,代码来源:jieba_test.py

示例3: cuttest

# 需要导入模块: import jieba [as 别名]
# 或者: from jieba import cut_for_search [as 别名]
def cuttest(test_sent):
    """Segment *test_sent* in search-engine mode and print tokens inline.

    Tokens are printed on one line separated by " / ", followed by a newline.
    """
    for token in jieba.cut_for_search(test_sent):
        print(token, "/", end=' ')
    print("")
开发者ID:deepcs233,项目名称:jieba_fast,代码行数:7,代码来源:test_cut_for_search.py

示例4: run

# 需要导入模块: import jieba [as 别名]
# 或者: from jieba import cut_for_search [as 别名]
def run(self):
    """Thread target: exercise jieba's three segmentation modes on sample text."""
    # Full mode: emit every word the dictionary can find, with overlaps.
    segments = jieba.cut("我来到北京清华大学", cut_all=True)
    print("Full Mode:" + "/ ".join(segments))

    # Accurate (default) mode: one best segmentation of the sentence.
    segments = jieba.cut("我来到北京清华大学", cut_all=False)
    print("Default Mode:" + "/ ".join(segments))

    # Default call with HMM-based new-word discovery left on.
    segments = jieba.cut("他来到了网易杭研大厦")
    print(", ".join(segments))

    # Search-engine mode: additionally splits long words into shorter ones.
    segments = jieba.cut_for_search("小明硕士毕业于中国科学院计算所,后在日本京都大学深造")
    print(", ".join(segments))
开发者ID:deepcs233,项目名称:jieba_fast,代码行数:14,代码来源:test_multithread.py

示例5: cache_raw_seg

# 需要导入模块: import jieba [as 别名]
# 或者: from jieba import cut_for_search [as 别名]
def cache_raw_seg(self):
    """Segment each sentence's raw text and cache the tokens in Redis.

    For sentence i of this article, the search-mode tokens of
    ``self.sentences[i].raw_sentence`` are stored as members of the Redis set
    ``article:<article_id>:raw_seg:<i>``.

    Reads Redis connection settings from ./application.yml.
    """
    # Use a context manager so the config file handle is closed promptly
    # (the original `yaml.safe_load(open(...))` leaked the handle).
    with open("./application.yml") as config_file:
        config = yaml.safe_load(config_file)
    r = redis.StrictRedis(host=config['redis']['host'],
                          port=config['redis']['port'],
                          db=config['redis']['db'])
    # enumerate() replaces the index-based range(0, len(...)) loop.
    for i, sentence in enumerate(self.sentences):
        sentence_seg_id = 'article:' + self.article_id + ':raw_seg:' + str(i)
        for token in jieba.cut_for_search(sentence.raw_sentence):
            r.sadd(sentence_seg_id, token)
开发者ID:galaxyyao,项目名称:public-opinion-analysis,代码行数:10,代码来源:article.py

示例6: cut_for_search

# 需要导入模块: import jieba [as 别名]
# 或者: from jieba import cut_for_search [as 别名]
def cut_for_search(self, sentence, stopword=True):
    """Tokenize *sentence* in search-engine mode.

    When *stopword* is true, tokens found in ``self.stopwords`` are dropped.
    Returns the surviving tokens as a list, in segmentation order.
    """
    return [
        token
        for token in jieba.cut_for_search(sentence)
        if not (stopword and token in self.stopwords)
    ]
开发者ID:WenRichard,项目名称:Customer-Chatbot,代码行数:12,代码来源:jiebaSegment.py

示例7: search

# 需要导入模块: import jieba [as 别名]
# 或者: from jieba import cut_for_search [as 别名]
def search(self, keywords, start=0, length=20):
    """Search the index for *keywords*.

    The query string is segmented in search-engine mode and the resulting
    terms are passed to ``search_by_words`` with the given paging window.
    Returns whatever ``search_by_words`` returns (a list of keys).
    """
    terms = list(jieba.cut_for_search(keywords))
    return self.search_by_words(terms, start, length)
开发者ID:ziyueit,项目名称:min,代码行数:9,代码来源:min.py

示例8: add_content

# 需要导入模块: import jieba [as 别名]
# 或者: from jieba import cut_for_search [as 别名]
def add_content(self, content, obj_key):
    """Add a document to the inverted index.

    *content* is segmented in search-engine mode, each term is weighted by
    ``min_nlp.get_weight``, and the weighted terms are indexed under
    *obj_key*.
    """
    terms = jieba.cut_for_search(content)
    weighted_terms = min_nlp.get_weight(terms)
    self.add_word_index(weighted_terms, obj_key)
开发者ID:ziyueit,项目名称:min,代码行数:9,代码来源:min.py

示例9: GET

# 需要导入模块: import jieba [as 别名]
# 或者: from jieba import cut_for_search [as 别名]
def GET(self):
        """web.py GET handler for the search page.

        Reads `searchword` from the query string, segments it, ranks matching
        documents, and renders the index template with the result list plus
        word2vec-recommended related topics.

        NOTE(review): this handler is Python 2 code (bare `print` statement
        below); it cannot run unmodified on Python 3.
        """
        data=web.input()
        if data:
            searchword=data.searchword
        else:
            searchword=''
        news_list=list()
        topic=list()
        if searchword:
            # Segment the query in search-engine mode, dropping punctuation
            # and plain letters/digits before ranking.
            cut = jieba.cut_for_search(searchword)
            word_list = []
            for word in cut:
                if word not in punct and word not in Letters_and_numbers:
                    word_list.append(word.encode("utf-8"))
            # Rank documents for the query terms; returns top-K doc ids.
            topK=query.calculate(word_list,config.query_return_numbers)
            for k in topK:
                data = dict()
                title, content, url= id_index.get_data(k)
                data['id'] = k
                # Truncate the stored content to a short snippet for display.
                data['content'] = content.decode("utf-8")[:config.query_return_snipper_size]
                data['title']=title.decode("utf-8")
                data['url'] = url.decode("utf-8")
                news_list.append(data)
            del data,cut,word_list,word,topK,title,content,url
            # Use word2vec to recommend similar topics for the query.
            word2vec.cal(searchword.encode('utf-8'))
            print word2vec.result.length
            if word2vec.result.length==0:  # query word absent from the model vocabulary
                pass
            else:
                for i in range(config.recommand_topic_numbers):
                    topic.append(word2vec.result.word[i].char)
        return render.index(searchword,news_list,topic)
开发者ID:Google1234,项目名称:Information_retrieva_Projectl-,代码行数:35,代码来源:main.py

示例10: calculate

# 需要导入模块: import jieba [as 别名]
# 或者: from jieba import cut_for_search [as 别名]
def calculate(self,doc_id,Top_numbers=10,multiple=10):
        """Return the documents most similar to *doc_id*.

        The document's content is segmented in search-engine mode; punctuation,
        letters/digits, and stopwords are removed, and the remaining terms are
        handed to ``FastCos.calculate`` for cosine-similarity ranking.

        NOTE(review): Python 2 code (`dict.has_key`); also mixes tab and space
        indentation on the final return line.
        """
        title,content,url=self.index.get_data(doc_id)
        cut=jieba.cut_for_search(content)
        word_list=[]
        for word in cut:
            if  word not in self.punct and word not in self.Letters_and_numbers :
                # Computing inter-document similarity requires stopword removal,
                # otherwise it is too slow.
                if self.stopword.has_key(word.encode("utf-8")):
                    pass
                else:
                    word_list.append(word.encode("utf-8"))
	return self.FastCos.calculate(word_list,Top_numbers,multiple)
开发者ID:Google1234,项目名称:Information_retrieva_Projectl-,代码行数:14,代码来源:similar_doc.py


注:本文中的jieba.cut_for_search方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。