當前位置: 首頁>>代碼示例>>Python>>正文


Python jieba.cut_for_search方法代碼示例

本文整理匯總了Python中jieba.cut_for_search方法的典型用法代碼示例。如果您正苦於以下問題:Python jieba.cut_for_search方法的具體用法?Python jieba.cut_for_search怎麼用?Python jieba.cut_for_search使用的例子?那麼,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在jieba的用法示例。


在下文中一共展示了jieba.cut_for_search方法的10個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: testCutForSearch

# 需要導入模塊: import jieba [as 別名]
# 或者: from jieba import cut_for_search [as 別名]
def testCutForSearch(self):
    """Verify that cut_for_search is lazy and its output materializes to a list."""
    for sentence in test_contents:
        tokens = jieba.cut_for_search(sentence)
        # cut_for_search must return a generator, not an eager list
        assert isinstance(tokens, types.GeneratorType), "Test CutForSearch Generator error"
        tokens = list(tokens)
        assert isinstance(tokens, list), "Test CutForSearch error on content: %s" % sentence
        print(" , ".join(tokens), file=sys.stderr)
    print("testCutForSearch", file=sys.stderr)
開發者ID:deepcs233,項目名稱:jieba_fast,代碼行數:10,代碼來源:jieba_test.py

示例2: testCutForSearch_NOHMM

# 需要導入模塊: import jieba [as 別名]
# 或者: from jieba import cut_for_search [as 別名]
def testCutForSearch_NOHMM(self):
    """Same check as testCutForSearch, but with the HMM new-word model disabled."""
    for sentence in test_contents:
        tokens = jieba.cut_for_search(sentence, HMM=False)
        # even without HMM the API contract is a generator
        assert isinstance(tokens, types.GeneratorType), "Test CutForSearch Generator error"
        tokens = list(tokens)
        assert isinstance(tokens, list), "Test CutForSearch error on content: %s" % sentence
        print(" , ".join(tokens), file=sys.stderr)
    print("testCutForSearch_NOHMM", file=sys.stderr)
開發者ID:deepcs233,項目名稱:jieba_fast,代碼行數:10,代碼來源:jieba_test.py

示例3: cuttest

# 需要導入模塊: import jieba [as 別名]
# 或者: from jieba import cut_for_search [as 別名]
def cuttest(test_sent):
    """Segment test_sent in search-engine mode and print the tokens '/'-separated."""
    for token in jieba.cut_for_search(test_sent):
        print(token, "/", end=' ')
    print("")
開發者ID:deepcs233,項目名稱:jieba_fast,代碼行數:7,代碼來源:test_cut_for_search.py

示例4: run

# 需要導入模塊: import jieba [as 別名]
# 或者: from jieba import cut_for_search [as 別名]
def run(self):
    """Exercise jieba's full, default and search-engine cut modes (thread body)."""
    full_tokens = jieba.cut("我來到北京清華大學", cut_all=True)
    print("Full Mode:" + "/ ".join(full_tokens))  # full mode

    default_tokens = jieba.cut("我來到北京清華大學", cut_all=False)
    print("Default Mode:" + "/ ".join(default_tokens))  # default (accurate) mode

    plain_tokens = jieba.cut("他來到了網易杭研大廈")
    print(", ".join(plain_tokens))

    # search-engine mode: further splits long words for recall
    search_tokens = jieba.cut_for_search("小明碩士畢業於中國科學院計算所,後在日本京都大學深造")
    print(", ".join(search_tokens))
開發者ID:deepcs233,項目名稱:jieba_fast,代碼行數:14,代碼來源:test_multithread.py

示例5: cache_raw_seg

# 需要導入模塊: import jieba [as 別名]
# 或者: from jieba import cut_for_search [as 別名]
def cache_raw_seg(self):
    """Segment each raw sentence of this article and cache the tokens in Redis.

    Tokens of sentence *i* go into the Redis set
    ``article:<article_id>:raw_seg:<i>``.
    """
    # Use a context manager so the config file handle is closed promptly
    # (the original `yaml.safe_load(open(...))` leaked the handle).
    with open("./application.yml") as config_file:
        config = yaml.safe_load(config_file)
    r = redis.StrictRedis(host=config['redis']['host'], port=config['redis']['port'], db=config['redis']['db'])
    for i, sentence in enumerate(self.sentences):
        sentence_seg_id = 'article:' + self.article_id + ':raw_seg:' + str(i)
        for raw_word_seg in jieba.cut_for_search(sentence.raw_sentence):
            r.sadd(sentence_seg_id, raw_word_seg)
開發者ID:galaxyyao,項目名稱:public-opinion-analysis,代碼行數:10,代碼來源:article.py

示例6: cut_for_search

# 需要導入模塊: import jieba [as 別名]
# 或者: from jieba import cut_for_search [as 別名]
def cut_for_search(self, sentence, stopword=True):
    """Segment *sentence* in search-engine mode.

    When *stopword* is true, tokens found in ``self.stopwords`` are dropped.
    Returns the surviving tokens as a list.
    """
    segments = jieba.cut_for_search(sentence)
    if not stopword:
        return list(segments)
    return [seg for seg in segments if seg not in self.stopwords]
開發者ID:WenRichard,項目名稱:Customer-Chatbot,代碼行數:12,代碼來源:jiebaSegment.py

示例7: search

# 需要導入模塊: import jieba [as 別名]
# 或者: from jieba import cut_for_search [as 別名]
def search(self, keywords, start=0, length=20):
    """Search the index for *keywords*.

    The query is segmented with jieba's search-engine mode and the
    resulting terms are looked up via ``search_by_words``.
    """
    query_terms = list(jieba.cut_for_search(keywords))
    return self.search_by_words(query_terms, start, length)
開發者ID:ziyueit,項目名稱:min,代碼行數:9,代碼來源:min.py

示例8: add_content

# 需要導入模塊: import jieba [as 別名]
# 或者: from jieba import cut_for_search [as 別名]
def add_content(self, content, obj_key):
    """Index a document: segment it, weight the terms, store under *obj_key*."""
    weighted_terms = min_nlp.get_weight(jieba.cut_for_search(content))
    self.add_word_index(weighted_terms, obj_key)
開發者ID:ziyueit,項目名稱:min,代碼行數:9,代碼來源:min.py

示例9: GET

# 需要導入模塊: import jieba [as 別名]
# 或者: from jieba import cut_for_search [as 別名]
# NOTE(review): Python 2 code (bare `print` statement, manual encode/decode).
def GET(self):
        """Serve a search request: segment the query, rank matching documents,
        and recommend related topics via word2vec."""
        data=web.input()
        if data:
            searchword=data.searchword
        else:
            searchword=''
        news_list=list()
        topic=list()
        if searchword:
            # Segment the query in search-engine mode.
            cut = jieba.cut_for_search(searchword)
            word_list = []
            for word in cut:
                # Drop punctuation and ASCII letters/digits before ranking.
                if word not in punct and word not in Letters_and_numbers:
                    word_list.append(word.encode("utf-8"))
            # Top-K document ids for the query terms.
            topK=query.calculate(word_list,config.query_return_numbers)
            for k in topK:
                data = dict()
                title, content, url= id_index.get_data(k)
                data['id'] = k
                # Truncate the body to a short snippet for display.
                data['content'] = content.decode("utf-8")[:config.query_return_snipper_size]
                data['title']=title.decode("utf-8")
                data['url'] = url.decode("utf-8")
                news_list.append(data)
            del data,cut,word_list,word,topK,title,content,url
            # word2vec: recommend similar topics for the raw query string.
            word2vec.cal(searchword.encode('utf-8'))
            print word2vec.result.length
            if word2vec.result.length==0:# word not in vocabulary; length is 1
                pass
            else:
                for i in range(config.recommand_topic_numbers):
                    topic.append(word2vec.result.word[i].char)
        return render.index(searchword,news_list,topic)
開發者ID:Google1234,項目名稱:Information_retrieva_Projectl-,代碼行數:35,代碼來源:main.py

示例10: calculate

# 需要導入模塊: import jieba [as 別名]
# 或者: from jieba import cut_for_search [as 別名]
def calculate(self, doc_id, Top_numbers=10, multiple=10):
    """Return the Top_numbers documents most similar to *doc_id*.

    The document body is segmented in search-engine mode; punctuation,
    ASCII letters/digits and stopwords are filtered out before the cosine
    similarity is computed.
    """
    # Original had a tab-indented `return` amid space indentation (a
    # TabError on Python 3) and used the deprecated dict.has_key().
    title, content, url = self.index.get_data(doc_id)
    word_list = []
    for word in jieba.cut_for_search(content):
        if word in self.punct or word in self.Letters_and_numbers:
            continue
        encoded = word.encode("utf-8")
        # Inter-document similarity: stopwords must be removed or this is too slow.
        if encoded not in self.stopword:
            word_list.append(encoded)
    return self.FastCos.calculate(word_list, Top_numbers, multiple)
開發者ID:Google1234,項目名稱:Information_retrieva_Projectl-,代碼行數:14,代碼來源:similar_doc.py


注:本文中的jieba.cut_for_search方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。