當前位置: 首頁>>代碼示例>>Python>>正文


Python phrases.Phrases類代碼示例

本文整理匯總了Python中gensim.models.phrases.Phrases的典型用法代碼示例。如果您正苦於以下問題：Python Phrases類的具體用法？Python Phrases怎麼用？Python Phrases使用的例子？那麼，這裡精選的類代碼示例或許可以為您提供幫助。


在下文中一共展示了Phrases類的15個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: __init__

    def __init__(self):
        """Set training parameters, load saved models and build the classifier.

        Loads the word2vec model and the bigram/trigram ``Phrases`` models
        from fixed paths under ``./data``, then assembles and compiles a
        network made of a dropout layer, a 3-unit dense layer and a softmax
        activation over ``num_feature`` inputs.
        """
        # Training parameters.
        self.w2v_dim = 100
        self.num_feature = 400
        self.batch_size = 16
        self.num_epoch = 30

        # Vectors come from the locally saved model; the GoogleNews binary
        # vectors (Word2Vec.load_word2vec_format) were the alternative source.
        self.w2v_model = Word2Vec.load('./data/word2vec/w2v.model')

        # Set form of the model vocabulary for fast membership checks.
        self.index2word_set = set(self.w2v_model.index2word)

        self.bigram = Phrases.load('./data/bigram.dat')
        self.trigram = Phrases.load('./data/trigram.dat')

        print('Build model...')

        net = Sequential()
        net.add(Dropout(0.2, input_shape=(self.num_feature,)))
        net.add(Dense(3, input_dim=self.num_feature, init='orthogonal'))
        net.add(Activation('softmax'))
        net.compile(
            loss='categorical_crossentropy',
            optimizer='adam',
            class_mode="categorical",
        )
        self.model = net

        print('Model has been built!')
開發者ID:hujiewang,項目名稱:research,代碼行數:33,代碼來源:model2.py

示例2: testScoringDefault

    def testScoringDefault(self):
        """Check the default scoring, from the Mikolov word2vec paper."""
        bigram = Phrases(self.sentences, min_count=1, threshold=1, common_terms=self.common_terms)

        test_sentences = [['data', 'and', 'graph', 'survey', 'for', 'human', 'interface']]
        seen_scores = {
            round(score, 3)
            for _, score in bigram.export_phrases(test_sentences)
        }

        vocab = bigram.vocab
        min_count = float(bigram.min_count)
        # Vocabulary size is taken before any of the lookups below.
        len_vocab = float(len(vocab))

        def count(token):
            # Stored vocabulary count for a (bytes) token, as a float.
            return float(vocab[token])

        graph = count(b"graph")
        data = count(b"data")
        data_and_graph = count(b"data_and_graph")
        human = count(b"human")
        interface = count(b"interface")
        human_interface = count(b"human_interface")

        expected_scores = {
            # score for data and graph
            round((data_and_graph - min_count) / data / graph * len_vocab, 3),
            # score for human interface
            round((human_interface - min_count) / human / interface * len_vocab, 3),
        }
        assert seen_scores == expected_scores
開發者ID:lopusz,項目名稱:gensim,代碼行數:25,代碼來源:test_phrases.py

示例3: build_trigram_model

 def build_trigram_model(self, sentences, bigram):
     """Train a trigram ``Phrases`` model, save it, and return it.

     Args:
         sentences: corpus passed through ``bigram[...]`` before training.
         bigram: object indexed with ``sentences`` to produce the training
             input (presumably a trained bigram Phrases model -- confirm
             against the caller).

     Returns:
         The newly trained ``Phrases`` instance.
     """
     # Call form of print: valid on Python 3 and prints the same text on
     # Python 2, unlike the print statement.
     print("In Trigram Model")
     trigram = Phrases(bigram[sentences])
     # Plain string concatenation: assumes self.models already ends with a
     # path separator (or is a filename prefix) -- TODO confirm.
     dest = self.models + 'trigram_model'
     trigram.save(dest)

     return trigram
開發者ID:Wushaowei001,項目名稱:article-tagger-system,代碼行數:7,代碼來源:word2vec_builder.py

示例4: build

 def build(self):
     """Train the phrase model on ``self.sentences``, then prune and save it."""
     self.phrases = Phrases(self.sentences, min_count=1, threshold=self.threshold)

     # Additional rounds: retrain with the threshold scaled by
     # (1 / decay) ** (round - 1).
     # NOTE(review): every round trains on the raw sentences and replaces the
     # previous model instead of building on it -- confirm this is intended.
     for round_no in range(2, self.bigram_iter + 1):
         scaled_threshold = self.threshold * (1.0 / self.decay) ** (round_no - 1)
         self.phrases = Phrases(
             self.sentences,
             min_count=1,
             threshold=scaled_threshold,
         )

     # Prune phrases, then persist the model to file.
     self.prune()
     self.save()
開發者ID:carriercomm,項目名稱:medical-text,代碼行數:9,代碼來源:phrasedetection.py

示例5: testExportPhrases

    def testExportPhrases(self):
        """Check the bigrams reported by Phrases.export_phrases."""
        bigram = Phrases(sentences, min_count=1, threshold=1)

        # Only the phrase is of interest here; the score is discarded.
        seen_bigrams = {
            phrase for phrase, _ in bigram.export_phrases(sentences)
        }

        expected = {b'response time', b'graph minors', b'human interface'}
        assert seen_bigrams == expected
開發者ID:rmalouf,項目名稱:gensim,代碼行數:10,代碼來源:test_phrases.py

示例6: testMultipleBigramsSingleEntry

    def testMultipleBigramsSingleEntry(self):
        """A single entry should produce multiple bigrams."""
        bigram = Phrases(self.sentences, min_count=1, threshold=1)

        test_sentences = [['graph', 'minors', 'survey', 'human', 'interface']]
        seen_bigrams = {
            phrase for phrase, _ in bigram.export_phrases(test_sentences)
        }

        expected = {b'graph minors', b'human interface'}
        assert seen_bigrams == expected
開發者ID:lopusz,項目名稱:gensim,代碼行數:10,代碼來源:test_phrases.py

示例7: testCustomScorer

    def testCustomScorer(self):
        """Check Phrases with a custom scoring function."""
        bigram = Phrases(self.sentences, min_count=1, threshold=.001, scoring=dumb_scorer)

        test_sentences = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
        seen_scores = [
            score for _, score in bigram.export_phrases(test_sentences)
        ]

        # all scores 1
        assert all(seen_scores)
        # 'graph minors' and 'survey human' and 'interface system'
        assert len(seen_scores) == 3
開發者ID:lopusz,項目名稱:gensim,代碼行數:12,代碼來源:test_phrases.py

示例8: __init__

 def __init__(self):
     """Load the train/valid/test data and the saved bigram/trigram models."""
     reader = Reader()
     print('loading data')

     # (attribute to set, split identifier, message printed once loaded)
     splits = (
         ('X_train', TRAIN, 'train data has been loaded!'),
         ('X_valid', DEV, 'valid data has been loaded!'),
         ('X_test', TEST, 'test data has been loaded!'),
     )
     for attr_name, split, done_message in splits:
         setattr(self, attr_name, reader.getData(split))
         print(done_message)

     self.c_title = []
     self.c_body = []

     self.bigram = Phrases.load('./data/bigram.dat')
     self.trigram = Phrases.load('./data/trigram.dat')
開發者ID:hujiewang,項目名稱:research,代碼行數:13,代碼來源:analysis.py

示例9: testScoringNpmi

    def testScoringNpmi(self):
        """Check normalized pointwise mutual information scoring."""
        bigram = Phrases(self.sentences, min_count=1, threshold=.5, scoring='npmi')

        test_sentences = [['graph', 'minors', 'survey', 'human', 'interface']]
        seen_scores = {
            round(score, 3)
            for _, score in bigram.export_phrases(test_sentences)
        }

        expected_scores = {
            .882,  # score for graph minors
            .714,  # score for human interface
        }
        assert seen_scores == expected_scores
開發者ID:lopusz,項目名稱:gensim,代碼行數:13,代碼來源:test_phrases.py

示例10: testSaveLoadCustomScorer

    def testSaveLoadCustomScorer(self):
        """Save and reload a Phrases object that uses a custom scorer."""
        with temporary_file("test.pkl") as fpath:
            original = Phrases(self.sentences, min_count=1, threshold=.001, scoring=dumb_scorer)
            original.save(fpath)
            bigram_loaded = Phrases.load(fpath)

            test_sentences = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
            seen_scores = [
                score
                for _, score in bigram_loaded.export_phrases(test_sentences)
            ]

            # all scores 1
            assert all(seen_scores)
            # 'graph minors' and 'survey human' and 'interface system'
            assert len(seen_scores) == 3
開發者ID:lopusz,項目名稱:gensim,代碼行數:14,代碼來源:test_phrases.py

示例11: testExportPhrases

    def testExportPhrases(self):
        """Check export_phrases when common_terms are configured."""
        bigram = Phrases(self.sentences, min_count=1, threshold=1, common_terms=self.common_terms)

        # Only the phrase is of interest here; the score is discarded.
        seen_bigrams = {
            phrase for phrase, _ in bigram.export_phrases(self.sentences)
        }

        expected = {
            b'human interface',
            b'graph of trees',
            b'data and graph',
            b'lack of interest',
        }
        assert seen_bigrams == expected
開發者ID:lopusz,項目名稱:gensim,代碼行數:15,代碼來源:test_phrases.py

示例12: testSaveLoad

    def testSaveLoad(self):
        """Save and reload a Phrases object, then check its scores."""
        with temporary_file("test.pkl") as fpath:
            original = Phrases(self.sentences, min_count=1, threshold=1)
            original.save(fpath)
            bigram_loaded = Phrases.load(fpath)

            test_sentences = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
            seen_scores = {
                round(score, 3)
                for _, score in bigram_loaded.export_phrases(test_sentences)
            }

            expected_scores = {
                5.167,  # score for graph minors
                3.444,  # score for human interface
            }
            assert seen_scores == expected_scores
開發者ID:lopusz,項目名稱:gensim,代碼行數:16,代碼來源:test_phrases.py

示例13: testSaveLoadNoCommonTerms

 def testSaveLoadNoCommonTerms(self):
     """Ensure backwards compatibility with Phrases saved before common_terms."""
     legacy_path = datapath("phrases-no-common-terms.pkl")
     legacy_model = Phrases.load(legacy_path)
     self.assertEqual(legacy_model.common_terms, frozenset())

     # A Phraser can be built from the loaded model and applied, cf #1751;
     # neither step should raise.
     legacy_phraser = Phraser(legacy_model)
     legacy_phraser[["human", "interface", "survey"]]
開發者ID:lopusz,項目名稱:gensim,代碼行數:7,代碼來源:test_phrases.py

示例14: __init__

    def __init__(self, train_data, dev_data, test_data):
        """Store the datasets, set hyper-parameters and load the saved models.

        Args:
            train_data: stored as ``self.train_data``.
            dev_data: stored as ``self.dev_data``.
            test_data: stored as ``self.test_data``.
        """
        self.train_data = train_data
        self.dev_data = dev_data
        self.test_data = test_data

        # Hyper-parameters
        self.learningRate = 0.01
        self.trainSize = 2000
        self.testSize = 1000
        self.totalSize = self.trainSize + self.testSize
        self.maxEpochs = 10000
        self.num_processed = -1

        # Saved word2vec and phrase models, loaded from fixed relative paths.
        self.w2v_model = Word2Vec.load('./data/word2vec/w2v.model')
        self.bigram = Phrases.load('./data/bigram.dat')
        self.trigram = Phrases.load('./data/trigram.dat')
開發者ID:hujiewang,項目名稱:research,代碼行數:16,代碼來源:model.py

示例15: __init__

    def __init__(self, sentences, filename=None):
        """Load the phrase model from disk if present, otherwise build it.

        Args:
            sentences: corpus stored as ``self.sentences``.
            filename: model file name inside ``<package parent>/data/models``;
                when ``None`` a name is derived from the threshold, decay,
                dataset and tokenizer settings.
        """
        # model parameters
        self.sentences = sentences
        self.dataset = "CASEREPORT"
        self.tokenizer = "RAW"
        self.prune_stopwords = stopwords("pubmed")
        self.phrases = None
        self.threshold = 250
        self.decay = 2
        self.bigram_iter = 3

        # data file path: two directory levels above this file, then data/models
        this_file = os.path.abspath(__file__)
        base_dir = os.path.dirname(os.path.dirname(this_file))
        models_folder = os.path.join(base_dir, 'data', 'models')
        if filename is None:
            name_parts = (self.threshold, self.decay, self.dataset, self.tokenizer)
            filename = "PHRASE_%s_%s_%s_%s" % name_parts
        self.filepath = os.path.join(models_folder, filename)

        # No saved model at that path: train a new one.
        if not os.path.isfile(self.filepath):
            logging.info("CREATE - creating phrase data..")
            self.build()
            return

        # A model file already exists: reuse it.
        logging.info("LOADING - loading phrase data..")
        self.phrases = Phrases.load(self.filepath)
開發者ID:carriercomm,項目名稱:medical-text,代碼行數:26,代碼來源:phrasedetection.py


注:本文中的gensim.models.phrases.Phrases類示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。