当前位置: 首页>>代码示例>>Python>>正文


Python Phrases.load方法代码示例

本文整理汇总了Python中gensim.models.phrases.Phrases.load方法的典型用法代码示例。如果您正苦于以下问题:Python Phrases.load方法的具体用法?Python Phrases.load怎么用?Python Phrases.load使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在gensim.models.phrases.Phrases的用法示例。


在下文中一共展示了Phrases.load方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import load [as 别名]
    def __init__(self):
        """Load the pretrained models and build the Keras classifier.

        Sets the training parameters, loads the Word2Vec model and the
        bigram/trigram ``Phrases`` models from ``./data``, then builds and
        compiles a ``Sequential`` network (dropout -> dense(3) -> softmax)
        stored in ``self.model``.
        """
        # Training parameters.
        self.w2v_dim = 100
        self.num_feature = 400
        self.batch_size = 16
        self.num_epoch = 30

        # Disabled alternative embedding source:
        # Word2Vec.load_word2vec_format('./data/word2vec/GoogleNews-vectors-negative300.bin', binary=True)
        self.w2v_model = Word2Vec.load('./data/word2vec/w2v.model')

        # Set copy of the model's index2word sequence.
        self.index2word_set = set(self.w2v_model.index2word)

        # Phrase models (setting these to None instead was a disabled option).
        self.bigram = Phrases.load('./data/bigram.dat')
        self.trigram = Phrases.load('./data/trigram.dat')

        print('Build model...')

        # Assemble the network on a local name, then publish it on self.
        network = Sequential()
        self.model = network
        network.add(Dropout(0.2, input_shape=(self.num_feature,)))
        network.add(Dense(3, input_dim=self.num_feature, init='orthogonal'))
        network.add(Activation('softmax'))

        network.compile(
            loss='categorical_crossentropy',
            optimizer='adam',
            class_mode="categorical",
        )

        print('Model has been built!')
开发者ID:hujiewang,项目名称:research,代码行数:35,代码来源:model2.py

示例2: __init__

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import load [as 别名]
 def __init__(self):
     """Read the train/dev/test splits and load the phrase models.

     Each split is fetched through ``Reader.getData`` and a progress message
     is printed after it returns.
     """
     data_reader = Reader()
     print('loading data')

     self.X_train = data_reader.getData(TRAIN)
     print('train data has been loaded!')

     self.X_valid = data_reader.getData(DEV)
     print('valid data has been loaded!')

     self.X_test = data_reader.getData(TEST)
     print('test data has been loaded!')

     # Two separate empty lists, filled elsewhere.
     # NOTE(review): presumably title/body collections -- confirm against users.
     self.c_title, self.c_body = [], []

     self.bigram = Phrases.load('./data/bigram.dat')
     self.trigram = Phrases.load('./data/trigram.dat')
开发者ID:hujiewang,项目名称:research,代码行数:15,代码来源:analysis.py

示例3: __init__

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import load [as 别名]
    def __init__(self, train_data, dev_data, test_data):
        """Store the data splits, set hyper-parameters and load pretrained models.

        Args:
            train_data: training split, stored as ``self.train_data``.
            dev_data: development split, stored as ``self.dev_data``.
            test_data: test split, stored as ``self.test_data``.
        """
        self.train_data = train_data
        self.dev_data = dev_data
        self.test_data = test_data

        # Hyper-parameters.
        self.learningRate = 0.01
        self.trainSize = 2000
        self.testSize = 1000
        self.totalSize = self.trainSize + self.testSize  # 3000 with the values above
        self.maxEpochs = 10000
        self.num_processed = -1

        # Pretrained embedding and phrase models.
        self.w2v_model = Word2Vec.load('./data/word2vec/w2v.model')
        self.bigram = Phrases.load('./data/bigram.dat')
        self.trigram = Phrases.load('./data/trigram.dat')
开发者ID:hujiewang,项目名称:research,代码行数:18,代码来源:model.py

示例4: __init__

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import load [as 别名]
    def __init__(self, sentences, filename=None):
        """Set model parameters, then load the stored phrase model or build one.

        Args:
            sentences: corpus stored as ``self.sentences``.
            filename: name of the model file inside ``data/models``. When
                ``None``, a name is derived from the threshold, decay,
                dataset and tokenizer settings.
        """
        # Model parameters.
        self.sentences = sentences
        self.dataset = "CASEREPORT"
        self.tokenizer = "RAW"
        self.prune_stopwords = stopwords("pubmed")
        self.phrases = None
        self.threshold = 250
        self.decay = 2
        self.bigram_iter = 3

        # Model files live in <parent of this file's directory>/data/models.
        project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        if filename is None:
            name_parts = (self.threshold, self.decay, self.dataset, self.tokenizer)
            filename = "PHRASE_%s_%s_%s_%s" % name_parts
        self.filepath = os.path.join(project_root, 'data', 'models', filename)

        # Reuse a previously saved model if the file exists; otherwise build.
        if not os.path.isfile(self.filepath):
            logging.info("CREATE - creating phrase data..")
            self.build()
        else:
            logging.info("LOADING - loading phrase data..")
            self.phrases = Phrases.load(self.filepath)
开发者ID:carriercomm,项目名称:medical-text,代码行数:28,代码来源:phrasedetection.py

示例5: testSaveLoadNoCommonTerms

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import load [as 别名]
 def testSaveLoadNoCommonTerms(self):
     """Old Phrases pickles saved before ``common_terms`` existed must still load."""
     legacy_bigram = Phrases.load(datapath("phrases-no-common-terms.pkl"))
     self.assertEqual(legacy_bigram.common_terms, frozenset())

     # See #1751: building a Phraser and applying it must not raise.
     legacy_phraser = Phraser(legacy_bigram)
     legacy_phraser[["human", "interface", "survey"]]
开发者ID:lopusz,项目名称:gensim,代码行数:9,代码来源:test_phrases.py

示例6: testCompatibilty

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import load [as 别名]
    def testCompatibilty(self):
        """Stored Phraser/Phrases fixtures ("3.6.0" files) still detect 'graph_minors'."""
        stored_phraser = Phraser.load(datapath("phraser-3.6.0.model"))
        stored_phrases = Phrases.load(datapath("phrases-3.6.0.model"))

        tokens = ['trees', 'graph', 'minors']
        expected = ['trees', 'graph_minors']

        # Apply both models before asserting on either result.
        phraser_result = stored_phraser[tokens]
        phrases_result = stored_phrases[tokens]

        self.assertEqual(phraser_result, expected)
        self.assertEqual(phrases_result, expected)
开发者ID:RaRe-Technologies,项目名称:gensim,代码行数:14,代码来源:test_phrases.py

示例7: testSaveLoadStringScoring

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import load [as 别名]
    def testSaveLoadStringScoring(self):
        """Load a Phrases object that was saved with a string scoring parameter.

        This should ensure backwards compatibility with the previous version
        of Phrases.
        """
        loaded = Phrases.load(datapath("phrases-scoring-str.pkl"))
        corpus = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]

        # Scores are rounded to three decimals before comparison.
        observed = {round(score, 3) for _, score in loaded.export_phrases(corpus)}

        expected = {
            5.167,  # score for graph minors
            3.444,  # score for human interface
        }
        assert observed == expected
开发者ID:lopusz,项目名称:gensim,代码行数:15,代码来源:test_phrases.py

示例8: testSaveLoadCustomScorer

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import load [as 别名]
    def testSaveLoadCustomScorer(self):
        """Round-trip a Phrases object that uses a custom scorer through save/load."""
        with temporary_file("test.pkl") as fpath:
            trained = Phrases(self.sentences, min_count=1, threshold=.001, scoring=dumb_scorer)
            trained.save(fpath)
            reloaded = Phrases.load(fpath)

            corpus = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
            scores = [score for _, score in reloaded.export_phrases(corpus)]

            assert all(scores)  # all scores 1
            assert len(scores) == 3  # 'graph minors' and 'survey human' and 'interface system'
开发者ID:lopusz,项目名称:gensim,代码行数:16,代码来源:test_phrases.py

示例9: __init__

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import load [as 别名]
    def __init__(self):
        """Load the pretrained models and set up a random-forest search.

        Sets the training parameters, loads the Word2Vec model and the
        bigram/trigram ``Phrases`` models from ``./data``, and stores an
        unfitted ``RandomizedSearchCV`` over ``RandomForestClassifier``
        hyper-parameters in ``self.model``.
        """
        # Training parameters.
        self.w2v_dim = 100
        self.num_feature = 400
        self.batch_size = 16
        self.num_epoch = 1

        # Disabled alternative embedding source:
        # Word2Vec.load_word2vec_format('./data/word2vec/GoogleNews-vectors-negative300.bin', binary=True)
        self.w2v_model = Word2Vec.load('./data/word2vec/w2v.model')
        self.index2word_set = set(self.w2v_model.index2word)
        self.bigram = Phrases.load('./data/bigram.dat')
        self.trigram = Phrases.load('./data/trigram.dat')

        print('Build model...')

        # Distributions sampled by the randomized search. sp_randint(a, b)
        # draws integers from [a, b).
        param_dist = {
            "n_estimators": sp_randint(20, 250),
            "criterion": ["gini", "entropy"],
            "max_depth": sp_randint(10, 300),
            # Lower bound is 2, not 1: scikit-learn rejects an integer
            # min_samples_split below 2, so sampling 1 would make that
            # search candidate fail.
            "min_samples_split": sp_randint(2, 30),
            "min_samples_leaf": sp_randint(1, 30),
            "max_features": sp_randint(1, 200),
            "bootstrap": [True, False],
            'random_state': sp_randint(1, 1000000),
        }
        # Build a classifier.
        clf = RandomForestClassifier(n_jobs=8)
        # Randomized search: 10 sampled settings, 9-fold cross-validation.
        self.model = RandomizedSearchCV(clf, param_distributions=param_dist,
                                        n_iter=10, cv=9, n_jobs=8)

        print('Model has been built!')
开发者ID:hujiewang,项目名称:research,代码行数:38,代码来源:model3.py

示例10: testSaveLoad

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import load [as 别名]
    def testSaveLoad(self):
        """Round-trip a Phrases object through save/load and compare phrase scores."""
        with temporary_file("test.pkl") as fpath:
            trained = Phrases(self.sentences, min_count=1, threshold=1)
            trained.save(fpath)
            reloaded = Phrases.load(fpath)

            corpus = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
            # Scores are rounded to three decimals before comparison.
            observed = {round(score, 3) for _, score in reloaded.export_phrases(corpus)}

            expected = {
                5.167,  # score for graph minors
                3.444,  # score for human interface
            }
            assert observed == expected
开发者ID:lopusz,项目名称:gensim,代码行数:18,代码来源:test_phrases.py

示例11: testSaveLoadCustomScorer

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import load [as 别名]
    def testSaveLoadCustomScorer(self):
        """Round-trip a Phrases object that uses a custom scorer through save/load."""
        # Scratch file in the working directory; removed in the finally clause.
        pickle_path = "test_phrases_testSaveLoadCustomScorer_temp_save.pkl"

        try:
            trained = Phrases(self.sentences, min_count=1, threshold=.001, scoring=dumb_scorer)
            trained.save(pickle_path)
            reloaded = Phrases.load(pickle_path)

            corpus = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
            scores = [score for _, score in reloaded.export_phrases(corpus)]

            assert all(scores)  # all scores 1
            assert len(scores) == 3  # 'graph minors' and 'survey human' and 'interface system'

        finally:
            if os.path.exists(pickle_path):
                os.remove(pickle_path)
开发者ID:vishalbelsare,项目名称:gensim,代码行数:20,代码来源:test_phrases.py

示例12: testSaveLoad

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import load [as 别名]
    def testSaveLoad(self):
        """Round-trip a Phrases object through save/load and compare phrase scores."""
        # Scratch file in the working directory; removed in the finally clause.
        pickle_path = "test_phrases_testSaveLoad_temp_save.pkl"

        try:
            trained = Phrases(self.sentences, min_count=1, threshold=1)
            trained.save(pickle_path)
            reloaded = Phrases.load(pickle_path)

            corpus = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
            # Scores are rounded to three decimals before comparison.
            observed = {round(score, 3) for _, score in reloaded.export_phrases(corpus)}

            expected = {
                5.167,  # score for graph minors
                3.444,  # score for human interface
            }
            assert observed == expected

        finally:
            if os.path.exists(pickle_path):
                os.remove(pickle_path)
开发者ID:vishalbelsare,项目名称:gensim,代码行数:22,代码来源:test_phrases.py

示例13: testSaveLoadNoScoring

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import load [as 别名]
    def testSaveLoadNoScoring(self):
        """Save and load a Phrases object that has no ``scoring`` attribute.

        This should ensure backwards compatibility with old versions of
        Phrases.
        """
        # Scratch file in the working directory; removed in the finally clause.
        pickle_path = "test_phrases_testSaveLoadNoScoring_temp_save.pkl"

        try:
            trained = Phrases(self.sentences, min_count=1, threshold=1)
            # Drop the attribute before saving, so the pickle lacks `scoring`.
            del trained.scoring
            trained.save(pickle_path)
            reloaded = Phrases.load(pickle_path)

            corpus = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
            # Scores are rounded to three decimals before comparison.
            observed = {round(score, 3) for _, score in reloaded.export_phrases(corpus)}

            expected = {
                5.167,  # score for graph minors
                3.444,  # score for human interface
            }
            assert observed == expected

        finally:
            if os.path.exists(pickle_path):
                os.remove(pickle_path)
开发者ID:vishalbelsare,项目名称:gensim,代码行数:24,代码来源:test_phrases.py

示例14: __init__

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import load [as 别名]
    def __init__(self):
        """Open a TensorFlow session, load pretrained models and build the graph.

        The graph is a single affine layer (``W_1``/``b_1`` in variable scope
        ``"NN"``) followed by a clipped softmax, a cross-entropy loss and an
        Adam training op.
        """
        self.session = tf.Session()

        # Training parameters.
        self.w2v_dim = 100
        self.num_feature = 400
        self.batch_size = 32
        self.num_epoch = 10000
        self.num_hidden_1 = 3
        self.num_hidden_2 = 3

        self.number_of_layers = 3

        # A single shared max_len of 50 was a disabled alternative.
        self.max_len_title = 6
        self.max_len_body = 38

        self.d2v_model = Doc2Vec.load('data/word2vec/d2v.model')
        # Phrase models (setting these to None instead was a disabled option).
        self.bigram = Phrases.load('./data/bigram.dat')
        self.trigram = Phrases.load('./data/trigram.dat')

        # Model inputs.
        feature_dim = self.w2v_dim * 4
        self.input = tf.placeholder(tf.float32, [None, feature_dim])

        self.dropout_input = tf.placeholder(tf.float32)
        self.dropout_hidden = tf.placeholder(tf.float32)

        self.target = tf.placeholder(tf.float32, [None, 3])

        # Only the first layer is active. The second layer (W_2/b_2), the
        # sigmoid hidden activation and both dropout steps are disabled, so
        # the dropout placeholders above are not wired into the graph here.
        with tf.variable_scope("NN", initializer=tf.random_uniform_initializer()):
            weights = tf.get_variable("W_1", [feature_dim, self.num_hidden_1])
            bias = tf.get_variable("b_1", [self.num_hidden_1])
        logits = tf.matmul(self.input, weights) + bias

        # Clip the softmax output to [1e-7, 1.0] before taking the log.
        self.y_pred = tf.nn.softmax(logits)
        self.y_pred = tf.clip_by_value(self.y_pred, 1e-7, 1.0)
        # Negated mean of target * log(y_pred) over all elements.
        self.cross_entropy = -tf.reduce_mean(self.target * tf.log(self.y_pred))

        # Optimizer.
        # global_step is created but not passed to the optimizer below; the
        # gradient-clipping variant that used it is disabled.
        global_step = tf.Variable(0)
        self.optimizer = tf.train.AdamOptimizer(0.01).minimize(self.cross_entropy)

        print('Model has been built!')
开发者ID:hujiewang,项目名称:research,代码行数:68,代码来源:model7.py

示例15: Reader

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import load [as 别名]
from reader import Reader,TRAIN,TEST,DEV,EXTRA
from preprocess import preprocess
from gensim.models.phrases import Phrases
# Write every distinct preprocessed TRAIN+EXTRA sentence, one per line, to
# ./data/text_cleaned_phrase.txt.
reader = Reader()
sentences = reader.getText(TRAIN + EXTRA)

# Use phrases only when the phrase models have already been trained:
# both files are loaded from disk here.
bigram = Phrases.load('./data/bigram.dat')
trigram = Phrases.load('./data/trigram.dat')

sen_set = set()  # preprocessed sentences already written
with open('./data/text_cleaned_phrase.txt', 'w') as f:
    for sentence in sentences:
        s = preprocess(sentence, bigram=bigram, trigram=trigram)
        if s not in sen_set:
            sen_set.add(s)
            # Write the value that was just checked instead of running
            # preprocess() a second time on the same sentence.
            f.write(s)
            f.write('\n')


# Disabled variant, kept as a bare string literal so it never runs.
'''
# for phrase training only

with open('./data/text_cleaned.txt','w') as f:
    for sentence in sentences:
        f.write(preprocess(sentence,no_stopwords=True))
        f.write('\n')
'''
开发者ID:hujiewang,项目名称:research,代码行数:30,代码来源:getText.py


注:本文中的gensim.models.phrases.Phrases.load方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。