本文整理匯總了Python中gensim.models.phrases.Phrases類的典型用法代碼示例。如果您正苦於以下問題：Python Phrases類的具體用法？Python Phrases怎麼用？Python Phrases使用的例子？那麼，這裡精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了Phrases類的15個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: __init__
def __init__(self):
    """Set training parameters, load saved models and build the classifier.

    Loads a Word2Vec model and two Phrases models (bigram, trigram) from
    fixed relative paths under ``./data`` and compiles a small Keras
    ``Sequential`` network: dropout -> 3-unit dense layer -> softmax.
    """
    # Training parameters.
    self.w2v_dim = 100
    self.num_feature = 400
    self.batch_size = 16
    self.num_epoch = 30

    # Pre-trained word vectors; the vocabulary is cached as a set.
    # (Alternative source kept from the original, disabled:
    #  Word2Vec.load_word2vec_format(
    #      './data/word2vec/GoogleNews-vectors-negative300.bin', binary=True))
    self.w2v_model = Word2Vec.load('./data/word2vec/w2v.model')
    self.index2word_set = set(self.w2v_model.index2word)

    # Saved phrase detectors.
    self.bigram = Phrases.load('./data/bigram.dat')
    self.trigram = Phrases.load('./data/trigram.dat')

    print('Build model...')
    self.model = net = Sequential()
    net.add(Dropout(0.2, input_shape=(self.num_feature,)))
    net.add(Dense(3, input_dim=self.num_feature, init='orthogonal'))
    net.add(Activation('softmax'))
    # NOTE(review): `init=` and `class_mode=` look like legacy Keras keyword
    # names - confirm against the Keras version this project pins.
    net.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        class_mode="categorical",
    )
    print('Model has been built!')
示例2: testScoringDefault
def testScoringDefault(self):
    """Check the default scorer against a hand-computed expected value.

    The docstring of the original names the Mikolov word2vec paper as the
    source of the formula; the expected scores below are recomputed from
    the model's own vocabulary counts.
    """
    model = Phrases(
        self.sentences, min_count=1, threshold=1, common_terms=self.common_terms
    )

    probe = [['data', 'and', 'graph', 'survey', 'for', 'human', 'interface']]
    observed = {round(score, 3) for _, score in model.export_phrases(probe)}

    def count(token):
        # Vocabulary keys are byte strings; counts are used as floats.
        return float(model.vocab[token])

    min_count = float(model.min_count)
    len_vocab = float(len(model.vocab))
    graph = count(b"graph")
    data = count(b"data")
    data_and_graph = count(b"data_and_graph")
    human = count(b"human")
    interface = count(b"interface")
    human_interface = count(b"human_interface")

    expected = {
        # score for data and graph
        round((data_and_graph - min_count) / data / graph * len_vocab, 3),
        # score for human interface
        round((human_interface - min_count) / human / interface * len_vocab, 3),
    }
    assert observed == expected
示例3: build_trigram_model
def build_trigram_model(self, sentences, bigram):
    """Train a Phrases model on bigram-transformed sentences and save it.

    Args:
        sentences: Sentence collection; it is indexed through ``bigram``
            (``bigram[sentences]``) before training.
        bigram: Object supporting ``bigram[sentences]``; presumably an
            already-trained bigram ``Phrases`` model - verify at the caller.

    Returns:
        The newly trained ``Phrases`` model.
    """
    # Call form instead of the Python 2 print statement: a SyntaxError on
    # Python 3 is avoided and the same text is printed on Python 2.
    print("In Trigram Model")
    trigram = Phrases(bigram[sentences])
    # Plain string concatenation: `self.models` is assumed to already end
    # with a path separator - TODO confirm.
    dest = self.models + 'trigram_model'
    trigram.save(dest)
    return trigram
示例4: build
def build(self):
    """Train the phrase model, then prune it and save it.

    A first model is trained with ``self.threshold``; for each further
    round (``self.bigram_iter`` rounds in total) a new model is trained
    with the threshold scaled by ``(1.0 / self.decay) ** round_index``.
    """
    self.phrases = Phrases(self.sentences, min_count=1, threshold=self.threshold)

    # Additional merge rounds.
    # NOTE(review): every round trains on `self.sentences` again, so each
    # new model replaces the previous one and only the last is kept -
    # confirm this is intended rather than training on the merged output.
    for round_index in range(1, self.bigram_iter):
        decayed_threshold = self.threshold * (1.0 / self.decay) ** round_index
        self.phrases = Phrases(
            self.sentences, min_count=1, threshold=decayed_threshold
        )

    self.prune()  # prune phrases
    self.save()   # save model to file
示例5: testExportPhrases
def testExportPhrases(self):
    """Verify which bigrams export_phrases yields for the training data."""
    model = Phrases(sentences, min_count=1, threshold=1)
    # Only the phrase matters here; the score is ignored.
    found = {phrase for phrase, _ in model.export_phrases(sentences)}
    expected = {b'response time', b'graph minors', b'human interface'}
    assert found == expected
示例6: testMultipleBigramsSingleEntry
def testMultipleBigramsSingleEntry(self):
    """One input sentence should be able to yield more than one bigram."""
    model = Phrases(self.sentences, min_count=1, threshold=1)
    probe = [['graph', 'minors', 'survey', 'human', 'interface']]
    found = {phrase for phrase, _ in model.export_phrases(probe)}
    expected = {b'graph minors', b'human interface'}
    assert found == expected
示例7: testCustomScorer
def testCustomScorer(self):
    """Exercise Phrases with a user-supplied scoring function."""
    model = Phrases(
        self.sentences, min_count=1, threshold=.001, scoring=dumb_scorer
    )
    probe = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
    scores = [score for _, score in model.export_phrases(probe)]

    # Every score must be truthy (original note: "all scores 1";
    # dumb_scorer is defined outside this snippet).
    assert all(scores)
    # 'graph minors' and 'survey human' and 'interface system'
    assert len(scores) == 3
示例8: __init__
def __init__(self):
    """Load the train/dev/test splits and the saved phrase models.

    Data comes from a ``Reader`` instance via ``getData``; progress is
    reported on stdout after each split is loaded.
    """
    loader = Reader()
    print('loading data')

    self.X_train = loader.getData(TRAIN)
    print('train data has been loaded!')

    self.X_valid = loader.getData(DEV)
    print('valid data has been loaded!')

    self.X_test = loader.getData(TEST)
    print('test data has been loaded!')

    # Start out empty; filled in elsewhere.
    self.c_title = []
    self.c_body = []

    # Saved phrase detectors.
    self.bigram = Phrases.load('./data/bigram.dat')
    self.trigram = Phrases.load('./data/trigram.dat')
示例9: testScoringNpmi
def testScoringNpmi(self):
    """Check the scores produced with ``scoring='npmi'``."""
    model = Phrases(self.sentences, min_count=1, threshold=.5, scoring='npmi')
    probe = [['graph', 'minors', 'survey', 'human', 'interface']]
    observed = {round(score, 3) for _, score in model.export_phrases(probe)}
    expected = {
        .882,  # score for graph minors
        .714,  # score for human interface
    }
    assert observed == expected
示例10: testSaveLoadCustomScorer
def testSaveLoadCustomScorer(self):
    """Round-trip a Phrases model that uses a custom scorer through disk."""
    with temporary_file("test.pkl") as fpath:
        Phrases(
            self.sentences, min_count=1, threshold=.001, scoring=dumb_scorer
        ).save(fpath)
        restored = Phrases.load(fpath)

        probe = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
        scores = [score for _, score in restored.export_phrases(probe)]

        # Every score must be truthy (original note: "all scores 1").
        assert all(scores)
        # 'graph minors' and 'survey human' and 'interface system'
        assert len(scores) == 3
示例11: testExportPhrases
def testExportPhrases(self):
    """Verify exported phrases when ``common_terms`` is supplied."""
    model = Phrases(
        self.sentences, min_count=1, threshold=1, common_terms=self.common_terms
    )
    found = {phrase for phrase, _ in model.export_phrases(self.sentences)}
    expected = {
        b'human interface',
        b'graph of trees',
        b'data and graph',
        b'lack of interest',
    }
    assert found == expected
示例12: testSaveLoad
def testSaveLoad(self):
    """Round-trip a Phrases model through disk and compare its scores."""
    with temporary_file("test.pkl") as fpath:
        Phrases(self.sentences, min_count=1, threshold=1).save(fpath)
        restored = Phrases.load(fpath)

        probe = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
        observed = {
            round(score, 3) for _, score in restored.export_phrases(probe)
        }
        expected = {
            5.167,  # score for graph minors
            3.444,  # score for human interface
        }
        assert observed == expected
示例13: testSaveLoadNoCommonTerms
def testSaveLoadNoCommonTerms(self):
    """Load a pickle saved before ``common_terms`` existed and use it."""
    legacy_path = datapath("phrases-no-common-terms.pkl")
    legacy_model = Phrases.load(legacy_path)
    self.assertEqual(legacy_model.common_terms, frozenset())

    # Building and applying a Phraser must not raise, cf #1751.
    frozen = Phraser(legacy_model)
    frozen[["human", "interface", "survey"]]
示例14: __init__
def __init__(self, train_data, dev_data, test_data):
    """Store the data splits, set hyper-parameters and load saved models.

    Args:
        train_data: Stored unchanged as ``self.train_data``.
        dev_data: Stored unchanged as ``self.dev_data``.
        test_data: Stored unchanged as ``self.test_data``.
    """
    self.train_data = train_data
    self.dev_data = dev_data
    self.test_data = test_data

    # Hyper-parameters
    self.learningRate = 0.01
    self.trainSize = 2000
    self.testSize = 1000
    self.totalSize = self.trainSize + self.testSize
    self.maxEpochs = 10000
    self.num_processed = -1

    # Saved models, read from fixed relative paths under ./data.
    self.w2v_model = Word2Vec.load('./data/word2vec/w2v.model')
    self.bigram = Phrases.load('./data/bigram.dat')
    self.trigram = Phrases.load('./data/trigram.dat')
示例15: __init__
def __init__(self, sentences, filename=None):
    """Configure the phrase model and load it from disk or build it.

    Args:
        sentences: Stored as ``self.sentences`` for later use.
        filename: Model file name inside ``<parent of this file's
            directory>/data/models``. When ``None`` a name is derived from
            the threshold, decay, dataset and tokenizer settings.
    """
    # Model parameters.
    self.sentences = sentences
    self.dataset = "CASEREPORT"
    self.tokenizer = "RAW"
    self.prune_stopwords = stopwords("pubmed")
    self.phrases = None
    self.threshold = 250
    self.decay = 2
    self.bigram_iter = 3

    # Data file path: two directory levels above this file, then data/models.
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    models_folder = os.path.join(base_dir, 'data', 'models')
    if filename is None:
        name_parts = (self.threshold, self.decay, self.dataset, self.tokenizer)
        filename = "PHRASE_%s_%s_%s_%s" % name_parts
    self.filepath = os.path.join(models_folder, filename)

    # Reuse an existing model file when there is one; otherwise build.
    if not os.path.isfile(self.filepath):
        logging.info("CREATE - creating phrase data..")
        self.build()
    else:
        logging.info("LOADING - loading phrase data..")
        self.phrases = Phrases.load(self.filepath)