This page collects typical usage examples of the Python method gensim.models.phrases.Phrases.load. If you are unsure what Phrases.load does, how to call it, or want to see it used in real code, the selected examples below may help. You can also read further about the class it belongs to, gensim.models.phrases.Phrases.
The following shows 15 code examples of Phrases.load, sorted by popularity by default.
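Before the examples, here is a minimal, self-contained sketch of the save/load round trip that all of them rely on. The toy corpus, the file path './bigram_demo.dat', and the parameter values are illustrative assumptions, not taken from any example below.

from gensim.models.phrases import Phrases

# Toy corpus: each sentence is a list of tokens (illustrative data only).
sentences = [
    ['human', 'interface', 'computer'],
    ['graph', 'minors', 'survey'],
    ['graph', 'minors', 'trees'],
]

# Train a bigram detector; min_count and threshold are illustrative values.
bigram = Phrases(sentences, min_count=1, threshold=1)

# Persist the model to disk, then restore it with the class method Phrases.load.
bigram.save('./bigram_demo.dat')  # hypothetical path
bigram_loaded = Phrases.load('./bigram_demo.dat')

# Applying the loaded model to a tokenized sentence joins detected collocations with '_'.
print(bigram_loaded[['graph', 'minors', 'survey']])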
Example 1: __init__
# Required import: from gensim.models.phrases import Phrases [as alias]
# Or: from gensim.models.phrases.Phrases import load [as alias]
def __init__(self):
    '''
    Training parameters:
    '''
    self.w2v_dim=100
    self.num_feature=400
    self.batch_size=16
    self.num_epoch=30
    # self.w2v_model=Word2Vec.load_word2vec_format('./data/word2vec/GoogleNews-vectors-negative300.bin', binary=True)
    self.w2v_model=Word2Vec.load('./data/word2vec/w2v.model')
    self.index2word_set = set(self.w2v_model.index2word)
    #self.bigram=None
    #self.trigram=None
    self.bigram=Phrases.load('./data/bigram.dat')
    self.trigram=Phrases.load('./data/trigram.dat')
    print('Build model...')
    self.model = Sequential()
    self.model.add(Dropout(0.2,input_shape=(self.num_feature,)))
    self.model.add(Dense(3, input_dim=self.num_feature, init='orthogonal'))
    self.model.add(Activation('softmax'))
    self.model.compile(loss='categorical_crossentropy', optimizer='adam', class_mode="categorical")
    print('Model has been built!')
Example 2: __init__
# Required import: from gensim.models.phrases import Phrases [as alias]
# Or: from gensim.models.phrases.Phrases import load [as alias]
def __init__(self):
    reader = Reader()
    print('loading data')
    self.X_train=reader.getData(TRAIN)
    print('train data has been loaded!')
    self.X_valid=reader.getData(DEV)
    print('valid data has been loaded!')
    self.X_test=reader.getData(TEST)
    print('test data has been loaded!')
    self.c_title=[]
    self.c_body=[]
    self.bigram=Phrases.load('./data/bigram.dat')
    self.trigram=Phrases.load('./data/trigram.dat')
Example 3: __init__
# Required import: from gensim.models.phrases import Phrases [as alias]
# Or: from gensim.models.phrases.Phrases import load [as alias]
def __init__(self,train_data,dev_data,test_data):
    self.train_data=train_data
    self.dev_data=dev_data
    self.test_data=test_data
    # Hyper-parameters
    self.learningRate=0.01
    self.trainSize=2000
    self.testSize=1000
    self.totalSize = self.trainSize + self.testSize
    self.maxEpochs=10000
    self.num_processed=-1
    self.w2v_model=Word2Vec.load('./data/word2vec/w2v.model')
    self.bigram=Phrases.load('./data/bigram.dat')
    self.trigram=Phrases.load('./data/trigram.dat')
Example 4: __init__
# Required import: from gensim.models.phrases import Phrases [as alias]
# Or: from gensim.models.phrases.Phrases import load [as alias]
def __init__(self, sentences, filename=None):
    # model parameters
    self.sentences = sentences
    self.dataset = "CASEREPORT"
    self.tokenizer = "RAW"
    self.prune_stopwords = stopwords("pubmed")
    self.phrases = None
    self.threshold = 250
    self.decay = 2
    self.bigram_iter = 3
    # data file path
    models_folder = os.path.join(*[os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data', 'models'])
    if filename is None:
        filename = "PHRASE_%s_%s_%s_%s" % (self.threshold, self.decay, self.dataset, self.tokenizer, )
    self.filepath = os.path.join(models_folder, filename)
    # does an identical model already exist?
    model_exists = os.path.isfile(self.filepath)
    if model_exists:
        logging.info("LOADING - loading phrase data..")
        self.phrases = Phrases.load(self.filepath)
    else:
        logging.info("CREATE - creating phrase data..")
        self.build()
Example 5: testSaveLoadNoCommonTerms
# Required import: from gensim.models.phrases import Phrases [as alias]
# Or: from gensim.models.phrases.Phrases import load [as alias]
def testSaveLoadNoCommonTerms(self):
    """ Ensure backwards compatibility with old versions of Phrases, before common_terms"""
    bigram_loaded = Phrases.load(datapath("phrases-no-common-terms.pkl"))
    self.assertEqual(bigram_loaded.common_terms, frozenset())
    # can make a phraser, cf #1751
    phraser = Phraser(bigram_loaded) # does not raise
    phraser[["human", "interface", "survey"]] # does not raise
Example 6: testCompatibilty
# Required import: from gensim.models.phrases import Phrases [as alias]
# Or: from gensim.models.phrases.Phrases import load [as alias]
def testCompatibilty(self):
    phr = Phraser.load(datapath("phraser-3.6.0.model"))
    model = Phrases.load(datapath("phrases-3.6.0.model"))
    test_sentences = ['trees', 'graph', 'minors']
    expected_res = ['trees', 'graph_minors']
    phr_out = phr[test_sentences]
    model_out = model[test_sentences]
    self.assertEqual(phr_out, expected_res)
    self.assertEqual(model_out, expected_res)
Example 7: testSaveLoadStringScoring
# Required import: from gensim.models.phrases import Phrases [as alias]
# Or: from gensim.models.phrases.Phrases import load [as alias]
def testSaveLoadStringScoring(self):
    """ Saving and loading a Phrases object with a string scoring parameter.
    This should ensure backwards compatibility with the previous version of Phrases"""
    bigram_loaded = Phrases.load(datapath("phrases-scoring-str.pkl"))
    seen_scores = set()
    test_sentences = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
    for phrase, score in bigram_loaded.export_phrases(test_sentences):
        seen_scores.add(round(score, 3))
    assert seen_scores == set([
        5.167, # score for graph minors
        3.444 # score for human interface
    ])
Example 8: testSaveLoadCustomScorer
# Required import: from gensim.models.phrases import Phrases [as alias]
# Or: from gensim.models.phrases.Phrases import load [as alias]
def testSaveLoadCustomScorer(self):
    """ saving and loading a Phrases object with a custom scorer """
    with temporary_file("test.pkl") as fpath:
        bigram = Phrases(self.sentences, min_count=1, threshold=.001, scoring=dumb_scorer)
        bigram.save(fpath)
        bigram_loaded = Phrases.load(fpath)
        seen_scores = []
        test_sentences = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
        for phrase, score in bigram_loaded.export_phrases(test_sentences):
            seen_scores.append(score)
        assert all(seen_scores) # all scores 1
        assert len(seen_scores) == 3 # 'graph minors' and 'survey human' and 'interface system'
Example 9: __init__
# Required import: from gensim.models.phrases import Phrases [as alias]
# Or: from gensim.models.phrases.Phrases import load [as alias]
def __init__(self):
    '''
    Training parameters:
    '''
    self.w2v_dim=100
    self.num_feature=400
    self.batch_size=16
    self.num_epoch=1
    #self.w2v_model=Word2Vec.load_word2vec_format('./data/word2vec/GoogleNews-vectors-negative300.bin', binary=True)
    self.w2v_model=Word2Vec.load('./data/word2vec/w2v.model')
    self.index2word_set = set(self.w2v_model.index2word)
    self.bigram=Phrases.load('./data/bigram.dat')
    self.trigram=Phrases.load('./data/trigram.dat')
    print('Build model...')
    param_dist = {
        "n_estimators":sp_randint(20,250),
        "criterion": ["gini", "entropy"],
        "max_depth": sp_randint(10, 300),
        "min_samples_split": sp_randint(1, 30),
        "min_samples_leaf": sp_randint(1, 30),
        "max_features": sp_randint(1, 200),
        "bootstrap": [True, False],
        'random_state':sp_randint(1, 1000000),
    }
    # build a classifier
    clf = RandomForestClassifier(n_jobs=8)
    # run randomized search
    self.model=RandomizedSearchCV(clf, param_distributions=param_dist,
                                  n_iter=10, cv=9, n_jobs=8)
    print('Model has been built!')
Example 10: testSaveLoad
# Required import: from gensim.models.phrases import Phrases [as alias]
# Or: from gensim.models.phrases.Phrases import load [as alias]
def testSaveLoad(self):
    """ Saving and loading a Phrases object."""
    with temporary_file("test.pkl") as fpath:
        bigram = Phrases(self.sentences, min_count=1, threshold=1)
        bigram.save(fpath)
        bigram_loaded = Phrases.load(fpath)
        seen_scores = set()
        test_sentences = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
        for phrase, score in bigram_loaded.export_phrases(test_sentences):
            seen_scores.add(round(score, 3))
        assert seen_scores == set([
            5.167, # score for graph minors
            3.444 # score for human interface
        ])
Example 11: testSaveLoadCustomScorer
# Required import: from gensim.models.phrases import Phrases [as alias]
# Or: from gensim.models.phrases.Phrases import load [as alias]
def testSaveLoadCustomScorer(self):
    """ saving and loading a Phrases object with a custom scorer """
    try:
        bigram = Phrases(self.sentences, min_count=1, threshold=.001, scoring=dumb_scorer)
        bigram.save("test_phrases_testSaveLoadCustomScorer_temp_save.pkl")
        bigram_loaded = Phrases.load("test_phrases_testSaveLoadCustomScorer_temp_save.pkl")
        seen_scores = []
        test_sentences = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
        for phrase, score in bigram_loaded.export_phrases(test_sentences):
            seen_scores.append(score)
        assert all(seen_scores) # all scores 1
        assert len(seen_scores) == 3 # 'graph minors' and 'survey human' and 'interface system'
    finally:
        if os.path.exists("test_phrases_testSaveLoadCustomScorer_temp_save.pkl"):
            os.remove("test_phrases_testSaveLoadCustomScorer_temp_save.pkl")
Example 12: testSaveLoad
# Required import: from gensim.models.phrases import Phrases [as alias]
# Or: from gensim.models.phrases.Phrases import load [as alias]
def testSaveLoad(self):
    """ Saving and loading a Phrases object."""
    try:
        bigram = Phrases(self.sentences, min_count=1, threshold=1)
        bigram.save("test_phrases_testSaveLoad_temp_save.pkl")
        bigram_loaded = Phrases.load("test_phrases_testSaveLoad_temp_save.pkl")
        seen_scores = set()
        test_sentences = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
        for phrase, score in bigram_loaded.export_phrases(test_sentences):
            seen_scores.add(round(score, 3))
        assert seen_scores == set([
            5.167, # score for graph minors
            3.444 # score for human interface
        ])
    finally:
        if os.path.exists("test_phrases_testSaveLoad_temp_save.pkl"):
            os.remove("test_phrases_testSaveLoad_temp_save.pkl")
Example 13: testSaveLoadNoScoring
# Required import: from gensim.models.phrases import Phrases [as alias]
# Or: from gensim.models.phrases.Phrases import load [as alias]
def testSaveLoadNoScoring(self):
    """ Saving and loading a Phrases object with no scoring parameter.
    This should ensure backwards compatibility with old versions of Phrases"""
    try:
        bigram = Phrases(self.sentences, min_count=1, threshold=1)
        del(bigram.scoring)
        bigram.save("test_phrases_testSaveLoadNoScoring_temp_save.pkl")
        bigram_loaded = Phrases.load("test_phrases_testSaveLoadNoScoring_temp_save.pkl")
        seen_scores = set()
        test_sentences = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
        for phrase, score in bigram_loaded.export_phrases(test_sentences):
            seen_scores.add(round(score, 3))
        assert seen_scores == set([
            5.167, # score for graph minors
            3.444 # score for human interface
        ])
    finally:
        if os.path.exists("test_phrases_testSaveLoadNoScoring_temp_save.pkl"):
            os.remove("test_phrases_testSaveLoadNoScoring_temp_save.pkl")
Example 14: __init__
# Required import: from gensim.models.phrases import Phrases [as alias]
# Or: from gensim.models.phrases.Phrases import load [as alias]
def __init__(self):
    self.session = tf.Session()
    '''
    Training parameters:
    '''
    self.w2v_dim=100
    self.num_feature=400
    self.batch_size=32
    self.num_epoch=10000
    self.num_hidden_1=3
    self.num_hidden_2=3
    self.number_of_layers=3
    #self.max_len = 50
    self.max_len_title=6
    self.max_len_body=38
    self.d2v_model=Doc2Vec.load('data/word2vec/d2v.model')
    #self.bigram = None
    #self.trigram =None
    self.bigram=Phrases.load('./data/bigram.dat')
    self.trigram=Phrases.load('./data/trigram.dat')
    # Model
    self.input=tf.placeholder(tf.float32,[None,self.w2v_dim*4])
    self.dropout_input = tf.placeholder(tf.float32)
    self.dropout_hidden = tf.placeholder(tf.float32)
    self.target = tf.placeholder(tf.float32, [None, 3])
    # 2-layer NN
    with tf.variable_scope("NN", initializer=tf.random_uniform_initializer()):
        W_1 = tf.get_variable("W_1", [self.w2v_dim*4, self.num_hidden_1])
        b_1 = tf.get_variable("b_1", [self.num_hidden_1])
        # W_2 = tf.get_variable("W_2", [self.num_hidden_1, self.num_hidden_2])
        # b_2 = tf.get_variable("b_2", [self.num_hidden_2])
        # input = tf.nn.dropout(input, self.dropout_input)
        # y_1 = tf.sigmoid(tf.matmul(self.input, W_1)+b_1)
        # y_1 = tf.nn.dropout(y_1, self.dropout_hidden)
        # y_2 = tf.matmul(y_1, W_2)+b_2
        y_2 = tf.matmul(self.input, W_1)+b_1
        self.y_pred=tf.nn.softmax(y_2)
        self.y_pred=tf.clip_by_value(self.y_pred,1e-7, 1.0)
        self.cross_entropy = -tf.reduce_mean(self.target*tf.log(self.y_pred))
        # Optimizer.
        global_step = tf.Variable(0)
        # optimizer = tf.train.GradientDescentOptimizer(0.1)
        # optimizer = tf.train.AdamOptimizer(0.01)
        # gradients, v = zip(*optimizer.compute_gradients(self.cross_entropy))
        # gradients, _ = tf.clip_by_global_norm(gradients, 50)
        # self.optimizer= optimizer.apply_gradients(zip(gradients, v), global_step=global_step)
        self.optimizer = tf.train.AdamOptimizer(0.01).minimize(self.cross_entropy)
    print('Model has been built!')
Example 15: Reader
# Required import: from gensim.models.phrases import Phrases [as alias]
# Or: from gensim.models.phrases.Phrases import load [as alias]
from reader import Reader,TRAIN,TEST,DEV,EXTRA
from preprocess import preprocess
from gensim.models.phrases import Phrases

reader = Reader()
sentences=reader.getText(TRAIN+EXTRA)
# use the phrase models only after they have already been trained
bigram=Phrases.load('./data/bigram.dat')
trigram=Phrases.load('./data/trigram.dat')
sen_set=set()
with open('./data/text_cleaned_phrase.txt','w') as f:
    for sentence in sentences:
        s=preprocess(sentence,bigram=bigram,trigram=trigram)
        if s not in sen_set:
            sen_set.add(s)
            f.write(s)
            f.write('\n')

'''
# for phrase training only
with open('./data/text_cleaned.txt','w') as f:
    for sentence in sentences:
        f.write(preprocess(sentence,no_stopwords=True))
        f.write('\n')
'''