本文整理汇总了Python中gensim.models.phrases.Phrases.save方法的典型用法代码示例。如果您正苦于以下问题：Python Phrases.save方法的具体用法？Python Phrases.save怎么用？Python Phrases.save使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类gensim.models.phrases.Phrases的用法示例。
在下文中一共展示了Phrases.save方法的9个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: build_trigram_model
# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import save [as 别名]
def build_trigram_model(self, sentences, bigram):
    """Train a trigram ``Phrases`` model and save it under ``self.models``.

    ``sentences`` is first passed through ``bigram`` (``bigram[sentences]``)
    and a new ``Phrases`` model is trained on the result.

    :param sentences: corpus to train on; handed to ``bigram[...]`` as-is.
    :param bigram: object indexed with ``sentences``; presumably an already
        trained bigram ``Phrases`` model -- confirm against the caller.
    :return: the newly trained ``Phrases`` instance.
    """
    # The parenthesised single-argument form prints the same text under
    # Python 2 and is also valid Python 3, unlike the bare print statement.
    print("In Trigram Model")
    trigram = Phrases(bigram[sentences])
    # NOTE(review): plain string concatenation, so ``self.models`` presumably
    # already ends with a path separator -- confirm.
    dest = self.models + 'trigram_model'
    trigram.save(dest)
    return trigram
示例2: testSaveLoadCustomScorer
# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import save [as 别名]
def testSaveLoadCustomScorer(self):
    """Round-trip a Phrases model built with a custom scorer through save/load."""
    with temporary_file("test.pkl") as fpath:
        original = Phrases(self.sentences, min_count=1, threshold=.001, scoring=dumb_scorer)
        original.save(fpath)
        restored = Phrases.load(fpath)

        corpus = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
        # Collect only the score of every phrase exported by the reloaded model.
        seen_scores = [score for _phrase, score in restored.export_phrases(corpus)]

        assert all(seen_scores)  # every exported score is truthy
        assert len(seen_scores) == 3  # 'graph minors' and 'survey human' and 'interface system'
示例3: testSaveLoad
# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import save [as 别名]
def testSaveLoad(self):
    """Round-trip a Phrases model through save/load and check the exported scores."""
    with temporary_file("test.pkl") as fpath:
        original = Phrases(self.sentences, min_count=1, threshold=1)
        original.save(fpath)
        restored = Phrases.load(fpath)

        corpus = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
        # Scores are rounded to three decimals before comparison.
        seen_scores = {round(score, 3) for _phrase, score in restored.export_phrases(corpus)}

        expected_scores = {
            5.167,  # score for graph minors
            3.444,  # score for human interface
        }
        assert seen_scores == expected_scores
示例4: testSaveLoadCustomScorer
# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import save [as 别名]
def testSaveLoadCustomScorer(self):
    """Round-trip a Phrases model built with a custom scorer through save/load."""
    pkl_path = "test_phrases_testSaveLoadCustomScorer_temp_save.pkl"
    try:
        original = Phrases(self.sentences, min_count=1, threshold=.001, scoring=dumb_scorer)
        original.save(pkl_path)
        restored = Phrases.load(pkl_path)

        corpus = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
        # Collect only the score of every phrase exported by the reloaded model.
        seen_scores = [score for _phrase, score in restored.export_phrases(corpus)]

        assert all(seen_scores)  # every exported score is truthy
        assert len(seen_scores) == 3  # 'graph minors' and 'survey human' and 'interface system'
    finally:
        # Remove the pickle from the working directory whether or not the test passed.
        if os.path.exists(pkl_path):
            os.remove(pkl_path)
示例5: testSaveLoad
# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import save [as 别名]
def testSaveLoad(self):
    """Round-trip a Phrases model through save/load and check the exported scores."""
    pkl_path = "test_phrases_testSaveLoad_temp_save.pkl"
    try:
        original = Phrases(self.sentences, min_count=1, threshold=1)
        original.save(pkl_path)
        restored = Phrases.load(pkl_path)

        corpus = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
        # Scores are rounded to three decimals before comparison.
        seen_scores = {round(score, 3) for _phrase, score in restored.export_phrases(corpus)}

        expected_scores = {
            5.167,  # score for graph minors
            3.444,  # score for human interface
        }
        assert seen_scores == expected_scores
    finally:
        # Remove the pickle from the working directory whether or not the test passed.
        if os.path.exists(pkl_path):
            os.remove(pkl_path)
示例6: testSaveLoadNoScoring
# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import save [as 别名]
def testSaveLoadNoScoring(self):
    """Round-trip a Phrases model whose ``scoring`` attribute has been deleted.

    Intended as a backwards-compatibility check for pickles written by older
    versions of Phrases that carry no ``scoring`` attribute.
    """
    pkl_path = "test_phrases_testSaveLoadNoScoring_temp_save.pkl"
    try:
        original = Phrases(self.sentences, min_count=1, threshold=1)
        # Drop the attribute before saving so the pickle is written without it.
        del original.scoring
        original.save(pkl_path)
        restored = Phrases.load(pkl_path)

        corpus = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
        # Scores are rounded to three decimals before comparison.
        seen_scores = {round(score, 3) for _phrase, score in restored.export_phrases(corpus)}

        expected_scores = {
            5.167,  # score for graph minors
            3.444,  # score for human interface
        }
        assert seen_scores == expected_scores
    finally:
        # Remove the pickle from the working directory whether or not the test passed.
        if os.path.exists(pkl_path):
            os.remove(pkl_path)
示例7: build_bigram_model
# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import save [as 别名]
def build_bigram_model(self, sentences, count):
    """Train a bigram ``Phrases`` model and save it under ``self.models``.

    :param sentences: corpus passed straight to ``Phrases``.
    :param count: value forwarded as the ``min_count`` argument of ``Phrases``.
    :return: the newly trained ``Phrases`` instance.
    """
    # The parenthesised single-argument form prints the same text under
    # Python 2 and is also valid Python 3, unlike the bare print statement.
    print("In Bigram Model")
    bigram = Phrases(sentences, min_count=count)
    # NOTE(review): plain string concatenation, so ``self.models`` presumably
    # already ends with a path separator -- confirm.
    dest = self.models + 'bigram_model'
    bigram.save(dest)
    return bigram
示例8: PmiPhraseDetector
# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import save [as 别名]
class PmiPhraseDetector(object):
    """
    Detection using Pointwise Mutual Information (PMI)

    Wraps a ``Phrases`` model that is either loaded from a file under
    ``data/models`` or, when no such file exists, trained from ``sentences``,
    pruned and saved there.
    """

    def __init__(self, sentences, filename=None):
        """Load the phrase model from disk if present, otherwise build it.

        :param sentences: corpus handed to ``Phrases`` when a model is built.
        :param filename: name of the model file inside the models folder;
            when ``None`` a name is derived from threshold, decay, dataset
            and tokenizer.
        """
        # model parameters
        self.sentences = sentences
        self.dataset = "CASEREPORT"
        self.tokenizer = "RAW"
        self.prune_stopwords = stopwords("pubmed")
        self.phrases = None
        self.threshold = 250
        self.decay = 2
        self.bigram_iter = 3

        # data file path: <two directories above this file>/data/models/<filename>
        package_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        models_folder = os.path.join(package_root, 'data', 'models')
        if filename is None:
            filename = "PHRASE_%s_%s_%s_%s" % (self.threshold, self.decay, self.dataset, self.tokenizer)
        self.filepath = os.path.join(models_folder, filename)

        # does identical model already exist?
        if os.path.isfile(self.filepath):
            logging.info("LOADING - loading phrase data..")
            self.phrases = Phrases.load(self.filepath)
        else:
            logging.info("CREATE - creating phrase data..")
            self.build()

    def build(self):
        """Train the phrase model, prune it and save it to ``self.filepath``."""
        self.phrases = Phrases(self.sentences, min_count=1, threshold=self.threshold)
        # run additional merge rounds; round i uses threshold * (1/decay)**(i-1)
        # NOTE(review): each round retrains from the raw ``self.sentences`` and
        # overwrites ``self.phrases``, so only the last round's model is kept --
        # confirm whether rounds were meant to build on the previous model.
        for i in range(2, self.bigram_iter + 1):
            round_threshold = self.threshold * (1.0 / self.decay) ** (i - 1)
            self.phrases = Phrases(self.sentences, min_count=1, threshold=round_threshold)
        # prune phrases
        self.prune()
        # save model to file
        self.save()

    def save(self):
        """Save the phrase model to ``self.filepath``."""
        self.phrases.save(self.filepath)

    def prune(self, min_reduce=1):
        """
        Remove phrases beginning or ending with a stopword.
        Then hands the vocabulary to ``prune_vocab``; presumably that drops
        entries less frequent than ``min_reduce`` -- confirm against the helper.

        :param min_reduce: frequency threshold forwarded to ``prune_vocab``
        """
        vocab = self.phrases.vocab
        # Materialise the keys first so entries can be deleted while looping.
        multiword_phrases = [phrase for phrase in vocab if "_" in phrase]
        for phrase in multiword_phrases:
            words = phrase.split("_")
            first_word, last_word = words[0], words[-1]
            if first_word in self.prune_stopwords or last_word in self.prune_stopwords:
                del vocab[phrase]
        prune_vocab(vocab, min_reduce)

    def detect(self, sentence):
        """Return ``self.phrases[sentence]``, i.e. the model applied to ``sentence``."""
        return self.phrases[sentence]

    def print_phrases(self, threshold=100):
        """Print every vocabulary entry containing "_" whose count exceeds ``threshold``.

        :param threshold: entries are printed only when their count is strictly greater.
        """
        vocab = self.phrases.vocab
        for word in vocab:
            count = vocab[word]
            if "_" in word and count > threshold:
                # A single pre-formatted argument gives "word count" on both
                # Python 2 and Python 3; the bare print statement is Python 2 only.
                print("%s %s" % (word, count))
示例9: Phrases
# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 或者: from gensim.models.phrases.Phrases import save [as 别名]
from gensim.models.phrases import Phrases
from gensim.models.word2vec import LineSentence
# Train bigram and trigram phrase models over the cleaned corpus and persist both.
PHRASE_THRESHOLD = 50.0  # same threshold is used for both passes

sentence_stream = LineSentence('./data/text_cleaned.txt')

# First pass: train directly on the sentence stream.
bigram = Phrases(sentence_stream, threshold=PHRASE_THRESHOLD)
bigram.save('./data/bigram.dat')

# Second pass: train on the stream after it has been passed through the bigram model.
trigram = Phrases(bigram[sentence_stream], threshold=PHRASE_THRESHOLD)
trigram.save('./data/trigram.dat')