当前位置: 首页>>代码示例>>Python>>正文


Python Phrases.save方法代码示例

本文整理汇总了Python中gensim.models.phrases.Phrases.save方法的典型用法代码示例。如果您正苦于以下问题:Python Phrases.save方法的具体用法?Python Phrases.save怎么用?Python Phrases.save使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类gensim.models.phrases.Phrases的用法示例。


在下文中一共展示了Phrases.save方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: build_trigram_model

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 注意: save 是 Phrases 的实例方法,不能单独导入;请先创建 Phrases 对象,再调用其 save(路径) 方法
 def build_trigram_model(self,sentences,bigram):
     """Train a trigram phrase model on bigram-transformed sentences and save it.

     :param sentences: sentences to train on; they are first passed through
         ``bigram`` via ``bigram[sentences]``
     :param bigram: object applied to ``sentences`` by indexing
         (presumably a trained ``Phrases`` bigram model -- confirm with caller)
     :return: the newly trained ``Phrases`` instance, which has also been
         saved to ``self.models + 'trigram_model'``
     """
     # Single-argument parenthesised print behaves the same on Python 2 and
     # is valid syntax on Python 3 (the bare print statement is not).
     print("In Trigram Model")
     trigram = Phrases(bigram[sentences])
     # NOTE(review): plain string concatenation -- assumes self.models already
     # ends with a path separator; confirm where self.models is set.
     dest = self.models + 'trigram_model'
     trigram.save(dest)

     return trigram
开发者ID:Wushaowei001,项目名称:article-tagger-system,代码行数:9,代码来源:word2vec_builder.py

示例2: testSaveLoadCustomScorer

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 注意: save 是 Phrases 的实例方法,不能单独导入;请先创建 Phrases 对象,再调用其 save(路径) 方法
    def testSaveLoadCustomScorer(self):
        """Round-trip a Phrases model built with a custom scorer through save/load."""
        probe_sentences = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]

        with temporary_file("test.pkl") as fpath:
            trained = Phrases(self.sentences, min_count=1, threshold=.001, scoring=dumb_scorer)
            trained.save(fpath)
            restored = Phrases.load(fpath)

            # Collect only the scores reported by the reloaded model.
            seen_scores = [score for _, score in restored.export_phrases(probe_sentences)]

            assert all(seen_scores)  # all scores 1
            assert len(seen_scores) == 3  # 'graph minors' and 'survey human' and 'interface system'
开发者ID:lopusz,项目名称:gensim,代码行数:16,代码来源:test_phrases.py

示例3: testSaveLoad

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 注意: save 是 Phrases 的实例方法,不能单独导入;请先创建 Phrases 对象,再调用其 save(路径) 方法
    def testSaveLoad(self):
        """Round-trip a Phrases model through save/load and check exported scores."""
        probe_sentences = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]

        with temporary_file("test.pkl") as fpath:
            trained = Phrases(self.sentences, min_count=1, threshold=1)
            trained.save(fpath)
            restored = Phrases.load(fpath)

            # Scores are rounded so they can be compared against literal values.
            seen_scores = {
                round(score, 3)
                for _, score in restored.export_phrases(probe_sentences)
            }

            assert seen_scores == {
                5.167,  # score for graph minors
                3.444,  # score for human interface
            }
开发者ID:lopusz,项目名称:gensim,代码行数:18,代码来源:test_phrases.py

示例4: testSaveLoadCustomScorer

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 注意: save 是 Phrases 的实例方法,不能单独导入;请先创建 Phrases 对象,再调用其 save(路径) 方法
    def testSaveLoadCustomScorer(self):
        """Round-trip a Phrases model built with a custom scorer through save/load."""
        pickle_path = "test_phrases_testSaveLoadCustomScorer_temp_save.pkl"
        probe_sentences = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]

        try:
            trained = Phrases(self.sentences, min_count=1, threshold=.001, scoring=dumb_scorer)
            trained.save(pickle_path)
            restored = Phrases.load(pickle_path)

            # Collect only the scores reported by the reloaded model.
            seen_scores = [score for _, score in restored.export_phrases(probe_sentences)]

            assert all(seen_scores)  # all scores 1
            assert len(seen_scores) == 3  # 'graph minors' and 'survey human' and 'interface system'

        finally:
            # Remove the temporary pickle whether or not the assertions passed.
            if os.path.exists(pickle_path):
                os.remove(pickle_path)
开发者ID:vishalbelsare,项目名称:gensim,代码行数:20,代码来源:test_phrases.py

示例5: testSaveLoad

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 注意: save 是 Phrases 的实例方法,不能单独导入;请先创建 Phrases 对象,再调用其 save(路径) 方法
    def testSaveLoad(self):
        """Round-trip a Phrases model through save/load and check exported scores."""
        pickle_path = "test_phrases_testSaveLoad_temp_save.pkl"
        probe_sentences = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]

        try:
            trained = Phrases(self.sentences, min_count=1, threshold=1)
            trained.save(pickle_path)
            restored = Phrases.load(pickle_path)

            # Scores are rounded so they can be compared against literal values.
            seen_scores = {
                round(score, 3)
                for _, score in restored.export_phrases(probe_sentences)
            }

            assert seen_scores == {
                5.167,  # score for graph minors
                3.444,  # score for human interface
            }

        finally:
            # Remove the temporary pickle whether or not the assertions passed.
            if os.path.exists(pickle_path):
                os.remove(pickle_path)
开发者ID:vishalbelsare,项目名称:gensim,代码行数:22,代码来源:test_phrases.py

示例6: testSaveLoadNoScoring

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 注意: save 是 Phrases 的实例方法,不能单独导入;请先创建 Phrases 对象,再调用其 save(路径) 方法
    def testSaveLoadNoScoring(self):
        """Round-trip a Phrases model whose ``scoring`` attribute was removed.

        This should ensure backwards compatibility with old versions of Phrases.
        """
        pickle_path = "test_phrases_testSaveLoadNoScoring_temp_save.pkl"
        probe_sentences = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]

        try:
            trained = Phrases(self.sentences, min_count=1, threshold=1)
            # Drop the attribute before saving so the pickle has no ``scoring`` entry.
            del trained.scoring
            trained.save(pickle_path)
            restored = Phrases.load(pickle_path)

            # Scores are rounded so they can be compared against literal values.
            seen_scores = {
                round(score, 3)
                for _, score in restored.export_phrases(probe_sentences)
            }

            assert seen_scores == {
                5.167,  # score for graph minors
                3.444,  # score for human interface
            }

        finally:
            # Remove the temporary pickle whether or not the assertions passed.
            if os.path.exists(pickle_path):
                os.remove(pickle_path)
开发者ID:vishalbelsare,项目名称:gensim,代码行数:24,代码来源:test_phrases.py

示例7: build_bigram_model

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 注意: save 是 Phrases 的实例方法,不能单独导入;请先创建 Phrases 对象,再调用其 save(路径) 方法
 def build_bigram_model(self,sentences,count):
     """Train a bigram phrase model on ``sentences`` and save it.

     :param sentences: sentences handed directly to ``Phrases``
     :param count: value forwarded as the ``min_count`` argument of ``Phrases``
     :return: the newly trained ``Phrases`` instance, which has also been
         saved to ``self.models + 'bigram_model'``
     """
     # Single-argument parenthesised print behaves the same on Python 2 and
     # is valid syntax on Python 3 (the bare print statement is not).
     print("In Bigram Model")
     bigram = Phrases(sentences,min_count=count)
     # NOTE(review): plain string concatenation -- assumes self.models already
     # ends with a path separator; confirm where self.models is set.
     dest = self.models + 'bigram_model'
     bigram.save(dest)
     return bigram
开发者ID:Wushaowei001,项目名称:article-tagger-system,代码行数:8,代码来源:word2vec_builder.py

示例8: PmiPhraseDetector

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 注意: save 是 Phrases 的实例方法,不能单独导入;请先创建 Phrases 对象,再调用其 save(路径) 方法
class PmiPhraseDetector(object):
    """
    Detection using Pointwise Mutual Information (PMI)

    On construction the detector loads a previously saved phrase model if its
    file already exists; otherwise it builds one from ``sentences``, prunes it
    and saves it to that file.
    """
    def __init__(self, sentences, filename=None):
        """
        :param sentences: sentences handed to ``Phrases`` when a model is built
        :param filename: name of the model file inside the models folder; when
            ``None`` a name is derived from threshold, decay, dataset and tokenizer
        """

        # model parameters
        self.sentences = sentences
        self.dataset = "CASEREPORT"
        self.tokenizer = "RAW"
        self.prune_stopwords = stopwords("pubmed")
        self.phrases = None
        self.threshold = 250
        self.decay = 2
        self.bigram_iter = 3

        # data file path: <parent of this file's directory>/data/models/<filename>
        models_folder = os.path.join(*[os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data', 'models'])
        if filename is None:
            filename = "PHRASE_%s_%s_%s_%s" % (self.threshold, self.decay, self.dataset, self.tokenizer, )
        self.filepath = os.path.join(models_folder, filename)

        # does identical model already exists?
        model_exists = os.path.isfile(self.filepath)
        if model_exists:
            logging.info("LOADING - loading phrase data..")
            self.phrases = Phrases.load(self.filepath)
        else:
            logging.info("CREATE - creating phrase data..")
            self.build()

    def build(self):
        """Train the phrase model from ``self.sentences``, prune it and save it."""
        self.phrases = Phrases(self.sentences, min_count=1, threshold=self.threshold)
        # run additional merge rounds
        # NOTE(review): every round retrains on the raw self.sentences and
        # overwrites self.phrases, so only the last round's model (lowest
        # threshold) is kept. If chained merging was intended, the input should
        # probably be self.phrases[self.sentences] -- confirm before changing.
        for i in range(2, self.bigram_iter + 1):
            self.phrases = Phrases(self.sentences, min_count=1, threshold=self.threshold*(1.0/self.decay)**(i-1))
        # prune phrases
        self.prune()
        # save model to file
        self.save()

    def save(self):
        """Save the current phrase model to ``self.filepath``."""
        self.phrases.save(self.filepath)

    def prune(self, min_reduce=1):
        """
        Remove phrases beginning or ending with a stopword.
        Also removes phrases appearing less frequently than a threshold.
        :param min_reduce: frequency threshold
        """
        # Materialise the keys first so entries can be deleted while looping.
        multiword_phrases = [phrase for phrase in self.phrases.vocab if "_" in phrase]
        for phrase in multiword_phrases:
            words = phrase.split("_")
            first_word, last_word = words[0], words[-1]
            if first_word in self.prune_stopwords or last_word in self.prune_stopwords:
                del self.phrases.vocab[phrase]

        prune_vocab(self.phrases.vocab, min_reduce)

    def detect(self, sentence):
        """Return the result of applying the phrase model to ``sentence``."""
        return self.phrases[sentence]

    def print_phrases(self, threshold=100):
        """
        Print every vocabulary entry containing "_" whose count exceeds ``threshold``.
        :param threshold: entries with a count at or below this value are skipped
        """
        for word in self.phrases.vocab:
            if "_" in word and self.phrases.vocab[word] > threshold:
                # %-formatting into one argument prints "word count" on both
                # Python 2 and Python 3; the bare print statement is Py2-only.
                print("%s %s" % (word, self.phrases.vocab[word]))
开发者ID:carriercomm,项目名称:medical-text,代码行数:68,代码来源:phrasedetection.py

示例9: Phrases

# 需要导入模块: from gensim.models.phrases import Phrases [as 别名]
# 注意: save 是 Phrases 的实例方法,不能单独导入;请先创建 Phrases 对象,再调用其 save(路径) 方法
from gensim.models.phrases import Phrases
from gensim.models.word2vec import LineSentence

# Train bigram and trigram phrase models from a text corpus and save both.
CORPUS_PATH = './data/text_cleaned.txt'
PHRASE_THRESHOLD = 50.0

sentence_stream = LineSentence(CORPUS_PATH)

# First pass: phrase model over the raw sentence stream.
bigram = Phrases(sentence_stream, threshold=PHRASE_THRESHOLD)
bigram.save('./data/bigram.dat')

# Second pass: phrase model over the bigram-transformed stream.
trigram = Phrases(bigram[sentence_stream], threshold=PHRASE_THRESHOLD)
trigram.save('./data/trigram.dat')

开发者ID:hujiewang,项目名称:research,代码行数:10,代码来源:phrase.py


注:本文中的gensim.models.phrases.Phrases.save方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。