当前位置: 首页>>代码示例>>Python>>正文


Python LdaModel.load方法代码示例

本文整理汇总了Python中gensim.models.ldamodel.LdaModel.load方法的典型用法代码示例。如果您正苦于以下问题:Python LdaModel.load方法的具体用法?Python LdaModel.load怎么用?Python LdaModel.load使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在gensim.models.ldamodel.LdaModel的用法示例。


在下文中一共展示了LdaModel.load方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
    def __init__(self, topics=10,
                 worker=3,
                 pretrained_model=None,
                 dictionary=None):
        """Initialise the LDA trainer, optionally restoring a saved model.

        Args:
            topics: Number of topics.
            worker: Parallelism setting; the original author suggests the
                number of cores minus one.
            pretrained_model: Path of a previously saved model. Because the
                model supports online updates, an earlier training run can
                be loaded and continued.
            dictionary: Path of the saved word-to-id dictionary that belongs
                to the model (words are converted to ids for training).

        The saved model and dictionary are loaded only when BOTH paths are
        given; otherwise both attributes stay ``None``.

        Example:
            >>> lda = LDA(topics=20, worker=2,
            ...           pretrained_model=model_file,
            ...           dictionary=dictionary_file)
            >>> corpus = read_file(corpus_file)  # [['word1', 'word2'], ['word3', 'word4']]
            >>> lda.update(corpus)
            >>> lda.save(model_file, dictionary_file)
            >>> topics = lda.inference(['word5', 'word6'])
        """

        self._topics = topics
        self._workers = worker
        self._model = None
        self._common_dictionary = None
        # Fix: the original condition tested the undefined name
        # `common_dictionary`, which raised NameError whenever a pretrained
        # model path was supplied. The parameter is called `dictionary`.
        if pretrained_model and dictionary:
            self._model = LdaModel.load(pretrained_model)
            self._common_dictionary = Dictionary.load(dictionary)
开发者ID:freygit,项目名称:36,代码行数:30,代码来源:lda.py

示例2: run

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
	def run(self):
		"""Apply every trained LDA model to its language's corpus and pickle the results.

		For each language and each topic count found under
		``self.input()['lda']['langs']`` the matching corpus is read, the topic
		distribution of every document is collected, and two pickles are
		written through ``self.output()``: the per-document distributions
		(``'doc_topics'``) and the value of ``model.print_topics(n_topics, 5)``
		(``'topics'``).
		"""
		# Any cleaning level outside the three known ones is reported as 'stopwords'.
		known_levels = ('raw','clean','stopwords')
		kind = self.clean_level if self.clean_level in known_levels else 'stopwords'

		if not os.path.exists(self.res_dir):
			print('Creando carpeta para resultados...')
			os.mkdir(self.res_dir)

		# Apply every model
		for idioma, modelos in self.input()['lda']['langs'].iteritems():
			corpus = corpora.MmCorpus(self.input()['corp']['langs'][idioma].path)
			for n_topics, modelo in modelos.iteritems():
				model = LdaModel.load(modelo.path)
				# One topic distribution per corpus document, in corpus order.
				classification = [model.get_document_topics(doc) for doc in corpus]
				print('--------------------------------------')
				print('USER INFO: Clasificando textos en %s con nivel de limpieza "%s" con %d tópicos' % (idioma, kind, n_topics))
				model.print_topics(len(corpus),5)
				with self.output()['langs'][idioma][n_topics]['doc_topics'].open('w') as doc_topics_file:
					pickle.dump(classification, doc_topics_file)
				with self.output()['langs'][idioma][n_topics]['topics'].open('w') as topics_file:
					# 5 is the number of words shown per topic; adjust as needed.
					pickle.dump(model.print_topics(n_topics,5), topics_file)
开发者ID:andreslechuga,项目名称:arte_mexicano_antiguo,代码行数:30,代码来源:lda.py

示例3: make_clouds

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
def make_clouds(files, n_words=20):
    """Render one word-cloud PNG per topic of a saved LDA model.

    Args:
        files: Object exposing ``model`` (path of the saved LDA model) and
            ``replacements`` (path of a JSON file mapping a name to the
            token id used in the corpus).
        n_words: Number of top-ranked words drawn in each cloud.

    Images are written to ``../browser/clouds/<model base name>/<i>.png``,
    where ``i`` is the topic index; the directory is created when missing.
    """
    # set locations
    base_model_name = os.path.splitext(os.path.basename(files.model))[0]
    output_d = '../browser/clouds/' + base_model_name + '/'
    if not os.path.exists(output_d):
        os.makedirs(output_d)
    # create wordcloud generator
    wc = WordCloud(width=1000, height=500, background_color='white')

    print('Loading model')
    model = LdaModel.load(files.model)
    beta = model.expElogbeta

    print('Normalizing by topics, and by words')
    pTW = normalize(beta, axis=0)
    pWT = normalize(beta, axis=1)

    # load bug<->id map, then invert to id<->bug.
    # Fix: the file is now closed deterministically instead of leaking the
    # handle returned by a bare open().
    with open(files.replacements) as replacements_file:
        bug_to_id = json.load(replacements_file)
    # Names containing "." are left out of the inverted map.
    id_to_bug = {v: k for k, v in bug_to_id.items() if "." not in k}

    for i in range(len(beta)):
        # Rank words by the geometric mean of the two normalised weights
        # (the original author calls this score "RAR").
        t_rar = np.sqrt(pTW[i] * pWT[i])
        # Indices of the n_words highest scores, best first.
        top_word_ids = t_rar.argsort()[:-1 - n_words:-1]
        # Fix: look words up through id2word[...] rather than the raw
        # `id2token` attribute. On a gensim Dictionary that reverse map is
        # built lazily and may be empty right after loading; indexing the
        # mapping itself works for a Dictionary and for a plain dict.
        top_words = [model.id2word[wordid] for wordid in top_word_ids]
        # Swap replaced tokens back to their original names where known.
        top_words = [id_to_bug.get(word, word) for word in top_words]
        # NOTE(review): recent wordcloud releases appear to expect a dict of
        # frequencies here - confirm against the installed version.
        wc.fit_words(zip(top_words, t_rar[top_word_ids]))
        wc.to_file(output_d + str(i) + '.png')
开发者ID:knights-lab,项目名称:bugbrowser,代码行数:31,代码来源:make_clouds.py

示例4: __init__

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
    def __init__(self, fnames, model=None, corpus=None, dictionary=None):
        """Load reviews, topic model and corpus, then precompute topic data.

        Args:
            fnames: Sequence of file names for [lda_model, distribution];
                only ``fnames[0]`` is read here, and only when ``model`` is
                not supplied.
            model: Already-loaded LDA model to use instead of loading one.
            corpus: Already-loaded corpus. When given, ``dictionary`` is
                taken from the arguments as well; otherwise both are loaded
                from the pickles under ``data/models/``.
            dictionary: Dictionary that accompanies ``corpus``.

        After construction ``review_dist`` holds, per review, the
        (topic, weight) pairs sorted by descending weight, and ``topics``
        holds the topic strings with the numeric weights removed.
        """
        # Fix: close the reviews file instead of leaking the handle.
        with open('data/electronics_topics_in.txt') as reviews_file:
            self.reviews = reviews_file.readlines()

        print("Loding topic model...")
        if model is not None:
            print("Using argument model")
            self.lda = model
        else:
            self.lda = LdaModel.load(fnames[0])

        if corpus is not None:
            print("Using argument corpus and dictionary")
            self.corpus = corpus
            self.dictionary = dictionary
        else:
            print("Loading corpus and dictionary from file")
            self.corpus = load("data/models/electronics_tfidf_corpus.pkl")
            self.dictionary = load("data/models/electronics_dict.pkl")

        print("Loading review-topic distribution...")
        # Built as real lists (not map objects) so the attributes can be
        # indexed and iterated repeatedly on Python 3 as well as Python 2.
        self.review_dist = [
            sorted(dist, key=lambda pair: pair[1], reverse=True)
            for dist in self.lda[self.corpus]
        ]

        print("processing topics")
        # Raw strings: same patterns as before, without relying on invalid
        # escape sequences inside ordinary string literals.
        weight_prefix = re.compile(r"(\d*\.\d*\*)")  # e.g. "0.123*"
        plus_separator = re.compile(r"\s\+")  # whitespace followed by "+"
        # NOTE(review): assumes show_topics() yields plain strings; newer
        # gensim versions may return (id, string) pairs - confirm.
        self.topics = [
            plus_separator.sub(",", weight_prefix.sub("", topic))
            for topic in self.lda.show_topics(-1)
        ]
开发者ID:fayimora,项目名称:amazon-reviews-analysis,代码行数:30,代码来源:topic_model_helpers.py

示例5: __init__

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
	def __init__(self, ac):
		"""Load the saved data, translator, similarity index, LDA model and dictionary.

		Args:
			ac: Stored unchanged as ``self.ac_terms``.

		Every artefact is read from a fixed path under ``../TextMining/Topic/``.
		"""
		with open('../TextMining/Topic/data.loc','rb') as f:
			# The first serialised object is read and discarded; the second is
			# kept. NOTE(review): presumably the file starts with a header
			# object - confirm against the code that writes data.loc.
			load(f)
			self.data = load(f)
		with open('../TextMining/Topic/translator.loc','rb') as f:
			self.translator = load(f)
		self.index = similarities.MatrixSimilarity.load('../TextMining/Topic/index.loc')
		self.lda = LdaModel.load('../TextMining/Topic/lda.loc')
		# Fix: load() is a classmethod, so call it on the class rather than
		# building a throwaway empty Dictionary first.
		self.dictionary = Dictionary.load("../TextMining/Topic/dic.loc")
		self.ac_terms = ac
开发者ID:valenca,项目名称:News-Recommendation-System,代码行数:12,代码来源:document.py

示例6: __init__

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
 def __init__(self, jobdesc_fname, jobtitle_fname):
     """Connect to Elasticsearch, load the LDA model and read the job labels.

     Args:
         jobdesc_fname: Stored unchanged as ``self.jobdesc_fname``.
         jobtitle_fname: Stored unchanged as ``self.jobtitle_fname``.

     The labels file (``app.config['RCMDR_JOB_LABELS']``) holds one
     ``<integer id>=<label>`` entry per line and is parsed into the
     ``job_labels`` dict keyed by the integer id.
     """
     self.es = Elasticsearch([{'host': app.config['ES_HOST'], 'port': 9200, 'timeout': 120}])
     self.model = LdaModel.load(app.config['RCMDR_LDA_MODEL'])
     # Fix: close the labels file instead of leaking the handle.
     with open(app.config['RCMDR_JOB_LABELS']) as labels_file:
         label_lines = labels_file.read().strip().split('\n')
     # Split on the first "=" only so a label may itself contain "=", and
     # skip blank lines, which previously made the unpacking fail.
     self.job_labels = {
         int(key): label
         for key, label in (line.split("=", 1) for line in label_lines if line.strip())
         }
     self.jobdesc_fname = jobdesc_fname
     self.jobtitle_fname = jobtitle_fname
开发者ID:RajeshThallam,项目名称:job-fiction,代码行数:12,代码来源:model_store.py

示例7: analyze

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
	def analyze(self, docs):
		"""Return the topic distribution of each document using the saved model.

		The dictionary and the LDA model are reloaded from the configured
		files on every call and kept on ``self.dictionary`` / ``self.ldaModel``.
		``docs`` is an iterable of documents accepted by ``doc2bow``.
		"""
		# Restore the persisted dictionary first, then the model.
		dictionaryPath = self.getModelFilePath("common.dictionary.file")
		self.dictionary = Dictionary.load(dictionaryPath)
		modelPath = self.getModelFilePath("common.model.file")
		self.ldaModel = LdaModel.load(modelPath)

		# Bag-of-words form of every document, built with the loaded dictionary.
		bowDocs = list(map(self.dictionary.doc2bow, docs))
		return self.getDocumentTopics(bowDocs)
开发者ID:pranab,项目名称:avenir,代码行数:12,代码来源:lda.py

示例8: __init__

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
 def __init__(self):
     """Load every recommender artefact named in ``app.config``.

     Loads the dictionary, corpus, TF-IDF model, LDA and LSI models and
     their similarity indexes, then parses the job-labels file
     (``app.config["RCMDR_JOB_LABELS"]``, one ``<integer id>=<label>``
     entry per line) into the ``job_labels`` dict keyed by the integer id.
     """
     self.dictionary = Dictionary.load(app.config["RCMDR_DICT"])
     self.corpus = corpora.MmCorpus(app.config["RCMDR_CORPUS"])
     self.tfidf = TfidfModel.load(app.config["RCMDR_TFIDF_MODEL"])
     self.lda_model = LdaModel.load(app.config["RCMDR_LDA_MODEL"])
     self.lsi_model = LsiModel.load(app.config["RCMDR_LSI_MODEL"])
     self.lda_index = Similarity.load(app.config["RCMDR_LDA_INDEX"])
     self.lsi_index = Similarity.load(app.config["RCMDR_LSI_INDEX"])
     # Fix: close the labels file instead of leaking the handle.
     with open(app.config["RCMDR_JOB_LABELS"]) as labels_file:
         label_lines = labels_file.read().strip().split("\n")
     # Split on the first "=" only so a label may itself contain "=", and
     # skip blank lines, which previously made the unpacking fail.
     self.job_labels = {
         int(key): label
         for key, label in (line.split("=", 1) for line in label_lines if line.strip())
     }
开发者ID:RajeshThallam,项目名称:job-fiction,代码行数:14,代码来源:model_talking.py

示例9: AuthorTopicStd

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
def AuthorTopicStd():
    """Build a feature function measuring the spread of an author's topic weights.

    Loads the topic dump, the LDA model and the keyword dictionary from fixed
    paths in the working directory and returns ``calculator(aid, pid)``.
    The calculator tokenizes the field at ``Papers.IDX_TITLE`` of every paper
    linked to author ``aid``, drops stopwords, infers the LDA topic mixture of
    the remaining tokens and returns the standard deviation of the dense
    topic vector. It returns ``np.nan`` when no tokens remain. ``pid`` is
    accepted but not used.
    """
    import nltk

    from gensim import corpora
    from gensim import matutils
    from gensim.models.ldamodel import LdaModel
    from nltk.corpus import stopwords
    from unidecode import unidecode

    topic_path = './lda_topic.dump'
    lda_path = './result.lda'
    dictionary_path = './keywords.dict'

    with open(topic_path, 'rb') as dump_file:
        # Only the topic count is needed here; the second item is ignored.
        num_topics, _topic_result = serializer.load(dump_file)

    lda = LdaModel.load(lda_path)
    dictionary = corpora.Dictionary.load(dictionary_path)

    # Tokens are runs of at least two word characters.
    tokenizer = nltk.tokenize.RegexpTokenizer(r'[\w]{2,}')
    stopwords_set = set(stopwords.words())

    # Single-entry memo: the author id last computed and its topic mixture.
    last_seen = {'aid': None, 'topic': None}

    def calculator(aid, pid):
        if last_seen['aid'] == aid:
            author_topic = last_seen['topic']
        else:
            title_tokens = []
            for ipid, _iaid in paper_authors.get_by_aid(aid):
                paper = papers.get(ipid)
                if paper is None:
                    continue
                tokens = tokenizer.tokenize(unidecode(paper[Papers.IDX_TITLE]).lower())
                if tokens:
                    title_tokens.extend(tokens)

            kept_tokens = [token for token in title_tokens if token not in stopwords_set]
            if not kept_tokens:
                # Nothing to infer from; this outcome is not memoised.
                return np.nan

            author_topic = lda[dictionary.doc2bow(kept_tokens)]
            last_seen['aid'] = aid
            last_seen['topic'] = author_topic

        return np.std(matutils.sparse2full(author_topic, num_topics))

    return calculator
开发者ID:pjknkda,项目名称:kddcup2013-kaist-pjkp,代码行数:55,代码来源:features.py

示例10: getLdaModel

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
def getLdaModel(bow_corpus, dictionary, useSavedTill):
    """Return an LDA model, loaded from disk or freshly trained and saved.

    When ``useSavedTill`` is at least ``USESAVED.lda_model`` the model stored
    at ``file_lda_model`` is loaded. Otherwise a new model is trained on
    ``bow_corpus`` with ``int(log(len(bow_corpus)) + 1)`` topics and
    ``numPasses`` passes, saved to ``file_lda_model`` and returned.
    """
    # Guard clause: reuse the persisted model when the caller allows it.
    if useSavedTill >= USESAVED.lda_model:
        common_logger.info("loading LDA model from file")
        return LdaModel.load(file_lda_model)

    common_logger.info("Training LDA model")
    # Heuristic topic count: grows with the natural log of the corpus size.
    topic_count = int(math.log(len(bow_corpus)) + 1)
    trained_model = LdaModel(
        bow_corpus,
        num_topics=topic_count,
        id2word=dictionary,
        passes=numPasses,
    )
    common_logger.info("Saving LDA model")
    trained_model.save(file_lda_model)
    common_logger.info("Done creating LDA model")
    return trained_model
开发者ID:KshitizSethia,项目名称:AcroDisam,代码行数:14,代码来源:LDAModel.py

示例11: update

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
	def update(self, docs):
		"""Update the saved LDA model with new documents and return their topics.

		The dictionary and the model are reloaded from the configured files,
		``docs`` is converted to bag-of-words form, the model is updated with
		the number of passes read from the ``train.num.pass`` setting, and the
		topic distribution of the new documents is returned.
		"""
		# load dictionary and model
		self.dictionary = Dictionary.load(self.getModelFilePath("common.dictionary.file"))
		self.ldaModel = LdaModel.load(self.getModelFilePath("common.model.file"))

		# Convert the list of documents (corpus) into a document-term matrix
		# using the dictionary loaded above.
		docTermMatrix = [self.dictionary.doc2bow(doc) for doc in docs]

		# Fix: the configured value was stored in `numPass` while the call
		# below read `numPasses`, so the setting was never applied (NameError,
		# or an unrelated global picked up silently).
		numPasses = self.config.getIntConfig("train.num.pass")[0]
		self.ldaModel.update(docTermMatrix, passes=numPasses)

		docTopicDistr = self.getDocumentTopics(docTermMatrix)
		return docTopicDistr
开发者ID:pranab,项目名称:avenir,代码行数:15,代码来源:lda.py

示例12: fetch_model

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
	def fetch_model(dictionary):
		print "Fetching LDA Model... ",
		try:
			lda = LdaModel.load('Topic/lda.tm')
			print "LDA Model loaded!"
		except IOError:
			print "Model not found, building LDA..."
			corpus=MyCorpus()
			#lda = LdaModel(corpus,num_topics=50,update_every=1,chunksize=1000,passes=15)
			lda = LdaModel(corpus,num_topics=50,id2word=dictionary,update_every=1,chunksize=1000,passes=50)
			print "LDA Built!"
			lda.save('Topic/lda.tm')
		return lda
开发者ID:valenca,项目名称:News-Recommendation-System,代码行数:15,代码来源:topic_model.py

示例13: main

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
def main():
    """Query Elasticsearch with each LDA topic and write the results to CSV.

    Loads the model and vocabulary, runs one ``match`` query on index
    ``wiki4`` per item yielded by ``get_doc_topics``, passes the collected
    results through ``add_keywords`` and saves them as
    ``nowiki_4_with_kera_250_topics.csv``.
    """
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s %(name)-12s: %(message)s',
    )
    kera = NOB_kera()
    es = Elasticsearch(port=9201)
    mod = LdaModel.load(modelfile)
    vocab = Dictionary.load(vocabulary)
    tfidf = TfidfModel(dictionary=vocab)

    def search_topics(topics):
        # Full-text match of the topic words against the "_all" field.
        return es.search(index='wiki4', body={"query": {"match": {"_all": topics}}}, size=num_results_from_es)

    topic_stream = get_doc_topics(mod, mod.num_topics, num_words_from_topic, vocab, tfidf)
    results = [
        {'topics': topics, 'result': search_topics(topics), 'topicid': topicid}
        for (topics, topicid) in topic_stream
    ]
    results = add_keywords(results, kera)
    pd.DataFrame(results).to_csv('nowiki_4_with_kera_250_topics.csv', encoding='utf-8')
开发者ID:comperiosearch,项目名称:comperio-text-analytics,代码行数:17,代码来源:build_LDA_kera_from_wiki.py

示例14: SNAP_ldaTopicsForTopic

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
  def SNAP_ldaTopicsForTopic(self, topic, numTopics = 10):
    """Return the printed topics of the saved SNAP LDA model for `topic`.

    Only 5, 10, 20 and 30 are accepted for `numTopics`; any other value
    prints an error message and returns None. The model file is
    `snap_data/gensim_snap_lda_<topic>_<numTopics>` next to this source file.
    """
    if numTopics in (5, 10, 20, 30):
      moduleDir = os.path.dirname(os.path.abspath(__file__))
      modelName = "gensim_snap_lda_%s_%d" % (topic, numTopics)
      model = LdaModel.load(os.path.join(moduleDir, 'snap_data', modelName))
      return model.print_topics(numTopics)
    print("[ERROR] Invalid numTopics")
    return

  ##################
  #
  ##################
开发者ID:dshahaf,项目名称:snap-sentiment,代码行数:17,代码来源:corpus.py

示例15: get_lda_model

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import load [as 别名]
def get_lda_model(num_topics):
    """Load the saved LDA model that was trained with ``num_topics`` topics.

    Supported values are 10, 30, 60 and 120, each mapped to its own
    ``LDA_FILE_*`` path.

    Raises:
        ValueError: If ``num_topics`` is not one of the supported values.
    """
    # Early returns, one per supported topic count.
    if num_topics == 10:
        return LdaModel.load(LDA_FILE_10)
    if num_topics == 30:
        return LdaModel.load(LDA_FILE_30)
    if num_topics == 60:
        return LdaModel.load(LDA_FILE_60)
    if num_topics == 120:
        return LdaModel.load(LDA_FILE_120)
    raise ValueError("bad number of topics")
开发者ID:msushkov,项目名称:cs224w-wiki,代码行数:17,代码来源:lda.py


注:本文中的gensim.models.ldamodel.LdaModel.load方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。