当前位置: 首页>>代码示例>>Python>>正文


Python LdaModel.save方法代码示例

本文整理汇总了Python中gensim.models.ldamodel.LdaModel.save方法的典型用法代码示例。如果您正苦于以下问题:Python LdaModel.save方法的具体用法?Python LdaModel.save怎么用?Python LdaModel.save使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在gensim.models.ldamodel.LdaModel的用法示例。


在下文中一共展示了LdaModel.save方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: LDA

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import save [as 别名]
class LDA(object):
    """Thin wrapper around a gensim LDA model plus its vocabulary dictionary.

    Call either train() (build from the corpus file and persist to disk)
    or load() (restore a previously saved model) before querying topics.
    """

    def __init__(self, model, vocab, corpus=None, topics=200, passes=1):
        # File paths for the persisted model/dictionary, the training
        # corpus, and the LDA hyper-parameters.
        self._model_file = model
        self._dict_file = vocab
        self._corpus_file = corpus
        self._topics = topics
        self._passes = passes

    def train(self):
        """Fit an LDA model on the corpus file, then save model and dictionary."""
        self._corpus = SentenceDocCorpus(self._corpus_file)
        self._dictionary = self._corpus.dictionary
        self._lda = LdaModel(
            self._corpus,
            num_topics=self._topics,
            id2word=self._dictionary,
            passes=self._passes,
        )
        self._lda.save(self._model_file)
        self._dictionary.save(self._dict_file)

    def load(self):
        """Restore a previously trained model and dictionary from disk."""
        self._lda = LdaModel.load(self._model_file)
        self._dictionary = Dictionary.load(self._dict_file)

    def topics(self, words):
        """Return the sparse (topic, weight) pairs for a tokenized document."""
        bow = self._dictionary.doc2bow(common.filter(words))
        return self._lda[bow]

    def topic_vector(self, words):
        """Return the dense topic-weight vector for a tokenized document."""
        bow = self._dictionary.doc2bow(common.filter(words))
        # eps=0 forces gensim to report every topic so the vector has a
        # fixed length; the plain [] operator would drop near-zero topics.
        full_dist = self._lda.__getitem__(bow, eps=0)
        return np.array([weight for _, weight in full_dist])
开发者ID:vchahun,项目名称:cdec-features,代码行数:27,代码来源:lda.py

示例2: run

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import save [as 别名]
	def run(self):
		"""Luigi task body: train one LDA model per language and per topic
		count, reading the dictionary/corpus produced by upstream tasks and
		saving each trained model to the path declared in self.output().
		"""
		# Normalize the cleaning level; anything unrecognized falls back
		# to 'stopwords'.
		if self.clean_level in ('raw','clean','stopwords'):
			kind = self.clean_level
		else:
			kind = 'stopwords'

		# One pass per language declared in the task outputs.
		# (Python 2: iterkeys/print-statement syntax.)
		for idioma in self.output()['langs'].iterkeys():
			dicc_path = self.input()['dict']['langs'][idioma].path
			corp_path = self.input()['corp']['langs'][idioma].path
			print '=============================='
			print 'Corriendo LDA de %s con nivel de limpieza %s' % (idioma, kind)
			print '=============================='

			# Load the language's dictionary and serialized corpus.
			dicc = corpora.Dictionary.load(dicc_path)
			corpus = corpora.MmCorpus(corp_path)

			# Train one LDA model for each requested number of topics.
			for n_topics in self.output()['langs'][idioma].iterkeys():
				print 'Número de tópicos: ' + str(n_topics)
				if self.by_chunks:
					# Online/chunked training driven by task parameters.
					lda = LdaModel(corpus, id2word=dicc, num_topics=n_topics, update_every=self.update_e, chunksize=self.chunk_size, passes=self.n_passes)
				else:
					# Single-pass batch training.
					lda = LdaModel(corpus, id2word=dicc, num_topics=n_topics, passes=1)
				lda.save(self.output()['langs'][idioma][n_topics].path)
开发者ID:andreslechuga,项目名称:arte_mexicano_antiguo,代码行数:27,代码来源:lda.py

示例3: getLdaModel

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import save [as 别名]
def getLdaModel(bow_corpus, dictionary, useSavedTill):
    """Return an LDA model, loading a cached one from disk when allowed.

    If useSavedTill indicates the saved-model stage may be reused, load it
    from file_lda_model; otherwise train a fresh model on bow_corpus and
    persist it to the same path.
    """
    # Reuse the cached model when the pipeline stage permits it.
    if useSavedTill >= USESAVED.lda_model:
        common_logger.info("loading LDA model from file")
        return LdaModel.load(file_lda_model)

    common_logger.info("Training LDA model")
    # Heuristic: topic count grows logarithmically with corpus size.
    topic_count = int(math.log(len(bow_corpus)) + 1)
    model = LdaModel(
        bow_corpus,
        num_topics=topic_count,
        id2word=dictionary,
        passes=numPasses,
    )
    common_logger.info("Saving LDA model")
    model.save(file_lda_model)
    common_logger.info("Done creating LDA model")
    return model
开发者ID:KshitizSethia,项目名称:AcroDisam,代码行数:14,代码来源:LDAModel.py

示例4: fetch_model

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import save [as 别名]
	def fetch_model(dictionary):
		"""Return the LDA topic model, loading it from 'Topic/lda.tm' if it
		exists, otherwise training a 50-topic model on MyCorpus and saving
		it to that path for next time (EAFP caching).
		"""
		# Python 2: trailing comma suppresses the newline so the status
		# message continues on the same line.
		print "Fetching LDA Model... ",
		try:
			lda = LdaModel.load('Topic/lda.tm')
			print "LDA Model loaded!"
		except IOError:
			# No cached model on disk -- build one from scratch.
			print "Model not found, building LDA..."
			corpus=MyCorpus()
			#lda = LdaModel(corpus,num_topics=50,update_every=1,chunksize=1000,passes=15)
			lda = LdaModel(corpus,num_topics=50,id2word=dictionary,update_every=1,chunksize=1000,passes=50)
			print "LDA Built!"
			# Persist so subsequent calls take the fast path above.
			lda.save('Topic/lda.tm')
		return lda
开发者ID:valenca,项目名称:News-Recommendation-System,代码行数:15,代码来源:topic_model.py

示例5: gensim_lda

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import save [as 别名]
def gensim_lda(d):
    from gensim import corpora, models
    from gensim.models.ldamodel import LdaModel
    list_doc = []
    for i in range(0,len(d)):
        list_doc = list_doc + d[i]

    dictionary = corpora.Dictionary(list_doc)
    model = LdaModel(num_topics = 20, id2word = dictionary)
    for i in range(0, len(d)):
        print 'Generating corpus and updating model ', i
        corpus = [dictionary.doc2bow(doc) for doc in d[i]]
        model.update(corpus)

    model.save('model_20')
    print model.show_topics(num_topics = 20, num_words = 10)
开发者ID:JT17,项目名称:445Project,代码行数:18,代码来源:lda_training_data.py

示例6: main

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import save [as 别名]
def main(argv):
    """Train and save an LDA model for one crawled group.

    Expected argv: [script, group_id, num_topics, passes].  Reads raw
    title+comment text from tables/<group_id>/ and writes the dictionary,
    .mm corpus and trained model under ldamodels/<group_id>/.
    (Python 2 script.)
    """
    if len(argv) < 4:
        print 'python train_lda.py group_id num_topics passes'
        sys.exit(1)

    group_id = argv[1]
    num_topics = int(argv[2])
    passes = int(argv[3])
    log.info('Prepare corpus for group: %s' % group_id)

    base_path = 'tables/' + group_id + '/'
    model_base_path = 'ldamodels/' + group_id + '/'

    # Build dict and corpus from the raw title+comment source file.
    #now = datetime.now()
    indicator = 'title-comment'
    source_path = base_path + 'corpus-topic-comment'

    corpus_path = model_base_path + 'corpus-'+ indicator + '-' + group_id + '.mm'
    dict_path = model_base_path + 'dict-' + indicator + '-' + group_id + '.dict'

    log.info('Building the dict...')
    build_dict_corpus(source_path, corpus_path, dict_path)

    log.info('Loading dict from pre-saved file...')
    dictionary = corpora.Dictionary.load(dict_path)
    log.info('Done')

    #dictionary.save_as_text(base_path + 'text-dict.txt')

    log.info('Build a lda model...')
    log.info('Loading corpus from pre-saved .mm file...')
    mmcorpus = corpora.MmCorpus(corpus_path)
    log.info('Done')

    log.info('Training lda model...')
    model = LdaModel(mmcorpus, num_topics=num_topics, id2word = dictionary, passes = passes)
    model_path = model_base_path + indicator + '-' + group_id + '.ldamodel'
    model.save(model_path)
    log.info('Done.')

    # Reload the saved model as a sanity check and log its topics.
    # NOTE(review): `topics=` is the pre-1.0 gensim keyword (newer
    # releases use `num_topics=`) -- confirm the pinned gensim version.
    model = LdaModel.load(model_path)
    model.show_topics(topics=num_topics, topn=10, log=True)
开发者ID:hitalex,项目名称:crawler,代码行数:45,代码来源:train_lda.py

示例7: generate_model

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import save [as 别名]
def generate_model():
    """Build and persist an LDA topic model plus a logistic-regression
    classifier trained on the per-document LDA topic distributions.

    Side effects: saves the dictionary (DICT), the bag-of-words corpus
    (BOW_CORPUS), the LDA model (MODEL) and the fitted classifier
    (CLASSIFIER) to disk.
    """
    np.set_printoptions(precision=2)

    # Assemble the raw corpus from every document source.
    corpus = []
    corpus += load_expo_cdc()
    corpus += load_lago()
    corpus += load_news()
    corpus += load_news_ic()
    corpus += load_palestras()
    corpus = preprocessing(corpus)

    dictionary = corpora.Dictionary(corpus)
    bow_corpus = [dictionary.doc2bow(text) for text in corpus]

    dictionary.save(DICT)
    corpora.MmCorpus.serialize(BOW_CORPUS, bow_corpus)

    # Inflate the training set 8x by concatenating the bag-of-words
    # corpus with itself three times (matches the 8x label replication
    # below).
    bow2 = np.concatenate((bow_corpus, bow_corpus), axis=0)
    bow2 = np.concatenate((bow2, bow2), axis=0)
    bow2 = np.concatenate((bow2, bow2), axis=0)

    TOPICS = 20
    model = LdaModel(bow2, id2word=dictionary, num_topics=TOPICS, iterations=100, passes=15)
    model.save(MODEL)

    # Dense per-document topic-distribution matrix for the classifier.
    lda_corpus = [model[vector] for vector in bow2]
    lda_dense = gensim.matutils.corpus2dense(lda_corpus, num_terms=TOPICS).transpose()
    """
    tfidf = models.TfidfModel(bow_corpus)
    tfidf_corpus = [tfidf[vector] for vector in bow_corpus]
    tfidf_dense = gensim.matutils.corpus2dense(tfidf_corpus, num_terms=len(dictionary)).transpose()
    """
    classifier = LogisticRegression()
    labels = load_labels()
    # Replicate labels 8x to match the inflated corpus.  Copy first:
    # the original `labels2 = labels` aliased the same list object, so
    # the repeated `+=` also mutated `labels` in place.
    labels2 = list(labels)
    labels2 += labels2
    labels2 += labels2
    labels2 += labels2
    classifier.fit(lda_dense, labels2)
    joblib.dump(classifier, CLASSIFIER, compress=9)
    #print "LDA results"
    probs = classifier.predict_proba(lda_dense)
开发者ID:Campus-Board,项目名称:campusboard,代码行数:41,代码来源:text_analysis.py

示例8: SNAP_generateLDAForTopic

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import save [as 别名]
 def SNAP_generateLDAForTopic(self, topic, numTopics = 5):
   """Train and save an LDA model for a single SNAP topic.

   When topic == 'all', recurses over the canonical topic list and
   trains both a 5-topic and a 10-topic model for each of them.
   """
   if topic == 'all':
     # Fan out over every canonical topic (bieber/cyrus intentionally
     # left out, per the original list).
     for t in ['syria', 'ufo', 'movie', 'celebrity', 'russia']:
       self.SNAP_generateLDAForTopic(t, 5)
       self.SNAP_generateLDAForTopic(t, 10)
     return

   id2word = self.SNAP_id2word()
   # Both the input corpus and the output model live under snap_data/
   # next to this source file.
   snap_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'snap_data')
   mm = MmCorpus(os.path.join(snap_dir, "gensim_snap_mmcorpus_%s.mm" % topic))
   lda = LdaModel(corpus=mm, id2word=id2word, num_topics=numTopics, update_every=1, chunksize=10000, passes=1)
   lda.save(os.path.join(snap_dir, "gensim_snap_lda_%s_%d" % (topic, numTopics)))
   return
开发者ID:dshahaf,项目名称:snap-sentiment,代码行数:24,代码来源:corpus.py

示例9: build_lda_model

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import save [as 别名]
def build_lda_model(corpus, dictionary, num_topics=10):
    file_name = None

    if corpus == None:
        corpus = get_corpus()
    if dictionary == None:
        dictionary = get_dictionary()

    if num_topics == 10:
        file_name = LDA_FILE_10
    elif num_topics == 30:
        file_name = LDA_FILE_30
    elif num_topics == 60:
        file_name = LDA_FILE_60
    elif num_topics == 120:
        file_name = LDA_FILE_120
    else:
        raise ValueError("bad number of topics")
    lda = LdaModel(corpus=corpus, id2word=dictionary, num_topics=num_topics, update_every=1, chunksize=100, passes=1)
    lda.save(file_name)
    for topic in range(10):
        print "Topic {0}: {1}".format(topic, lda.print_topic(topic))
    return lda
开发者ID:msushkov,项目名称:cs224w-wiki,代码行数:25,代码来源:lda.py

示例10: train

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import save [as 别名]
def train(refresh=True):
    """Train (or reload) an LDA model over Penn-Treebank-style documents.

    When refresh is True, parses the bracketed corpus in sections 00-24,
    builds a fresh dictionary and corpus, trains an LDA model with
    N_TOPICS topics, and saves both dictionary and model to disk.
    When refresh is False, loads the previously saved artifacts instead.

    Returns:
        (lda, dictionary) tuple.
    """
    if refresh:
        ptb = BracketParseCorpusReader(Corpus.DATA_DIR, Corpus.FILE_PATTERN)
        # Folder names '00'..'19' plus '20'..'24' (two-digit sections).
        train_folders = [str(i) + str(j) for i in range(2) for j in range(10)]
        train_folders += [str(i) + str(j) for i in range(2, 3) for j in range(5)]

        dictionary = corpora.dictionary.Dictionary()
        train_documents = list()

        logger.debug('Starting to parse training documents')
        for folder in train_folders:
            for ptb_file in os.listdir(os.path.join(Corpus.DATA_DIR, folder)):
                document_sentences = ptb.sents(fileids=[os.path.join(folder, ptb_file)])
                # Skip documents that are too short to be informative.
                if len(document_sentences) > DOC_LEN_THRESHOLD:
                    # Flatten sentences into one token stream, then clean.
                    doc2sentence = list(chain.from_iterable(document_sentences))
                    doc2sentence = clean_text(doc2sentence)
                    dictionary.add_documents([doc2sentence])
                    train_documents.append(doc2sentence)
        logger.debug('Parsed all training documents')

        # Drop tokens appearing in more than half of all documents.
        dictionary.filter_extremes(no_below=1, no_above=0.5)
        dictionary.save(DICTIONARY_FILE)

        logger.debug('Creating corpus for training data')
        corpus = [dictionary.doc2bow(text) for text in train_documents]
        logger.debug('Finished creating corpus')

        logger.debug('Training LDA model on corpus')
        lda = LdaModel(corpus=corpus, id2word=dictionary, num_topics=N_TOPICS, passes=20)
        logger.debug('Completed LDA training')

        lda.save(LDA_MODEL_FILE)
    else:
        # Fast path: reuse the artifacts saved by a previous refresh run.
        dictionary = corpora.dictionary.Dictionary.load(DICTIONARY_FILE)
        lda = LdaModel.load(LDA_MODEL_FILE)

    return lda, dictionary
开发者ID:pbamotra,项目名称:cgrnnlm,代码行数:39,代码来源:LDA.py

示例11: print

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import save [as 别名]
# Script: load a pre-built corpus/dictionary/class-label set for the
# SRAA data set, then train and save an LDA model and a TF-IDF model,
# timing each stage.  (Python 2 script; LdaModel is used below but not
# imported in this chunk -- presumably imported elsewhere in the module.)
from gensim import corpora,models, similarities
import numpy as np
import time

corpusType = "sraa2_";
subDirectory = 'run_sraa'
t1 = time.time()

# Load the serialized corpus, its dictionary, and the document classes.
corpus = corpora.MmCorpus(subDirectory+'/'+corpusType+'corpus.mm')
dictionary = corpora.dictionary.Dictionary.load(subDirectory+'/'+corpusType+'dictionary.dict')
classes = np.loadtxt(subDirectory+'/'+corpusType+'classes.dat',dtype=int)

t2 = time.time()
print 'data loaded ... seconds: ',
print (t2-t1)

# Train and persist the 30-topic LDA model.
ldaModel = LdaModel(corpus, num_topics=30, id2word = dictionary, passes=20)
ldaModel.save(subDirectory+'/'+corpusType+'sraa.lda_model')

t3 = time.time()
print 'ldaModel is finished... seconds:',
print (t3-t2)

# Train and persist the TF-IDF model over the same corpus.
tfidfModel = models.TfidfModel(corpus)
tfidfModel.save(subDirectory+'/'+corpusType+'sraa.tfidf_model')

t4 = time.time()
print 'tfidfModel is finished... seconds:',
print (t4-t3)

开发者ID:babakahmadi,项目名称:EigenTransfer,代码行数:31,代码来源:constructingModels.py

示例12: create_models

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import save [as 别名]
def create_models(db, lsi_num_topics=10, lda_num_topics=5, num_bars=None):
    """
    Create and save a lsi object
    using data in the database.
    Save this object, along with the
    dictionary and the corpus, to disk
    """

    bars = db['bars']

    if num_bars == None:
        locations = bars.find({ 'nymag.review' : {'$ne':None}, 
                                'foursquare.tips' : {'$exists':True}, 
                                'foursquare.tips' : {'$ne':None} 
                                })
    else:
        locations = bars.find({ 'nymag.review' : {'$ne':None}, 
                                'foursquare.tips' : {'$exists':True}, 
                                'foursquare.tips' : {'$ne':None} 
                                }).limit(num_bars)

    ignorechars = '''!"#$%&()*+,-./:;<=>[email protected][\]^_`{|}~'''
    stopwords = get_stopwords()

    texts = []
    bar_idx_map = {}
    idx_bar_map = {}

    save_directory = "assets/"

    print "Fetching texts from database and tokenizing"
    for idx, location in enumerate(locations):
        bar_name = location['nymag']['name']
        bar_idx_map[bar_name] = idx
        idx_bar_map[int(idx)] = bar_name
        text = create_string_from_database(location)
        tokens = tokenize_document(text, stopwords, ignorechars)
        texts.append(tokens)

    # Do some cleaning
    print "Cleaning texts"
    texts = remove_words_appearing_once(texts)

    # Create the counter
    word_counts = Counter()
    for text in texts: 
        word_counts.update(text)       

    # Create and save the dictionary
    print "Creating dictionary"
    dictionary = corpora.Dictionary(texts)
    dictionary.save(save_directory + 'keywords.dict')

    # Create and save the corpus
    print "Creating Corpus matrix"
    corpus = [dictionary.doc2bow(text) for text in texts]
    corpora.MmCorpus.serialize(save_directory + 'corpus.mm', corpus) 

    # Term Frequency, Inverse Document Frequency
    print "Applying TFIDF"
    tfidf = models.TfidfModel(corpus) 
    tfidf.save(save_directory + "tfidf.model")

    # Map TFIDF on the corpus
    print "Mapping TFIDF on corpus"
    corpus_tfidf = tfidf[corpus]
    corpora.MmCorpus.serialize(save_directory + 'corpus_tfidf.mm', corpus_tfidf) 

    # Create the LSI
    print "Creating LSI with %s topics" % lsi_num_topics
    lsi = models.LsiModel(corpus_tfidf, id2word=dictionary, num_topics=lsi_num_topics) 
    lsi.save(save_directory + 'lsi.model')

    # Map LSI on the corpus
    corpus_lsi_tfidf = lsi[corpus_tfidf]
    corpora.MmCorpus.serialize(save_directory + 'corpus_lsi_tfidf.mm', corpus_lsi_tfidf)

    # Create the index
    #index = similarities.MatrixSimilarity(lsi[corpus_tfidf])
    index = similarities.MatrixSimilarity(corpus_lsi_tfidf)
    index.save(save_directory + 'lsi_tfidf.index')

    # Create the LDA (on the raw corpus)
    print "Creating LDA with %s topics" % lda_num_topics
    lda = LdaModel(corpus, num_topics=lda_num_topics, id2word=dictionary, 
                   update_every=0, passes=30)
    #lda.show_topics(10, 20, formatted=False)
    lda.save(save_directory + 'lda.model')

    # Create the lda corpus
    corpus_lda = lda[corpus]
    corpora.MmCorpus.serialize(save_directory + 'corpus_lda.mm', corpus_lda)

    # Save some additional info
    with open(save_directory + 'bar_idx_map.json', 'wb') as fp:
        json.dump(bar_idx_map, fp)

    with open(save_directory + 'idx_bar_map.json', 'wb') as fp:
        json.dump(idx_bar_map, fp)

#.........这里部分代码省略.........
开发者ID:ghl3,项目名称:BarkovChain,代码行数:103,代码来源:semantic.py

示例13: enumerate

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import save [as 别名]
# (Notebook export.)  Count token frequencies across the cleaned texts.
# NOTE(review): `newfrequency` is not read again in this chunk --
# presumably consumed elsewhere in the notebook; confirm.
for text in newTexts:
    for token in text:
        newfrequency[token] += 1

# In[87]:

logger.info("Generating topics from LDA")

# Train a 100-topic LDA model over the pre-built corpus/dictionary.
# The multicore and HDP variants below were tried and left disabled.
num_topics=100
model=LdaModel(num_topics=num_topics,corpus=corpus,id2word=dictionary,iterations=1500)
#model=LdaMulticore(num_topics=100,workers=3,corpus=corpus,id2word=dictionary,iterations=3000)
#model=HdpModel(corpus=corpus, id2word=dictionary)

# In[94]:

model.save('cache/model.pkl')


# In[96]:

# Collect every movieId from MongoDB (projection keeps only movieId).
cursor = db.movies.find({},{"movieId":1})
movieId=[]
for doc in cursor:
    movieId.append(doc['movieId'])


# In[97]:

# Map each movieId to its token list; assumes newTexts is aligned
# index-for-index with the movieId cursor order -- TODO confirm.
movieDict={}
for i,val in enumerate(movieId):
    movieDict[val]=newTexts[i]
开发者ID:GromitC,项目名称:MovieRecommender,代码行数:33,代码来源:lda.py

示例14: get_params

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import save [as 别名]
def get_params(files):
    print "Converting data to features..."
    tweets = imap(lambda f: open(f).read(), files)
    features = [to_features(tweet) for tweet in tweets]
    # features = json.load(open("models/lda_features.json"))

    print "Converting features to bag of words..."
    dictionary = corpora.Dictionary(features)
    corpus = [dictionary.doc2bow(text) for text in features]
    # corpus = json.load(open("models/lda_corpus.json"))

    return corpus, features, dictionary


if __name__ == "__main__":
    # Script entry point: train a 30-topic LDA model over all tweets
    # and save both the model and the per-document topic distribution.
    print "Loading file names..."
    files = glob.glob("../tweets/*")
    corpus, features, dictionary = get_params(files)

    print "Creating LDA Model..."
    # alpha='auto' lets gensim learn an asymmetric document-topic prior.
    lda = LdaModel(corpus, id2word=dictionary, num_topics=30, iterations=1000, alpha='auto', chunksize=50)
    # Materialize the (lazy) per-document topic distributions.
    lda_topic_distribution = [l for l in lda[corpus]]

    print "Saving model..."
    lda.save("lda_model_unigrams.dat")

    print "Saving distribution..."
    f = open("lda_topic_distribution.json", 'w')
    json.dump(lda_topic_distribution, f)
    f.close()
开发者ID:fayimora,项目名称:FinalYearProject,代码行数:32,代码来源:TopicModelling.py

示例15: len

# 需要导入模块: from gensim.models.ldamodel import LdaModel [as 别名]
# 或者: from gensim.models.ldamodel.LdaModel import save [as 别名]
if len(sys.argv) != 2:
    print 'Usage: {0} rcv1_data_dir'.format(sys.argv[0])
    raise SystemExit(1)

data_dir = sys.argv[1]
mapping_file = data_dir+'/token_id_idf'
dictionary_file = data_dir+'/id_token_df'
token_file = data_dir+'/tokens'
lda_file = data_dir+'/lda_model'

print 'creating dictionary...'
N = 23307  # supplied idfs from rcv1/lyrl2004 were based on 23307 training docs
create_dictionary_file(mapping_file,dictionary_file,23307)
dictionary = Dictionary.load_from_text(dictionary_file)

print 'creating corpus...'
corpus = SimpleLowCorpus(token_file,dictionary)

print 'training model...'
logging.basicConfig(format='%(levelname)s : %(message)s', level=logging.INFO)
lda = LdaModel(corpus,id2word=dictionary,num_topics=200)
print 'done!'
print '\n'*3
print '======final topics======'
topics = lda.show_topics(topics=-1,topn=4)
for i,topic in enumerate(topics):
    print i,topic

print 'saving model...'
lda.save(lda_file)
开发者ID:biddyweb,项目名称:news-1,代码行数:32,代码来源:train.py


注:本文中的gensim.models.ldamodel.LdaModel.save方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。