Python Dictionary.doc2bow Method Code Examples

This article collects and summarizes typical usage examples of the Python method gensim.corpora.dictionary.Dictionary.doc2bow. If you are wondering how to use Dictionary.doc2bow, what it does, or what its usage looks like in practice, the curated code examples below should help. You can also explore further usage examples of the containing class, gensim.corpora.dictionary.Dictionary.


The 15 code examples of the Dictionary.doc2bow method shown below are sorted by popularity by default.
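
Before the examples, here is a minimal sketch of the method itself: doc2bow converts a tokenized document into a sparse bag-of-words list of (token_id, token_count) pairs, and with allow_update=True it also adds unseen tokens to the dictionary as it goes. The toy corpus below is purely illustrative:

from gensim.corpora.dictionary import Dictionary

texts = [['human', 'interface', 'computer'],
         ['survey', 'user', 'computer', 'system']]
dictionary = Dictionary(texts)

# Known tokens map to (token_id, count); unseen tokens are silently dropped...
print(dictionary.doc2bow(['human', 'computer', 'computer']))
# ...unless allow_update=True, which grows the dictionary in place.
print(dictionary.doc2bow(['human', 'graph'], allow_update=True))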

Example 1: build_dictionaries_from_splits

# Required import: from gensim.corpora.dictionary import Dictionary [as alias]
# Or: from gensim.corpora.dictionary.Dictionary import doc2bow [as alias]
def build_dictionaries_from_splits(splits_template, n, save_pickle_tup=None):
    ''' Builds all 3 dictionaries from splits. If provided, `save_pickle_tup` must
        be a 3-tuple of the picklefile names in the following order:
        
        (title, body, tags)
        
        If `save_pickle_tup[i]` is None, the corresponding dictionary will not be saved.
    '''
    utitledict, ubodydict, utagdict = Dictionary(), Dictionary(), Dictionary()
    for eid in range(n):
        for row in row_stream(splits_template % eid):
            ID, title, body, tags = row
            utitledict.doc2bow(title.split(), allow_update=True)
            ubodydict.doc2bow(body.split(), allow_update=True)
            utagdict.doc2bow(tags.split(), allow_update=True)
    
    assert ubodydict.num_docs == utitledict.num_docs == utagdict.num_docs
    print "Before filtering..."
    print "utitledict:", utitledict
    print "ubodydict:", ubodydict
    print "utagdict:", utagdict
    
    if save_pickle_tup:
        assert len(save_pickle_tup) == 3
        if save_pickle_tup[0]:
            print "saving utitledict..."
            utitledict.save(save_pickle_tup[0])
        if save_pickle_tup[1]:
            print "saving ubodydict..."
            ubodydict.save(save_pickle_tup[1])
        if save_pickle_tup[2]:
            print "saving utagdict..."
            utagdict.save(save_pickle_tup[2])
            
    return (utitledict, ubodydict, utagdict)
Developer: mr1azl, Project: tag_recommender, Lines: 37, Source: pruning.py

Example 2: doc_to_gensim

# Required import: from gensim.corpora.dictionary import Dictionary [as alias]
# Or: from gensim.corpora.dictionary.Dictionary import doc2bow [as alias]
def doc_to_gensim(doc, lemmatize=True,
                  filter_stops=True, filter_punct=True, filter_nums=False):
    """
    Convert a single ``spacy.Doc`` into a gensim dictionary and bag-of-words document.

    Args:
        doc (``spacy.Doc``)
        lemmatize (bool): if True, use lemmatized strings for words; otherwise,
            use the original form of the string as it appears in ``doc``
        filter_stops (bool): if True, remove stop words from word list
        filter_punct (bool): if True, remove punctuation from word list
        filter_nums (bool): if True, remove numbers from word list

    Returns:
        :class:`gensim.Dictionary <gensim.corpora.dictionary.Dictionary>`:
            integer word ID to word string mapping
        list((int, int)): bag-of-words document, a list of (integer word ID, word count)
            2-tuples
    """
    gdict = Dictionary()
    words = extract.words(doc,
                          filter_stops=filter_stops,
                          filter_punct=filter_punct,
                          filter_nums=filter_nums)
    if lemmatize is True:
        gdoc = gdict.doc2bow((word.lemma_ for word in words), allow_update=True)
    else:
        gdoc = gdict.doc2bow((word.orth_ for word in words), allow_update=True)

    return (gdict, gdoc)
Developer: GregBowyer, Project: textacy, Lines: 32, Source: export.py
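
A hypothetical usage sketch for the function above (the spaCy model name and the sample sentence are illustrative assumptions; extract refers to textacy's extraction module imported by the surrounding file):

import spacy

nlp = spacy.load('en_core_web_sm')  # assumes this spaCy model is installed
doc = nlp("The quick brown foxes were jumping over the lazy dogs.")
gdict, gdoc = doc_to_gensim(doc, lemmatize=True)
print(gdict.token2id)  # token -> integer id mapping, e.g. {'quick': 0, ...}
print(gdoc)            # bag-of-words 2-tuples, e.g. [(0, 1), (1, 1), ...]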

Example 3: create_dictionaries

# Required import: from gensim.corpora.dictionary import Dictionary [as alias]
# Or: from gensim.corpora.dictionary.Dictionary import doc2bow [as alias]
def create_dictionaries(train=None,
                        test=None,
                        model=None):
    ''' Function does a number of jobs:
        1- Creates a word-to-index mapping
        2- Creates a word-to-vector mapping
        3- Transforms the training and testing data

    '''
    if (train is not None) and (model is not None) and (test is not None):
        gensim_dict = Dictionary()
        gensim_dict.doc2bow(model.vocab.keys(),
                            allow_update=True)
        w2indx = {v: k+1 for k, v in gensim_dict.items()}
        w2vec = {word: model[word] for word in w2indx.keys()}

        def parse_dataset(data):
            ''' Words become integers
            '''
            for key in data.keys():
                txt = data[key].lower().replace('\n', '').split()
                new_txt = []
                for word in txt:
                    try:
                        new_txt.append(w2indx[word])
                    except KeyError:
                        new_txt.append(0)  # out-of-vocabulary words map to index 0
                data[key] = new_txt
            return data
        train = parse_dataset(train)
        test = parse_dataset(test)
        return w2indx, w2vec, train, test
    else:
        print('No data provided...')
Developer: caomw, Project: DeepLearning_MachineLearning, Lines: 36, Source: imdb_embedding_w2v.py

Example 4: create_dictionaries

# Required import: from gensim.corpora.dictionary import Dictionary [as alias]
# Or: from gensim.corpora.dictionary.Dictionary import doc2bow [as alias]
def create_dictionaries(model=None,
                        combined=None):
    ''' Function does a number of jobs:
        1- Creates a word-to-index mapping
        2- Creates a word-to-vector mapping
        3- Transforms the combined data

    '''
    if (combined is not None) and (model is not None):
        gensim_dict = Dictionary()
        gensim_dict.doc2bow(model.vocab.keys(),
                            allow_update=True)
        w2indx = {v: k+1 for k, v in gensim_dict.items()}  # indices of all words with frequency above 10
        w2vec = {word: model[word] for word in w2indx.keys()}  # word vectors of all words with frequency above 10

        def parse_dataset(combined):
            ''' Words become integers
            '''
            data=[]
            for sentence in combined:
                new_txt = []
                for word in sentence:
                    try:
                        new_txt.append(w2indx[word])
                    except KeyError:
                        new_txt.append(0)  # out-of-vocabulary words map to index 0
                data.append(new_txt)
            return data
        combined = parse_dataset(combined)
        combined = sequence.pad_sequences(combined, maxlen=maxlen)  # index sequence for each sentence; words with frequency below 10 map to index 0
        return w2indx, w2vec, combined
    else:
        print('No data provided...')
Developer: BUPTLdy, Project: Sentiment-Analysis, Lines: 35, Source: Sentiment_lstm.py

Example 5: WordCorpus

# Required import: from gensim.corpora.dictionary import Dictionary [as alias]
# Or: from gensim.corpora.dictionary.Dictionary import doc2bow [as alias]
class WordCorpus(BaseCorpus):
    """\
    Wrapper around a `gensim.corpora.dictionary.Dictionary`.

    This is a light-weight alternative to `CableCorpus` to create an initial
    word dictionary::

        wd = WordCorpus()
        wd.add_text('ref-1', 'bla bla')
        # add more texts
        wd.dct.filter_extremes()

        corpus = CableCorpus('/my/directory/', wd.dct)
        corpus.add_text('ref-1', 'bla bla')
        # add more texts
        corpus.close()
    """
    def __init__(self, dct=None, tokenizer=None):
        """\
        Initializes the wrapper.

        `dct`
            An existing Dictionary or ``None`` if a new Dictionary should be
            created (default)
        `tokenizer`
            A tokenizer function or ``None``, see `BaseCorpus`
        """
        super(WordCorpus, self).__init__(tokenizer)
        self.dct = Dictionary() if dct is None else dct

    def add_words(self, reference_id, words):
        self.dct.doc2bow(words, allow_update=True)
Developer: Tooa, Project: cablemap, Lines: 34, Source: corpus.py

Example 6: _load_vocab

# Required import: from gensim.corpora.dictionary import Dictionary [as alias]
# Or: from gensim.corpora.dictionary.Dictionary import doc2bow [as alias]
    def _load_vocab(self, fname):
        logging.info("loading plain-text file: {}".format(fname))
        src_file = codecs.open(fname, 'rb', 'utf-8')
        dictionary = Dictionary()

        num_instances = 0
        for term in src_file:
            dictionary.doc2bow(term.strip().lower().encode('utf-8').split(), allow_update=True)
            num_instances += 1

        logging.info("processed {} instances".format(num_instances))
        self.dictionary = dictionary
Developer: jmcotelo, Project: ls-norm, Lines: 14, Source: vocabulary.py

Example 7: get_corpus_dictionary

# Required import: from gensim.corpora.dictionary import Dictionary [as alias]
# Or: from gensim.corpora.dictionary.Dictionary import doc2bow [as alias]
def get_corpus_dictionary():
    """Crafts a toy corpus and the dictionary associated."""
    # Toy corpus.
    corpus = [
        ['carrot', 'salad', 'tomato'],
        ['carrot', 'salad', 'dish'],
        ['tomato', 'dish'],
        ['tomato', 'salad'],

        ['car', 'break', 'highway'],
        ['highway', 'accident', 'car'],
        ['moto', 'break'],
        ['accident', 'moto', 'car']
    ]

    dictionary = Dictionary(corpus)

    # Transforming corpus with dictionary.
    corpus = [dictionary.doc2bow(doc) for doc in corpus]

    # Building reverse index.
    for (token, uid) in dictionary.token2id.items():
        dictionary.id2token[uid] = token

    return corpus, dictionary
Developer: bmabey, Project: pyLDAvis, Lines: 27, Source: test_gensim_models.py
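
The (corpus, dictionary) pair returned above can be fed straight into a gensim topic model; a minimal sketch, with an arbitrary num_topics:

from gensim.models import LdaModel

corpus, dictionary = get_corpus_dictionary()
lda = LdaModel(corpus=corpus, id2word=dictionary, num_topics=2)
print(lda.print_topics())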

Example 8: preprocess_corpora

# Required import: from gensim.corpora.dictionary import Dictionary [as alias]
# Or: from gensim.corpora.dictionary.Dictionary import doc2bow [as alias]
def preprocess_corpora(corpora, stopwords, allowed_pos, max_doc=float('inf'), no_above=0.5, no_below=1, keep_n=None):
    """


    :rtype : gensim.corpora.dictionary.Dictionary
    :param corpora: 
    :param stopwords: 
    :param allowed_pos: 
    :param max_doc: 
    :return: 
    """
    logging.info('Lemmatizing the corpora...')
    count = 0
    corpus_num = len(corpora)
    processed_corpora = []
    corpus_id2orig_id = []

    for index, corpus in corpora.items():
        count += 1
        if count > max_doc:
            break
        if corpus is None:  # skip if corpus is None
            continue

        print('\r', count, '/', corpus_num, end='')
        cleaned_corpus = clean_text(corpus)  # delete irrelevant characters
        corpus = []
        tokens = lemmatize(content=cleaned_corpus, allowed_tags=allowed_pos)
        for token in tokens:
            word, pos = token.split('/')
            corpus.append(word)

        # convert compound word into one token
        corpus = convert_compound(corpus)

        # filter stop words, long words, and non-english words
        corpus = [w for w in corpus if w not in stopwords and 2 <= len(w) <= 15 and w.islower()]
        processed_corpora.append(corpus)
        corpus_id2orig_id.append(index)

    print('\n')

    logging.info('Creating dictionary and corpus...')
    dictionary = Dictionary(processed_corpora)
    dictionary.corpus_id2orig_id = corpus_id2orig_id

    logging.info('Filtering unimportant terms...')
    dictionary.filter_extremes(no_below=no_below, no_above=no_above, keep_n=keep_n)
    dictionary.compactify()

    logging.info('Generating corpus...')
    dictionary.corpus = [dictionary.doc2bow(corpus) for corpus in processed_corpora]
    dictionary.id2token = revdict(dictionary.token2id)

    return dictionary
Developer: kensk8er, Project: MsTweetAnalysis, Lines: 57, Source: preprocess.py

Example 9: create_mapping_dicts

# Required import: from gensim.corpora.dictionary import Dictionary [as alias]
# Or: from gensim.corpora.dictionary.Dictionary import doc2bow [as alias]
def create_mapping_dicts(wrd_embedding, filter_corpus=False, bodies=None,
                         headlines=None): 
    """Generate word:index, word:vector, index:word dictionaries. 

    Args: 
    ----
        wrd_embedding: gensim.models.word2vec.Word2Vec fitted model
        filter_corpus (optional): boolean  
            Filter the corpus to only those words seen in the bodies/headlines. 
        bodies (optional): list of lists 
            Must be passed in if `filter_corpus` is True. 
        headlines (optional): list of lists  
            Must be passed in if `filter_corpus` is True. 

    Return: 
    ------
        word_idx_dct: dict
        idx_word_dct: dict
        word_vector_dct: dict
    """

    if filter_corpus:
        if (not bodies or not headlines): 
            excep_str = "Must pass in bodies and headlines with filter_corpus True!"
            raise Exception(excep_str)
        else: 
            wrd_embedding = _filter_corpus(bodies, headlines, wrd_embedding)

    gensim_dct = Dictionary()
    gensim_dct.doc2bow(wrd_embedding.vocab.keys(), allow_update=True)

    # Leave index 0 for the newline character
    word_idx_dct = {wrd: (idx + 1) for idx, wrd in gensim_dct.items()}
    idx_word_dct = {(idx + 1): wrd for idx, wrd in gensim_dct.items()}
    word_idx_dct['\n'] = 0
    idx_word_dct[0] = '\n'

    word_vector_dct = {wrd: wrd_embedding[wrd] for idx, wrd in gensim_dct.items()}
    vec_dim = next(len(value) for value in word_vector_dct.values())
    word_vector_dct['\n'] = np.zeros((vec_dim))

    return word_idx_dct, idx_word_dct, word_vector_dct 
Developer: sallamander, Project: headline-generation, Lines: 44, Source: mappings.py

Example 10: create_mapping_dicts

# Required import: from gensim.corpora.dictionary import Dictionary [as alias]
# Or: from gensim.corpora.dictionary.Dictionary import doc2bow [as alias]
def create_mapping_dicts(wrd_embedding, reviews=None, vocab_size=None):
    """Generate word:index, word:vector, index:word dictionaries. 

    Args: 
    ----
        wrd_embedding: gensim.models.word2vec.Word2Vec fitted model
        reviews (optional): np.array (or array-like) of lists of strings
            Used to filter the vocabulary, either to only those words in `reviews`
            or the most common `vocab_size` words in `reviews` that are also in 
            the `wrd_embedding`.
        vocab_size (optional): int
            Keep only `vocab_size` most common words from the reviews. 

    Return: 
    ------
        word_idx_dct: dict
        idx_word_dct: dict
        word_vector_dct: dict
    """

    if reviews is not None: 
        wrd_embedding = _filter_corpus(wrd_embedding, reviews, vocab_size)

    gensim_dct = Dictionary()
    gensim_dct.doc2bow(wrd_embedding.vocab.keys(), allow_update=True)

    # Leave index 0 for masking the padding, 1 for the end of sequence
    # character (EOS), and 2 for unknown words (denoted 'UNK')
    wrd_idx_dct = {wrd: (idx + 3) for idx, wrd in gensim_dct.items()}
    idx_wrd_dct = {(idx + 3): wrd for idx, wrd in gensim_dct.items()}
    wrd_idx_dct['EOS'] = 1
    idx_wrd_dct[1] = 'EOS'
    wrd_idx_dct['UNK'] = 2
    idx_wrd_dct[2] = 'UNK'

    wrd_vector_dct = {wrd: wrd_embedding[wrd] for idx, wrd in gensim_dct.items()}
    embedding_dim = wrd_embedding.vector_size
    wrd_vector_dct['EOS'] = np.zeros((embedding_dim))
    wrd_vector_dct['UNK'] = np.zeros((embedding_dim))

    return wrd_idx_dct, idx_wrd_dct, wrd_vector_dct 
Developer: sallamander, Project: review-analysis, Lines: 43, Source: mappings.py

Example 11: __init__

# Required import: from gensim.corpora.dictionary import Dictionary [as alias]
# Or: from gensim.corpora.dictionary.Dictionary import doc2bow [as alias]
class tip_rec:

    def __init__(self, num_topics=15):
        self.numtopics = num_topics
        self.topic_dict = dict(enumerate(np.zeros(num_topics)))
        self.user_dict = {}
        self.model = None
        self.worddict = {}
        self.mydict = None

    def train(self, df):
        self.user_dict = {el: self.topic_dict.copy() for el in df.sender.unique()}
        cv = CV(stop_words='english')
        X = cv.fit_transform(df['context'])
        vocab = cv.vocabulary_.keys()
        self.worddict = dict(enumerate(vocab))
        self.mydict = Dictionary()
        self.mydict = self.mydict.from_corpus(matutils.Sparse2Corpus(X, documents_columns=False), id2word=self.worddict)
        self.model = LatentDA.LdaModel(matutils.Sparse2Corpus(X, documents_columns=False), num_topics=self.numtopics, passes=20, id2word=self.worddict)
        for i in df.iterrows():
            if i[1]['context'] == '':
                continue
            # accumulate each sender's affinity for the topics of their tips,
            # weighted by the tip amount
            values = self.model[self.mydict.doc2bow(i[1]['context'].split())]
            for val in values:
                if val[0] in self.user_dict[i[1].sender]:
                    if i[1].amt == '':
                        continue
                    self.user_dict[i[1].sender][val[0]] += val[1] * float(i[1].amt)
                    continue
                self.user_dict[i[1].sender][val[0]] = val[1]
        # normalize each user's topic affinities so they sum to 1
        for i in self.user_dict:
            norm_const = sum(self.user_dict[i].values())
            for j in self.user_dict[i]:
                self.user_dict[i][j] = self.user_dict[i][j] / norm_const

    def predict(self, text, username=''):
        topics = self.model[self.mydict.doc2bow(text.split())]
        doc_aff = np.zeros(self.numtopics)
        for i in topics:
            doc_aff[i[0]] = i[1]
        if username == '':
            # score every known user against the document's topic vector
            returndict = {}
            for user in self.user_dict:
                user_aff = np.array(list(self.user_dict[user].values()))
                score = np.linalg.norm(user_aff - doc_aff)
                returndict[user] = score
            return returndict
        else:
            user_aff = np.array(list(self.user_dict[username].values()))
            score = np.linalg.norm(user_aff - doc_aff)
            return (username, score)
Developer: nspatil, Project: tipworthy, Lines: 55, Source: tipworthy_rec.py

Example 12: create_mapping_dicts

# Required import: from gensim.corpora.dictionary import Dictionary [as alias]
# Or: from gensim.corpora.dictionary.Dictionary import doc2bow [as alias]
def create_mapping_dicts(wrd_embedding, filter_corpus=False, bodies=None,
                         headlines=None): 
    """Generate word:index, word:vector, index:word dictionaries. 

    Args: 
    ----
        wrd_embedding: gensim.models.word2vec.Word2Vec fitted model
        filter_corpus (optional): boolean  
            Filter the corpus to only those words seen in the articles. Use
            to speed up iteration during intial building/training phases. 
        bodies (optional): list of lists 
            Must be passed in if `filter_corpus` is True. 
        headlines (optional): list of lists  
            Must be passed in if `filter_corpus` is True. 

    Return: 
    ------
        word_idx_dct: dict
        idx_word_dct: dict
        word_vector_dct: dict
    """

    if filter_corpus:
        if (not bodies or not headlines): 
            raise Exception('Must pass in bodies and headlines with filter_corpus as True!')
        else: 
            wrd_embedding = _filter_corpus(bodies, headlines, wrd_embedding)

    gensim_dct = Dictionary()
    gensim_dct.doc2bow(wrd_embedding.vocab.keys(), allow_update=True)

    word_idx_dct = {wrd: idx for idx, wrd in gensim_dct.items()}
    idx_word_dct = {idx: wrd for idx, wrd in gensim_dct.items()}
    word_vector_dct = {wrd: wrd_embedding[wrd] for idx, wrd in gensim_dct.items()}

    return word_idx_dct, idx_word_dct, word_vector_dct 
Developer: JostineHo, Project: headline-generation, Lines: 38, Source: preprocessing.py

Example 13: DigestedDocumentCollection

# Required import: from gensim.corpora.dictionary import Dictionary [as alias]
# Or: from gensim.corpora.dictionary.Dictionary import doc2bow [as alias]
class DigestedDocumentCollection(CorpusABC):
    """A bag-of-words representation of a corpus (collection of documents).

    This serves as direct input to modeling functions.  It is output from
    preprocessing functions.

    Parameters
    ----------
    corpus: A collection of tokenized documents
        Each document is a list of tokens, tokenized and normalized strings
        (either utf8 or unicode) (e.g. output of topik.SimpleTokenizer)

    Readers iterate over (id, content) tuples, but the id field is discarded in the output (for compatibility with gensim).

    """
    def __init__(self, tokenized_corpus):
        self.corpus = tokenized_corpus
        self.dict = Dictionary(tokenized_corpus.get_generator_without_id())
        super(DigestedDocumentCollection, self).__init__()

    def __iter__(self):
        """Discards id field - for compatibility with Gensim."""
        for _id, doc_tokens in self.corpus:
            yield self.dict.doc2bow(doc_tokens)

    def __len__(self):
        return len(self.corpus)

    def get_id2word_dict(self):
        return self.dict

    def save(self, filename):
        self.corpus.save(filename)

    @classmethod
    def load(cls, filename):
        return cls(load_persisted_corpus(filename))

    @property
    def persistor(self):
        return self.corpus.persistor

    @property
    def filter_string(self):
        return self.corpus.filter_string
Developer: lewismc, Project: topik, Lines: 47, Source: digested_document_collection.py

Example 14: testing

# Required import: from gensim.corpora.dictionary import Dictionary [as alias]
# Or: from gensim.corpora.dictionary.Dictionary import doc2bow [as alias]
module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
datapath = lambda fname: os.path.join(module_path, 'test_data', fname)

# set up vars used in testing ("Deerwester" from the web tutorial)
texts = [['human', 'interface', 'computer'],
         ['survey', 'user', 'computer', 'system', 'response', 'time'],
         ['eps', 'user', 'interface', 'system'],
         ['system', 'human', 'system', 'eps'],
         ['user', 'response', 'time'],
         ['trees'],
         ['graph', 'trees'],
         ['graph', 'minors', 'trees'],
         ['graph', 'minors', 'survey']]
dictionary = Dictionary(texts)
corpus = [dictionary.doc2bow(text) for text in texts]
boolean_document_based = ['u_mass']
sliding_window_based = ['c_v', 'c_uci', 'c_npmi']


def testfile():
    # temporary data will be stored to this file
    return os.path.join(tempfile.gettempdir(), 'gensim_models.tst')

def checkCoherenceMeasure(topics1, topics2, coherence):
    """Check provided topic coherence algorithm on given topics"""
    if coherence in boolean_document_based:
        cm1 = CoherenceModel(topics=topics1, corpus=corpus, dictionary=dictionary, coherence=coherence)
        cm2 = CoherenceModel(topics=topics2, corpus=corpus, dictionary=dictionary, coherence=coherence)
    else:
        cm1 = CoherenceModel(topics=topics1, texts=texts, dictionary=dictionary, coherence=coherence)
Developer: ArifAhmed1995, Project: gensim, Lines: 32, Source: test_coherencemodel.py

Example 15: Dictionary

# Required import: from gensim.corpora.dictionary import Dictionary [as alias]
# Or: from gensim.corpora.dictionary.Dictionary import doc2bow [as alias]
logging.info('load the dictionary')
id2word, word2id = utils.loadDictionary(working_corpus + word_ids_extension)
dictionary = Dictionary(word2id=word2id, id2word=id2word)

logging.info('load the log_ent model')
log_ent = LogEntropyModel.load(results_path + norm_model)

logging.info('load the LSI model')
lsi = LsiModel.load(results_path + trans_model)

for key in articles:

    logging.info('current term: %s' % key)

    term_list = articles[key].keys()
    text_list = [dictionary.doc2bow(article['text'], allowUpdate=False, returnMissingWords=False) 
            for article in articles[key].values()]
    sim_matrix = np.zeros((len(text_list), len(text_list)))

    logging.info('transform the textlist')
    text_list = lsi[log_ent[text_list]]

    logging.info('compute similarity matrix')
    for i, par1 in enumerate(text_list):
        for j, par2 in enumerate(text_list):
            sim_matrix[i, j] = matutils.cossim(par1, par2)
    matrices[key] = {}
    matrices[key]['term_list'] = term_list
    matrices[key]['sim_matrix'] = sim_matrix
    assert np.shape(sim_matrix)[0] == len(term_list)
    
Developer: dedan, Project: dedan_runs, Lines: 32, Source: sparql_sim_matrix.py


Note: The gensim.corpora.dictionary.Dictionary.doc2bow method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by many developers; copyright remains with the original authors, and distribution and use are governed by each project's license. Please do not reproduce without permission.