当前位置: 首页>>代码示例>>Python>>正文


Python imdb.get_word_index方法代码示例

本文整理汇总了Python中keras.datasets.imdb.get_word_index方法的典型用法代码示例。如果您正苦于以下问题:Python imdb.get_word_index方法的具体用法?Python imdb.get_word_index怎么用?Python imdb.get_word_index使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在keras.datasets.imdb的用法示例。


在下文中一共展示了imdb.get_word_index方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_reuters

# 需要导入模块: from keras.datasets import imdb [as 别名]
# 或者: from keras.datasets.imdb import get_word_index [as 别名]
def test_reuters():
    """Smoke-test the Reuters dataset loaders and word index.

    This test downloads data, so it is deliberately run only ~20% of the
    time to keep frequent test runs fast.
    """
    random.seed(time.time())
    if random.random() <= 0.8:
        return

    (x_train, y_train), (x_test, y_test) = reuters.load_data()
    assert len(x_train) == len(y_train)
    assert len(x_test) == len(y_test)
    # Full corpus size is fixed at 11228 samples.
    assert len(x_train) + len(x_test) == 11228

    # Filtering by maxlen must still yield aligned features/labels.
    (x_train, y_train), (x_test, y_test) = reuters.load_data(maxlen=10)
    assert len(x_train) == len(y_train)
    assert len(x_test) == len(y_test)

    word_index = reuters.get_word_index()
    assert isinstance(word_index, dict)
开发者ID:hello-sea,项目名称:DeepLearning_Wavelet-LSTM,代码行数:16,代码来源:test_datasets.py

示例2: test_imdb

# 需要导入模块: from keras.datasets import imdb [as 别名]
# 或者: from keras.datasets.imdb import get_word_index [as 别名]
def test_imdb():
    """Smoke-test the IMDB dataset loaders and word index.

    This test downloads data, so it is deliberately run only ~20% of the
    time to keep frequent test runs fast.
    """
    random.seed(time.time())
    if random.random() <= 0.8:
        return

    (x_train, y_train), (x_test, y_test) = imdb.load_data()
    (x_train, y_train), (x_test, y_test) = imdb.load_data(maxlen=40)
    assert len(x_train) == len(y_train)
    assert len(x_test) == len(y_test)

    word_index = imdb.get_word_index()
    assert isinstance(word_index, dict)
开发者ID:hello-sea,项目名称:DeepLearning_Wavelet-LSTM,代码行数:13,代码来源:test_datasets.py

示例3: load_data

# 需要导入模块: from keras.datasets import imdb [as 别名]
# 或者: from keras.datasets.imdb import get_word_index [as 别名]
def load_data(data_source):
    """Load the sentiment dataset from Keras or from a local directory.

    Parameters:
        data_source: either "keras_data_set" (download IMDB via Keras and
            pad/truncate to `sequence_length`) or "local_dir" (load via
            `data_helpers`, shuffle, and split 90/10 into train/test).

    Returns:
        Tuple (x_train, y_train, x_test, y_test, vocabulary_inv) where
        vocabulary_inv maps integer indices back to words.
    """
    assert data_source in ["keras_data_set", "local_dir"], "Unknown data source"

    if data_source == "keras_data_set":
        (x_train, y_train), (x_test, y_test) = imdb.load_data(
            num_words=max_words, start_char=None, oov_char=None, index_from=None)

        # Fix every review to the same length, padding/cutting at the tail.
        x_train = sequence.pad_sequences(
            x_train, maxlen=sequence_length, padding="post", truncating="post")
        x_test = sequence.pad_sequences(
            x_test, maxlen=sequence_length, padding="post", truncating="post")

        # Invert word->index into index->word; index 0 is the pad token.
        vocabulary = imdb.get_word_index()
        vocabulary_inv = {idx: word for word, idx in vocabulary.items()}
        vocabulary_inv[0] = "<PAD/>"
    else:
        x, y, vocabulary, vocabulary_inv_list = data_helpers.load_data()
        vocabulary_inv = dict(enumerate(vocabulary_inv_list))
        # Labels arrive one-hot encoded; collapse to class indices.
        y = y.argmax(axis=1)

        # Shuffle, then keep the first 90% for training.
        permutation = np.random.permutation(np.arange(len(y)))
        x = x[permutation]
        y = y[permutation]
        split_at = int(len(x) * 0.9)
        x_train, x_test = x[:split_at], x[split_at:]
        y_train, y_test = y[:split_at], y[split_at:]

    return x_train, y_train, x_test, y_test, vocabulary_inv


# Data Preparation 
开发者ID:alexander-rakhlin,项目名称:CNN-for-Sentence-Classification-in-Keras,代码行数:33,代码来源:sentiment_cnn.py

示例4: get_vectors_from_text

# 需要导入模块: from keras.datasets import imdb [as 别名]
# 或者: from keras.datasets.imdb import get_word_index [as 别名]
def get_vectors_from_text(dataset_list, word_to_ind=None,
                          start_char=1,
                          index_from=3,
                          maxlen=400,
                          num_words=5000,
                          oov_char=2, skip_top=0):
    '''
    Gets the list vector mapped according to the word to indices dictionary.

    @param
        dataset_list = list of review texts in unicode format
        word_to_ind = word to indices dictionary; defaults to the IMDB
                      vocabulary from imdb.get_word_index() (fetched lazily)
        hyperparameters: start_char-->sentence starting after this char.
                        index_from-->indices below this will not be encoded.
                        maxlen-->maximum length of the sequence to be considered.
                        num_words-->number of words to be considered according to
                                    the rank. Rank is given according to the
                                    frequency of occurrence.
                        oov_char-->out-of-vocabulary character.
                        skip_top-->number of top-ranked words to be skipped.
    @returns:
        x_train:       Final list of vectors (as lists) of the review texts,
                       padded to maxlen.
    '''
    # BUG FIX: the original signature used `word_to_ind=imdb.get_word_index()`.
    # A call in a default argument is evaluated once, at module import time,
    # which downloaded the IMDB vocabulary even when this function was never
    # called and shared one dict across all calls. Fetch lazily instead.
    if word_to_ind is None:
        word_to_ind = imdb.get_word_index()

    # Tokenize each review and keep only words present in the vocabulary.
    x_train = []
    for review_string in dataset_list:
        words = text_to_word_sequence(review_string)
        x_train.append([word_to_ind[w] for w in words if w in word_to_ind])

    # Prepend the start character and shift indices by index_from, matching
    # the encoding convention of imdb.load_data().
    if start_char is not None:
        x_train = [[start_char] + [w + index_from for w in x] for x in x_train]
    elif index_from:
        x_train = [[w + index_from for w in x] for x in x_train]

    # Truncate to maxlen (padding to maxlen happens at the end).
    x_train = [x[:maxlen] for x in x_train]

    # If num_words is not given, infer it from the largest index present.
    if not num_words:
        num_words = max(max(x) for x in x_train)

    # By convention: 0 = padding, 1 = start, 2 = OOV. Indices outside
    # [skip_top, num_words) are replaced by oov_char, or dropped entirely
    # when oov_char is None.
    if oov_char is not None:
        x_train = [[w if (skip_top <= w < num_words) else oov_char for w in x]
                   for x in x_train]
    else:
        x_train = [[w for w in x if (skip_top <= w < num_words)]
                   for x in x_train]

    # Pad the sequences to a uniform length and return.
    x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
    return x_train
开发者ID:Azure-Samples,项目名称:MachineLearningSamples-SentimentAnalysis,代码行数:56,代码来源:SentimentExtraction.py


注:本文中的keras.datasets.imdb.get_word_index方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。