This article collects typical usage examples of the keras.datasets.imdb.get_word_index method in Python. If you have been asking yourself how imdb.get_word_index works, what it is for, or where to find real examples of it, the curated code samples below should help. You can also explore further usage examples of the keras.datasets.imdb module, to which the method belongs.
Four code examples of imdb.get_word_index are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
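Before diving into the examples, here is a minimal sketch of what the method returns, assuming Keras' stock IMDB encoding (load_data offsets indices by index_from=3, reserving 0 for padding, 1 for the start marker, and 2 for out-of-vocabulary words); the printed value is illustrative:

from keras.datasets import imdb

# get_word_index() returns a dict mapping each word to its frequency rank.
word_index = imdb.get_word_index()
print(word_index["the"])  # 1: "the" is the most frequent word in the corpus

# Decoding a review loaded with the default load_data() arguments:
# stored indices equal rank + index_from (3), so invert with that offset.
(x_train, y_train), _ = imdb.load_data(num_words=10000)
index_to_word = {rank + 3: word for word, rank in word_index.items()}
decoded = " ".join(index_to_word.get(i, "?") for i in x_train[0])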
Example 1: test_reuters
# Required module import: from keras.datasets import imdb [as alias]
# Or: from keras.datasets.imdb import get_word_index [as alias]
import random
import time

from keras.datasets import reuters


def test_reuters():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (x_train, y_train), (x_test, y_test) = reuters.load_data()
        assert len(x_train) == len(y_train)
        assert len(x_test) == len(y_test)
        assert len(x_train) + len(x_test) == 11228
        (x_train, y_train), (x_test, y_test) = reuters.load_data(maxlen=10)
        assert len(x_train) == len(y_train)
        assert len(x_test) == len(y_test)
        word_index = reuters.get_word_index()
        assert isinstance(word_index, dict)
Example 2: test_imdb
# Required module import: from keras.datasets import imdb [as alias]
# Or: from keras.datasets.imdb import get_word_index [as alias]
import random
import time

from keras.datasets import imdb


def test_imdb():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (x_train, y_train), (x_test, y_test) = imdb.load_data()
        (x_train, y_train), (x_test, y_test) = imdb.load_data(maxlen=40)
        assert len(x_train) == len(y_train)
        assert len(x_test) == len(y_test)
        word_index = imdb.get_word_index()
        assert isinstance(word_index, dict)
Example 3: load_data
# Required module import: from keras.datasets import imdb [as alias]
# Or: from keras.datasets.imdb import get_word_index [as alias]
import numpy as np
from keras.datasets import imdb
from keras.preprocessing import sequence

import data_helpers  # local helper module from the source project

# max_words and sequence_length are module-level configuration values
# defined elsewhere in the source file.


def load_data(data_source):
    assert data_source in ["keras_data_set", "local_dir"], "Unknown data source"
    if data_source == "keras_data_set":
        (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_words, start_char=None,
                                                              oov_char=None, index_from=None)
        x_train = sequence.pad_sequences(x_train, maxlen=sequence_length, padding="post", truncating="post")
        x_test = sequence.pad_sequences(x_test, maxlen=sequence_length, padding="post", truncating="post")
        vocabulary = imdb.get_word_index()
        vocabulary_inv = dict((v, k) for k, v in vocabulary.items())
        vocabulary_inv[0] = "<PAD/>"
    else:
        x, y, vocabulary, vocabulary_inv_list = data_helpers.load_data()
        vocabulary_inv = {key: value for key, value in enumerate(vocabulary_inv_list)}
        y = y.argmax(axis=1)
        # Shuffle data
        shuffle_indices = np.random.permutation(np.arange(len(y)))
        x = x[shuffle_indices]
        y = y[shuffle_indices]
        train_len = int(len(x) * 0.9)
        x_train = x[:train_len]
        y_train = y[:train_len]
        x_test = x[train_len:]
        y_test = y[train_len:]
    return x_train, y_train, x_test, y_test, vocabulary_inv


# Data Preparation
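For context, a minimal sketch of how this loader might be called; max_words and sequence_length are module-level configuration in the source file, so the values below are illustrative assumptions:

max_words = 5000        # vocabulary size (illustrative)
sequence_length = 400   # padded review length (illustrative)

x_train, y_train, x_test, y_test, vocabulary_inv = load_data("keras_data_set")
print(x_train.shape)      # (25000, 400) with the stock IMDB train split
print(vocabulary_inv[0])  # "<PAD/>"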
Example 4: get_vectors_from_text
# Required module import: from keras.datasets import imdb [as alias]
# Or: from keras.datasets.imdb import get_word_index [as alias]
from keras.datasets import imdb
from keras.preprocessing import sequence
from keras.preprocessing.text import text_to_word_sequence


# Note: the default for word_to_ind is evaluated once, at function-definition
# time, so importing this module triggers the word-index download.
def get_vectors_from_text(dataset_list, word_to_ind=imdb.get_word_index(),
                          start_char=1,
                          index_from=3,
                          maxlen=400,
                          num_words=5000,
                          oov_char=2, skip_top=0):
    '''
    Maps a list of review texts to index vectors according to the
    word-to-index dictionary.
    @param
    dataset_list = list of review texts in unicode format
    word_to_ind = word-to-index dictionary
    hyperparameters: start_char --> each encoded sequence is prefixed with this character.
                     index_from --> indices below this value are not encoded.
                     maxlen --> maximum length of the sequence to be considered.
                     num_words --> number of words to keep, ranked by
                                   frequency of occurrence.
                     oov_char --> out-of-vocabulary character.
                     skip_top --> number of top-ranked words to skip.
    @returns:
    x_train: final list of index vectors (as lists) for the review texts
    '''
    x_train = []
    for review_string in dataset_list:
        review_string_list = text_to_word_sequence(review_string)
        x_predict = []
        for word in review_string_list:
            if word not in word_to_ind:
                continue
            x_predict.append(word_to_ind[word])
        x_train.append(x_predict)
    # add the start_char and apply the index_from offset
    if start_char is not None:
        x_train = [[start_char] + [w + index_from for w in x] for x in x_train]
    elif index_from:
        x_train = [[w + index_from for w in x] for x in x_train]
    # truncate each sequence to maxlen
    x_train = [ele[:maxlen] for ele in x_train]
    # if num_words is not given, infer it from the data
    if not num_words:
        num_words = max([max(x) for x in x_train])
    # by convention, use 2 as the OOV word
    # reserve 'index_from' (=3 by default) characters:
    # 0 (padding), 1 (start), 2 (OOV)
    if oov_char is not None:
        x_train = [[w if (skip_top <= w < num_words) else oov_char for w in x] for x in x_train]
    else:
        x_train = [[w for w in x if (skip_top <= w < num_words)] for x in x_train]
    # pad the sequences to a uniform length
    x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
    # return the texts in vector form
    return x_train
Author ID: Azure-Samples, Project: MachineLearningSamples-SentimentAnalysis, Lines of code: 56, Source file: SentimentExtraction.py
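A minimal usage sketch for this function, with illustrative review strings (the first call also downloads the IMDB word index for the default word_to_ind argument):

reviews = ["this movie was a great surprise", "terrible plot and even worse acting"]
vectors = get_vectors_from_text(reviews)
print(vectors.shape)  # (2, 400): one row per review, padded/truncated to maxlen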