This article collects typical usage examples of torchtext.datasets in Python. If you are wondering what torchtext.datasets does, how to call it, or want to see concrete examples, the curated code samples below may help. You can also explore further usage examples from the module it belongs to, torchtext.
The following shows 2 code examples of torchtext.datasets, sorted by popularity by default.
Example 1: sst
# Required module: import torchtext [as alias]
# or: from torchtext import datasets [as alias]
from torchtext import data, datasets

def sst(text_field, label_field, batch_size, **kargs):
    # Load the Stanford Sentiment Treebank splits with fine-grained (5-class) labels.
    train_data, dev_data, test_data = datasets.SST.splits(text_field, label_field, fine_grained=True)
    # Build the vocabularies over all three splits.
    text_field.build_vocab(train_data, dev_data, test_data)
    label_field.build_vocab(train_data, dev_data, test_data)
    # Dev and test are each served as a single batch; extra keyword arguments go to BucketIterator.
    train_iter, dev_iter, test_iter = data.BucketIterator.splits(
        (train_data, dev_data, test_data),
        batch_sizes=(batch_size, len(dev_data), len(test_data)),
        **kargs)
    return train_iter, dev_iter, test_iter
# load MR dataset
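For reference, here is a minimal sketch of how the sst helper above might be called, assuming a legacy torchtext release (roughly 0.2 to 0.8, where data.Field and data.BucketIterator are still top-level APIs); the field options, batch size, and the repeat keyword are illustrative assumptions rather than part of the example above.

from torchtext import data

# Hypothetical field setup; options here are illustrative.
text_field = data.Field(lower=True)
label_field = data.Field(sequential=False)

# Extra keyword arguments (here repeat=False) are forwarded to data.BucketIterator.splits.
train_iter, dev_iter, test_iter = sst(text_field, label_field, batch_size=64, repeat=False)

for batch in train_iter:
    feature, target = batch.text, batch.label  # tensors indexed by the vocabularies built inside sst()
    break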
Example 2: __init__
# Required module: import torchtext [as alias]
# or: from torchtext import datasets [as alias]
import os
import re
import codecs

from torchtext import data

def __init__(self, text_field, label_field, path=None, examples=None, **kwargs):
    """Create an MR dataset instance given a path and fields.

    Arguments:
        text_field: The field that will be used for text data.
        label_field: The field that will be used for label data.
        path: Path to the data file.
        examples: The examples containing all the data.
        Remaining keyword arguments: Passed to the constructor of
            data.Dataset.
    """
    def clean_str(string):
        """
        Tokenization/string cleaning for all datasets except for SST.
        Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py
        """
        string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
        string = re.sub(r"\'s", " \'s", string)
        string = re.sub(r"\'ve", " \'ve", string)
        string = re.sub(r"n\'t", " n\'t", string)
        string = re.sub(r"\'re", " \'re", string)
        string = re.sub(r"\'d", " \'d", string)
        string = re.sub(r"\'ll", " \'ll", string)
        string = re.sub(r",", " , ", string)
        string = re.sub(r"!", " ! ", string)
        string = re.sub(r"\(", r" \( ", string)
        string = re.sub(r"\)", r" \) ", string)
        string = re.sub(r"\?", r" \? ", string)
        string = re.sub(r"\s{2,}", " ", string)
        return string.strip()

    # Run clean_str over every text example as a preprocessing step.
    text_field.preprocessing = data.Pipeline(clean_str)
    fields = [('text', text_field), ('label', label_field)]

    if examples is None:
        path = self.dirname if path is None else path
        examples = []
        # Negative reviews: one example per line, labelled 'negative'.
        with codecs.open(os.path.join(path, 'rt-polarity.neg'), encoding='utf-8', errors='ignore') as f:
            examples += [
                data.Example.fromlist([line, 'negative'], fields) for line in f]
        # Positive reviews: one example per line, labelled 'positive'.
        with codecs.open(os.path.join(path, 'rt-polarity.pos'), encoding='utf-8', errors='ignore') as f:
            examples += [
                data.Example.fromlist([line, 'positive'], fields) for line in f]
    super(MR, self).__init__(examples, fields, **kwargs)
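This constructor is meant to sit inside a data.Dataset subclass named MR, which is where self.dirname and super(MR, self) come from. Below is a minimal sketch of what that surrounding class and a train/dev splitting helper could look like; the class skeleton, dirname value, split ratio, and shuffling are illustrative assumptions modelled on common torchtext dataset patterns, not taken from this page.

import os
import random

from torchtext import data

class MR(data.Dataset):
    # Hypothetical directory holding rt-polarity.pos and rt-polarity.neg.
    dirname = 'rt-polaritydata'

    # (place the __init__ shown above inside this class)

    @classmethod
    def splits(cls, text_field, label_field, dev_ratio=.1, shuffle=True, root='.', **kwargs):
        """Load the MR data once, shuffle it, and carve off a dev portion."""
        path = os.path.join(root, cls.dirname)
        examples = cls(text_field, label_field, path=path, **kwargs).examples
        if shuffle:
            random.shuffle(examples)
        dev_index = -1 * int(dev_ratio * len(examples))
        return (cls(text_field, label_field, examples=examples[:dev_index]),
                cls(text_field, label_field, examples=examples[dev_index:]))

With such a wrapper in place, train_data, dev_data = MR.splits(text_field, label_field) produces datasets that can be fed to data.BucketIterator in the same way as the sst helper in Example 1.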