This article collects typical usage examples of the Dataset.Dataset class in Python. If you are wondering what the Dataset class does, how to use it, or what it looks like in real code, the curated class examples below may help.
Below, 15 code examples of the Dataset class are shown, sorted by popularity by default.
Example 1: load_data
def load_data(config, cache_byte_size, files_config_key, **kwargs):
  """
  :type config: Config
  :type cache_byte_size: int
  :type chunking: str
  :type seq_ordering: str
  :rtype: (Dataset,int)
  :returns the dataset, and the cache byte size left over if we cache the whole dataset.
  """
  if not config.has(files_config_key):
    return None, 0
  if config.is_typed(files_config_key) and isinstance(config.typed_value(files_config_key), dict):
    new_kwargs = config.typed_value(files_config_key)
    assert isinstance(new_kwargs, dict)
    kwargs.update(new_kwargs)
    if 'cache_byte_size' not in new_kwargs:
      if kwargs.get('class', None) == 'HDFDataset':
        kwargs["cache_byte_size"] = cache_byte_size
    Dataset.kwargs_update_from_config(config, kwargs)
    data = init_dataset(kwargs)
  else:
    config_str = config.value(files_config_key, "")
    data = init_dataset_via_str(config_str, config=config, cache_byte_size=cache_byte_size, **kwargs)
  cache_leftover = 0
  if isinstance(data, HDFDataset):
    cache_leftover = data.definite_cache_leftover
  return data, cache_leftover
Example 2: load_data
def load_data(config, cache_byte_size, files_config_key, **kwargs):
  """
  :param Config config:
  :param int cache_byte_size:
  :param str files_config_key: such as "train" or "dev"
  :param kwargs: passed on to init_dataset() or init_dataset_via_str()
  :rtype: (Dataset,int)
  :returns the dataset, and the cache byte size left over if we cache the whole dataset.
  """
  if not config.bool_or_other(files_config_key, None):
    return None, 0
  kwargs = kwargs.copy()
  kwargs.setdefault("name", files_config_key)
  if config.is_typed(files_config_key) and isinstance(config.typed_value(files_config_key), dict):
    config_opts = config.typed_value(files_config_key)
    assert isinstance(config_opts, dict)
    kwargs.update(config_opts)
    if 'cache_byte_size' not in config_opts:
      if kwargs.get('class', None) == 'HDFDataset':
        kwargs["cache_byte_size"] = cache_byte_size
    Dataset.kwargs_update_from_config(config, kwargs)
    data = init_dataset(kwargs)
  else:
    config_str = config.value(files_config_key, "")
    data = init_dataset_via_str(config_str, config=config, cache_byte_size=cache_byte_size, **kwargs)
  cache_leftover = 0
  if isinstance(data, HDFDataset):
    cache_leftover = data.definite_cache_leftover
  return data, cache_leftover
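A minimal sketch of how load_data might be called from a training setup. The 1 GiB cache budget and the "train" config key are assumptions chosen for illustration, not values taken from the example above:

# Hypothetical call site (assumed, not part of the original code):
# load the dataset described under the "train" config key and allow
# up to 1 GiB of HDF cache.
train_data, cache_left = load_data(config, 1024 ** 3, "train")
if train_data is not None:
    print("train dataset loaded, cache leftover: %i bytes" % cache_left)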
Example 3: test_read_data_points
def test_read_data_points(self):
    set = Dataset()
    set.read_data_points("flueaeg.txt")
    data = set.get_data
    self.assertEqual(data[100.0], 16.6)
    # here we should see an error printed
    set.read_data_points("findes-ikke.txt")
Example 4: fixSizeTypes
def fixSizeTypes(dataLines):
    fixedDataLines = []
    dataset = Dataset(dataLines)
    for line in dataset.dataLines:
        columns = dataset.getColumns(line)
        columns['size_type'] = columns['size_type'].replace(" ", "_")
        fixedDataLines.append(dataset.getLine(columns))
    return fixedDataLines
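Example 4, like Examples 6, 8 and 12 further down, follows one recurring pattern: wrap the raw lines in a Dataset, read each line's columns, change one or more fields, and rebuild the line. The sketch below reproduces that pattern with a self-contained stand-in; the SimpleDataset class, the column order, and the tab-separated format are assumptions for illustration and are not the original Dataset implementation.

# Hypothetical stand-in for the Dataset wrapper used in Examples 4, 6, 8 and 12.
# Assumes tab-separated lines with a fixed column order; the real class may differ.
COLUMN_NAMES = ['brand', 'label', 'size_type', 'url']

class SimpleDataset:
    def __init__(self, dataLines):
        self.dataLines = dataLines

    def getColumns(self, line):
        # split one raw line into a {column_name: value} dict
        return dict(zip(COLUMN_NAMES, line.rstrip("\n").split("\t")))

    def getLine(self, columns):
        # rebuild a raw line from the column dict
        return "\t".join(columns[name] for name in COLUMN_NAMES)

lines = ["acme\tShirt\tEU size\thttp://example.com"]
ds = SimpleDataset(lines)
fixed = []
for line in ds.dataLines:
    cols = ds.getColumns(line)
    cols['size_type'] = cols['size_type'].replace(" ", "_")  # same fix as fixSizeTypes()
    fixed.append(ds.getLine(cols))
print(fixed)  # ['acme\tShirt\tEU_size\thttp://example.com']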
Example 5: dev
def dev():
    generator = Generator(10)
    print("Generating...")
    samples, labels = generator.generate(20000)
    print("Done generating. Shuffling...")
    Generator.shuffle_in_unison_scary(samples, labels)
    print("Done shuffling. Splitting...")
    db = Dataset()
    db.init(samples, labels, 17000, 1500)
    print("Done splitting. Saving...")
    fileName = 'medium20000_10_shuffled_0.3obstacles.pkl'
    db.saveTo(fileName)
    print("Done saving to", fileName)
Example 6: changeURLs
def changeURLs(dataLines):
    changedUrlsLines = []
    dataset = Dataset(dataLines)
    for line in dataset.dataLines:
        columns = dataset.getColumns(line)
        columns['url'] = Brand.brandsUrls[columns['brand']]
        changedUrlsLines.append(dataset.getLine(columns))
    return changedUrlsLines
Example 7: __init__
def __init__(self, id=None, drawing=None,
             posX=0, posY=0,
             x1=0, y1=0, x2=0, y2=0,
             pen=None, brush=None):
    Dataset.__init__(self, id)
    self.drawing = drawing
    self.posX = posX
    self.posY = posY
    self.x1 = x1
    self.y1 = y1
    self.x2 = x2
    self.y2 = y2
    self.pen = pen
    self.brush = brush
Example 8: mergeSynonymousSizeTypes
def mergeSynonymousSizeTypes(dataLines):
    mergedDataLines = []
    dataset = Dataset(dataLines)
    for line in dataset.dataLines:
        columns = dataset.getColumns(line)
        if columns['size_type'] in SizeType.mergedSizeTypes:
            columns['size_type'] = SizeType.mergedSizeTypes[columns['size_type']]
        mergedDataLines.append(dataset.getLine(columns))
    return mergedDataLines
Example 9: train_set_loss_vars_for_cur_batches
def train_set_loss_vars_for_cur_batches(self):
  """
  Called via Engine.SeqTrainParallelControl.
  """
  assert self.train_have_loss_for_cur_batches()
  # See EngineUtil.assign_dev_data for reference.
  from Dataset import Dataset
  n_time, n_batch = Dataset.index_shape_for_batches(self.train_batches)
  n_output_dim = self.output_layer.attrs['n_out']
  output_loss = numpy.zeros((n_batch,), "float32")
  output_hat_y = numpy.zeros((n_time, n_batch, n_output_dim), "float32")
  offset_slice = 0
  for batch in self.train_batches:
    for seq in batch.seqs:
      o = seq.batch_frame_offset
      q = seq.batch_slice + offset_slice
      l = seq.frame_length
      # input-data, input-index will also be set in this loop. That is data-key "data".
      for k in [self.output_target]:
        if l[k] == 0:
          continue
        loss, hat_y = self.get_loss_and_hat_y(seq.seq_idx)
        assert seq.seq_start_frame[k] < hat_y.shape[0]
        assert seq.seq_end_frame[k] <= hat_y.shape[0]
        output_loss[q] += loss * float(l[k]) / hat_y.shape[0]
        output_hat_y[o[k]:o[k] + l[k], q] = hat_y[seq.seq_start_frame[k]:seq.seq_end_frame[k]]
  self.output_var_loss.set_value(output_loss)
  self.output_var_hat_y.set_value(output_hat_y)
Example 10: clone
def clone():
    print 'Enter name for new dataset:'
    dsname = raw_input()
    os.system('mkdir %s' % dsname)
    ds = Dataset()
    db = getDB()
    cur = db.cursor(MySQLdb.cursors.DictCursor)
    cur.execute("SELECT * FROM Answers WHERE isRetrieved=1")
    for row in cur.fetchall():
        ds.X.append(row['answer'])
        ds.Y.append(row['author'])
        ds.ts = max([ds.ts, row['updated_at']])
    with open('%s/data' % dsname, 'w') as f:
        cPickle.dump(ds, f)
    print 'Dataset cloned'
Example 11: getXAtMaxIm
def getXAtMaxIm(dataset: Dataset):
    data = dataset.getPlane()
    curMin = data[0][0]
    bestCycle = 0
    for cycle in range(len(data)):
        if max(data[cycle]) > curMin:
            curMin = max(data[cycle])
            bestCycle = cycle
    return bestCycle
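Despite the curMin name, getXAtMaxIm returns the index of the row ("cycle") of dataset.getPlane() whose maximum value is largest. A self-contained sketch of that behavior, using a hypothetical stand-in object (FakePlaneDataset is not part of the original code):

# Hypothetical stand-in exposing the getPlane() interface used above.
class FakePlaneDataset:
    def __init__(self, plane):
        self.plane = plane

    def getPlane(self):
        return self.plane

plane = [
    [0.1, 0.3, 0.2],   # cycle 0, max 0.3
    [0.5, 0.9, 0.4],   # cycle 1, max 0.9  <- overall maximum
    [0.6, 0.2, 0.1],   # cycle 2, max 0.6
]
print(getXAtMaxIm(FakePlaneDataset(plane)))  # prints 1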
Example 12: doUpperCase
def doUpperCase(dataLines):
    upperCaseLines = []
    dataset = Dataset(dataLines)
    for line in dataset.dataLines:
        columns = dataset.getColumns(line)
        columns['size_type'] = columns['size_type'].upper()
        columns['label'] = columns['label'].upper()
        columns['brand'] = columns['brand'].upper()
        columns['clothe_category'] = columns['clothe_category'].upper()
        columns['size_category'] = columns['size_category'].upper()
        columns['gender'] = columns['gender'].upper()
        upperCaseLines.append(dataset.getLine(columns))
    return upperCaseLines
Example 13: get_all_dataset
def get_all_dataset(self, idmodel):
    con = lite.connect(self.name)
    with con:
        con.row_factory = lite.Row
        cur = con.cursor()
        cur.execute("SELECT * FROM dataset where idmodel=:idmodel", {'idmodel': idmodel})
        rows = cur.fetchall()
        dataset = Dataset.from_db_rows(rows, self.setup)
    return dataset
Example 14: main
def main():
    global X
    global Y
    ds = Dataset.open('quora')
    X, Y = ds.X, ds.Y
    # Z = [re.findall(r"[\w']+", x) for x in X]
    # Z = [filter(None, x.split('.')) for x in X]
    # Z = ["".join(s) for s in Z]
    # Z = [z.split(' ') for z in Z]
    # Z = [[len(s) for s in z] for z in Z]
    # feature = []
    # for a in Z:
    #     wordLenDist = [0]*100
    #     for ln in a:
    #         wordLenDist[ln] += 1
    #     feature.append(wordLenDist)
    feature = []
    tokenizer = RegexpTokenizer(r'\w+')
    for x in X:
        All = len(nltk.word_tokenize(x))
        numPunctuation = All - len(tokenizer.tokenize(x))
        numWords = All - numPunctuation
        ff = [numPunctuation, numWords]
        feature.append(ff)
    X = feature
    Z = zip(X, Y)
    shuffle(Z)
    (X, Y) = zip(*Z)
    si = 0
    acc = 0.0
    cnt = 0
    while si < len(X):
        Xe = X[si:si+50]
        Ye = Y[si:si+50]
        X1 = X[:si] + X[si+50:]
        Y1 = Y[:si] + Y[si+50:]
        acc += train_chunk(X1, Y1, Xe, Ye)
        cnt += 1
        si += 50
    print 'Accuracy: %f' % (acc/cnt)
Example 15: benchmark
def benchmark(lstm_unit, use_gpu):
  """
  :param str lstm_unit: e.g. "LSTMBlock", one of LstmCellTypes
  :param bool use_gpu:
  :return: runtime in seconds of the training itself, excluding initialization
  :rtype: float
  """
  device = {True: "GPU", False: "CPU"}[use_gpu]
  key = "%s:%s" % (device, lstm_unit)
  print(">>> Start benchmark for %s." % key)
  config = Config()
  config.update(make_config_dict(lstm_unit=lstm_unit, use_gpu=use_gpu))
  dataset_kwargs = config.typed_value("train")
  Dataset.kwargs_update_from_config(config, dataset_kwargs)
  dataset = init_dataset(dataset_kwargs)
  engine = Engine(config=config)
  engine.init_train_from_config(config=config, train_data=dataset)
  print(">>> Start training now for %s." % key)
  start_time = time.time()
  engine.train()
  runtime = time.time() - start_time
  print(">>> Runtime of %s: %s" % (key, hms_fraction(runtime)))
  engine.finalize()
  return runtime
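A hypothetical driver (assumed for illustration, not taken from the example's source) could call benchmark() once per device with the "LSTMBlock" unit named in the docstring and compare the runtimes:

# Assumed driver loop; "LSTMBlock" is taken from the docstring above.
runtimes = {}
for use_gpu in (False, True):
    runtimes["GPU" if use_gpu else "CPU"] = benchmark("LSTMBlock", use_gpu)
print("Runtimes in seconds:", runtimes)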