This article collects typical usage examples of the Java class org.datavec.api.records.reader.RecordReader. If you are asking yourself what RecordReader is, what it is used for, or how to use it, the curated code examples below should help.
The RecordReader class belongs to the org.datavec.api.records.reader package. Fifteen code examples are shown below, ordered by popularity.
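Before the examples, here is a minimal end-to-end sketch of the typical RecordReader life cycle: construct a concrete reader, initialize it against an InputSplit, then iterate. The iris.dat resource name is borrowed from Example 1 below; exact import paths (ClassPathResource in particular) may vary between DataVec versions.
import org.datavec.api.records.reader.RecordReader;
import org.datavec.api.records.reader.impl.csv.CSVRecordReader;
import org.datavec.api.split.FileSplit;
import org.datavec.api.util.ClassPathResource;
import org.datavec.api.writable.Writable;
import java.util.List;

public class RecordReaderQuickStart {
    public static void main(String[] args) throws Exception {
        RecordReader rr = new CSVRecordReader(0, ',');   //skip 0 header lines, split fields on ','
        rr.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));
        while (rr.hasNext()) {
            List<Writable> row = rr.next();   //one CSV row as a List of Writables
            System.out.println(row);
        }
        rr.close();
    }
}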
Example 1: test
import org.datavec.api.records.reader.RecordReader; //import the required package/class
@Test
public void test() throws Exception {
    CSVRecordReader rr = new CSVRecordReader(0, ',');
    rr.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));
    CSVRecordReader rr2 = new CSVRecordReader(0, ',');
    rr2.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));

    RecordReader rrC = new ConcatenatingRecordReader(rr, rr2);

    int count = 0;
    while (rrC.hasNext()) {
        rrC.next();
        count++;
    }

    assertEquals(300, count);
}
Example 2: createDataSource
import org.datavec.api.records.reader.RecordReader; //import the required package/class
private void createDataSource() throws IOException, InterruptedException {
    //First: get the dataset using the record reader. CSVRecordReader handles loading/parsing
    int numLinesToSkip = 0;
    String delimiter = ",";
    RecordReader recordReader = new CSVRecordReader(numLinesToSkip, delimiter);
    recordReader.initialize(new InputStreamInputSplit(dataFile));

    //Second: the RecordReaderDataSetIterator handles conversion to DataSet objects, ready for use in the neural network
    int labelIndex = 4;   //5 values in each row of the iris.txt CSV: 4 input features followed by an integer label (class) index. Labels are the 5th value (index 4) in each row
    int numClasses = 3;   //3 classes (types of iris flowers) in the iris data set. Classes have integer values 0, 1 or 2

    DataSetIterator iterator = new RecordReaderDataSetIterator(recordReader, batchSize, labelIndex, numClasses);
    DataSet allData = iterator.next();
    allData.shuffle();
    SplitTestAndTrain testAndTrain = allData.splitTestAndTrain(0.80);   //Use 80% of data for training
    trainingData = testAndTrain.getTrain();
    testData = testAndTrain.getTest();

    //We need to normalize our data. We'll use NormalizerStandardize (which gives us mean 0, unit variance):
    DataNormalization normalizer = new NormalizerStandardize();
    normalizer.fit(trainingData);         //Collect the statistics (mean/stdev) from the training data. This does not modify the input data
    normalizer.transform(trainingData);   //Apply normalization to the training data
    normalizer.transform(testData);       //Apply normalization to the test data, using statistics calculated from the *training* set
}
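Examples 2 and 3 both reference fields (dataFile, batchSize, trainingData, testData) declared elsewhere in their enclosing class. A minimal sketch of what those declarations presumably look like, with types inferred from usage; the batch size of 150 is an assumption that would load the whole 150-row iris set in a single call to iterator.next():
private InputStream dataFile;    //raw CSV data; InputStreamInputSplit wraps a stream rather than a file path
private int batchSize = 150;     //assumed: one batch covering the full data set
private DataSet trainingData;    //populated by createDataSource()
private DataSet testData;        //populated by createDataSource()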
Example 3: createDataSource
import org.datavec.api.records.reader.RecordReader; //import the required package/class
private void createDataSource() throws IOException, InterruptedException {
    //First: get the dataset using the record reader. CSVRecordReader handles loading/parsing
    int numLinesToSkip = 0;
    String delimiter = ",";
    RecordReader recordReader = new CSVRecordReader(numLinesToSkip, delimiter);
    recordReader.initialize(new InputStreamInputSplit(dataFile));

    //Second: the RecordReaderDataSetIterator handles conversion to DataSet objects, ready for use in the neural network.
    //Here the iterator is configured for regression: column 11 holds the (single) target value
    int labelIndex = 11;
    DataSetIterator iterator = new RecordReaderDataSetIterator(recordReader, batchSize, labelIndex, labelIndex, true);
    DataSet allData = iterator.next();
    SplitTestAndTrain testAndTrain = allData.splitTestAndTrain(0.80);   //Use 80% of data for training
    trainingData = testAndTrain.getTrain();
    testData = testAndTrain.getTest();

    //We need to normalize our data. We'll use NormalizerStandardize (which gives us mean 0, unit variance):
    DataNormalization normalizer = new NormalizerStandardize();
    normalizer.fit(trainingData);         //Collect the statistics (mean/stdev) from the training data. This does not modify the input data
    normalizer.transform(trainingData);   //Apply normalization to the training data
    normalizer.transform(testData);       //Apply normalization to the test data, using statistics calculated from the *training* set
}
Example 4: testReader
import org.datavec.api.records.reader.RecordReader; //import the required package/class
@Test
public void testReader() throws Exception {
    TfidfVectorizer vectorizer = new TfidfVectorizer();
    Configuration conf = new Configuration();
    conf.setInt(TfidfVectorizer.MIN_WORD_FREQUENCY, 1);
    conf.setBoolean(RecordReader.APPEND_LABEL, true);
    vectorizer.initialize(conf);
    TfidfRecordReader reader = new TfidfRecordReader();
    reader.initialize(conf, new FileSplit(new ClassPathResource("labeled").getFile()));

    int count = 0;
    int[] labelAssertions = new int[3];
    while (reader.hasNext()) {
        Collection<Writable> record = reader.next();
        Iterator<Writable> recordIter = record.iterator();
        NDArrayWritable writable = (NDArrayWritable) recordIter.next();
        labelAssertions[count] = recordIter.next().toInt();
        count++;
    }

    assertArrayEquals(new int[] {0, 1, 2}, labelAssertions);
    assertEquals(3, reader.getLabels().size());
    assertEquals(3, count);
}
Example 5: testRecordMetaData
import org.datavec.api.records.reader.RecordReader; //import the required package/class
@Test
public void testRecordMetaData() throws Exception {
    TfidfVectorizer vectorizer = new TfidfVectorizer();
    Configuration conf = new Configuration();
    conf.setInt(TfidfVectorizer.MIN_WORD_FREQUENCY, 1);
    conf.setBoolean(RecordReader.APPEND_LABEL, true);
    vectorizer.initialize(conf);
    TfidfRecordReader reader = new TfidfRecordReader();
    reader.initialize(conf, new FileSplit(new ClassPathResource("labeled").getFile()));

    while (reader.hasNext()) {
        Record record = reader.nextRecord();
        assertNotNull(record.getMetaData().getURI());
        assertEquals(record.getMetaData().getReaderClass(), TfidfRecordReader.class);
    }
}
Example 6: testReadRecordFromMetaData
import org.datavec.api.records.reader.RecordReader; //import the required package/class
@Test
public void testReadRecordFromMetaData() throws Exception {
    TfidfVectorizer vectorizer = new TfidfVectorizer();
    Configuration conf = new Configuration();
    conf.setInt(TfidfVectorizer.MIN_WORD_FREQUENCY, 1);
    conf.setBoolean(RecordReader.APPEND_LABEL, true);
    vectorizer.initialize(conf);
    TfidfRecordReader reader = new TfidfRecordReader();
    reader.initialize(conf, new FileSplit(new ClassPathResource("labeled").getFile()));

    Record record = reader.nextRecord();
    Record reread = reader.loadFromMetaData(record.getMetaData());

    assertEquals(record.getRecord().size(), 2);
    assertEquals(reread.getRecord().size(), 2);
    assertEquals(record.getRecord().get(0), reread.getRecord().get(0));
    assertEquals(record.getRecord().get(1), reread.getRecord().get(1));
    assertEquals(record.getMetaData(), reread.getMetaData());
}
Example 7: testReadingJson
import org.datavec.api.records.reader.RecordReader; //import the required package/class
@Test
public void testReadingJson() throws Exception {
    //Load 3 values from 3 JSON files
    //Structure: a:value, b:value, c:x:value, c:y:value
    //And we want to load only a:value, b:value and c:x:value
    //For the first JSON file: all values are present
    //For the second JSON file: b:value is missing
    //For the third JSON file: c:x:value is missing
    ClassPathResource cpr = new ClassPathResource("json/json_test_0.txt");
    String path = cpr.getFile().getAbsolutePath().replace("0", "%d");

    InputSplit is = new NumberedFileInputSplit(path, 0, 2);

    RecordReader rr = new JacksonRecordReader(getFieldSelection(), new ObjectMapper(new JsonFactory()));
    rr.initialize(is);

    testJacksonRecordReader(rr);
}
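The getFieldSelection() helper is not shown in this snippet. Based on the expected values asserted in Example 8 (MISSING_B and MISSING_CX appear where a field is absent), a plausible reconstruction using DataVec's FieldSelection.Builder would look like this; the field names and defaults are inferred from the test, not taken from the original source:
private static FieldSelection getFieldSelection() {
    return new FieldSelection.Builder()
            .addField("a")                                //always present in the test files
            .addField(new Text("MISSING_B"), "b")         //default value if "b" is absent
            .addField(new Text("MISSING_CX"), "c", "x")   //default value if the nested "c" -> "x" is absent
            .build();
}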
Example 8: testJacksonRecordReader
import org.datavec.api.records.reader.RecordReader; //import the required package/class
private static void testJacksonRecordReader(RecordReader rr) {
    List<Writable> json0 = rr.next();
    List<Writable> exp0 = Arrays.asList((Writable) new Text("aValue0"), new Text("bValue0"), new Text("cxValue0"));
    assertEquals(exp0, json0);

    List<Writable> json1 = rr.next();
    List<Writable> exp1 = Arrays.asList((Writable) new Text("aValue1"), new Text("MISSING_B"), new Text("cxValue1"));
    assertEquals(exp1, json1);

    List<Writable> json2 = rr.next();
    List<Writable> exp2 = Arrays.asList((Writable) new Text("aValue2"), new Text("bValue2"), new Text("MISSING_CX"));
    assertEquals(exp2, json2);

    assertFalse(rr.hasNext());

    //Test reset
    rr.reset();
    assertEquals(exp0, rr.next());
    assertEquals(exp1, rr.next());
    assertEquals(exp2, rr.next());
    assertFalse(rr.hasNext());
}
Example 9: testRegexLineRecordReader
import org.datavec.api.records.reader.RecordReader; //import the required package/class
@Test
public void testRegexLineRecordReader() throws Exception {
    String regex = "(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (\\d+) ([A-Z]+) (.*)";

    RecordReader rr = new RegexLineRecordReader(regex, 1);
    rr.initialize(new FileSplit(new ClassPathResource("/logtestdata/logtestfile0.txt").getFile()));

    List<Writable> exp0 = Arrays.asList((Writable) new Text("2016-01-01 23:59:59.001"), new Text("1"),
            new Text("DEBUG"), new Text("First entry message!"));
    List<Writable> exp1 = Arrays.asList((Writable) new Text("2016-01-01 23:59:59.002"), new Text("2"),
            new Text("INFO"), new Text("Second entry message!"));
    List<Writable> exp2 = Arrays.asList((Writable) new Text("2016-01-01 23:59:59.003"), new Text("3"),
            new Text("WARN"), new Text("Third entry message!"));
    assertEquals(exp0, rr.next());
    assertEquals(exp1, rr.next());
    assertEquals(exp2, rr.next());
    assertFalse(rr.hasNext());

    //Test reset:
    rr.reset();
    assertEquals(exp0, rr.next());
    assertEquals(exp1, rr.next());
    assertEquals(exp2, rr.next());
    assertFalse(rr.hasNext());
}
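The /logtestdata/logtestfile0.txt resource is not shown, but its contents follow directly from the assertions above; each line matches the four regex groups (timestamp, entry number, log level, message):
2016-01-01 23:59:59.001 1 DEBUG First entry message!
2016-01-01 23:59:59.002 2 INFO Second entry message!
2016-01-01 23:59:59.003 3 WARN Third entry message!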
Example 10: testCsvRRSerializationResults
import org.datavec.api.records.reader.RecordReader; //import the required package/class
@Test
public void testCsvRRSerializationResults() throws Exception {
    int skipLines = 3;
    RecordReader r1 = new CSVRecordReader(skipLines, '\t');

    //Round-trip the reader through Java serialization to get an independent second instance
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ObjectOutputStream os = new ObjectOutputStream(baos);
    os.writeObject(r1);
    byte[] bytes = baos.toByteArray();
    ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bytes));
    RecordReader r2 = (RecordReader) ois.readObject();

    File f = new ClassPathResource("iris_tab_delim.txt").getFile();
    r1.initialize(new FileSplit(f));
    r2.initialize(new FileSplit(f));

    int count = 0;
    while (r1.hasNext()) {
        List<Writable> n1 = r1.next();
        List<Writable> n2 = r2.next();
        assertEquals(n1, n2);
        count++;
    }
    assertEquals(150 - skipLines, count);
}
Example 11: testCsvRecordReader
import org.datavec.api.records.reader.RecordReader; //import the required package/class
@Test
public void testCsvRecordReader() throws Exception {
    SerializerInstance si = sc.env().serializer().newInstance();
    assertTrue(si instanceof KryoSerializerInstance);

    RecordReader r1 = new CSVRecordReader(1, '\t');
    RecordReader r2 = serDe(r1, si);

    File f = new ClassPathResource("iris_tab_delim.txt").getFile();
    r1.initialize(new FileSplit(f));
    r2.initialize(new FileSplit(f));

    while (r1.hasNext()) {
        assertEquals(r1.next(), r2.next());
    }
    assertFalse(r2.hasNext());
}
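The serDe helper used above is not shown; it round-trips an object through the Spark SerializerInstance. A minimal sketch of what it presumably does (passing null for the ClassTag arguments is an assumption that works for Kryo serialization):
@SuppressWarnings("unchecked")
private static <T> T serDe(T in, SerializerInstance si) {
    java.nio.ByteBuffer bb = si.serialize(in, null);   //serialize to a buffer...
    return (T) si.deserialize(bb, null);               //...and read an independent copy back
}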
Example 12: Pan15DataSetIterator
import org.datavec.api.records.reader.RecordReader; //import the required package/class
/**
 * Main constructor
 *
 * @param recordReader      the RecordReader to use
 * @param converter         the WritableConverter to use (may be null)
 * @param batchSize         the batch size
 * @param labelIndexFrom    the index of the label (for classification), or the first index of the labels for multi-output regression
 * @param labelIndexTo      only used if regression == true. The last index _inclusive_ of the multi-output regression
 * @param numPossibleLabels the number of possible labels for classification. Not used if regression == true
 * @param maxNumBatches     maximum number of batches to return
 * @param regression        if true: regression. If false: classification (assume labelIndexFrom is the index of the class label)
 */
private Pan15DataSetIterator(RecordReader recordReader, WritableConverter converter, int batchSize, int labelIndexFrom,
                             int labelIndexTo, int numPossibleLabels, int maxNumBatches, boolean regression,
                             Language language, Model model) {
    super(recordReader, batchSize, maxNumBatches);
    this.recordReader = recordReader;
    this.converter = converter;
    this.batchSize = batchSize;
    this.maxNumBatches = maxNumBatches;
    this.labelIndex = labelIndexFrom;
    this.labelIndexTo = labelIndexTo;
    this.numPossibleLabels = numPossibleLabels;
    this.regression = regression;
    this.language = language;
    this.model = model;
}
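For context, DL4J's public RecordReaderDataSetIterator exposes the same parameter set through its constructors; Example 3 above already uses the regression form. A short usage sketch with illustrative values only (the batch size of 150 and column indices are arbitrary here):
//Classification: the label is in column 4 and takes one of 3 possible class values
DataSetIterator classification = new RecordReaderDataSetIterator(recordReader, 150, 4, 3);

//Regression: columns labelIndexFrom..labelIndexTo (inclusive) hold the target values
DataSetIterator regression = new RecordReaderDataSetIterator(recordReader, 150, 11, 11, true);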
Example 13: getRecordReader
import org.datavec.api.records.reader.RecordReader; //import the required package/class
@Override
public RecordReader getRecordReader(long rngSeed, int[] imgDim, DataSetType set, ImageTransform imageTransform) {
    try {
        Random rng = new Random(rngSeed);
        File datasetPath = getDataSetPath(set);
        FileSplit data = new FileSplit(datasetPath, BaseImageLoader.ALLOWED_FORMATS, rng);

        //ObjectDetectionRecordReader(height, width, channels, gridH, gridW, labelProvider)
        ObjectDetectionRecordReader recordReader = new ObjectDetectionRecordReader(imgDim[1], imgDim[0], imgDim[2],
                imgDim[4], imgDim[3], null);
        recordReader.initialize(data);
        return recordReader;
    } catch (IOException e) {
        throw new RuntimeException("Could not download SVHN", e);
    }
}
Example 14: RecordReaderMultiDataSetIterator
import org.datavec.api.records.reader.RecordReader; //import the required package/class
private RecordReaderMultiDataSetIterator(Builder builder) {
    this.batchSize = builder.batchSize;
    this.alignmentMode = builder.alignmentMode;
    this.recordReaders = builder.recordReaders;
    this.sequenceRecordReaders = builder.sequenceRecordReaders;
    this.inputs.addAll(builder.inputs);
    this.outputs.addAll(builder.outputs);
    this.timeSeriesRandomOffset = builder.timeSeriesRandomOffset;
    if (this.timeSeriesRandomOffset) {
        timeSeriesRandomOffsetRng = new Random(builder.timeSeriesRandomOffsetSeed);
    }

    //Reset is only supported if *all* underlying record readers support it
    if (recordReaders != null) {
        for (RecordReader rr : recordReaders.values()) {
            resetSupported &= rr.resetSupported();
        }
    }
    if (sequenceRecordReaders != null) {
        for (SequenceRecordReader srr : sequenceRecordReaders.values()) {
            resetSupported &= srr.resetSupported();
        }
    }
}
Example 15: testLRN
import org.datavec.api.records.reader.RecordReader; //import the required package/class
@Test
public void testLRN() throws Exception {
    List<String> labels = new ArrayList<>(Arrays.asList("Zico", "Ziwang_Xu"));
    String rootDir = new ClassPathResource("lfwtest").getFile().getAbsolutePath();

    RecordReader reader = new ImageRecordReader(28, 28, 3);
    reader.initialize(new FileSplit(new File(rootDir)));
    DataSetIterator recordReader = new RecordReaderDataSetIterator(reader, 10, 1, labels.size());
    labels.remove("lfwtest");

    NeuralNetConfiguration.ListBuilder builder = (NeuralNetConfiguration.ListBuilder) incompleteLRN();
    builder.setInputType(InputType.convolutional(28, 28, 3));

    MultiLayerConfiguration conf = builder.build();

    ConvolutionLayer layer2 = (ConvolutionLayer) conf.getConf(3).getLayer();
    assertEquals(6, layer2.getNIn());
}