This article collects typical usage examples of the Java method org.datavec.api.util.ClassPathResource.getFile. If you have been wondering what ClassPathResource.getFile does, how to call it, or where to find working samples, the hand-picked code examples below should help. You can also explore further usage examples of the containing class, org.datavec.api.util.ClassPathResource.
Below are 15 code examples of ClassPathResource.getFile, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java samples.
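Before diving in, here is the minimal pattern every example below shares: resolve a classpath resource to a java.io.File, then hand that file to whatever consumes it. This is only an orientation sketch; the resource name /data/sample.txt is hypothetical.

import org.datavec.api.util.ClassPathResource;
import java.io.File;

public class GetFileBasics {
    public static void main(String[] args) throws Exception {
        // resolve a resource on the classpath to a concrete File
        ClassPathResource resource = new ClassPathResource("/data/sample.txt"); // hypothetical resource
        File file = resource.getFile(); // fails if the resource cannot be found on the classpath
        System.out.println("Resolved to: " + file.getAbsolutePath());
    }
}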
Example 1: before
import org.datavec.api.util.ClassPathResource; // import the package/class this method depends on
@Before
public void before() throws Exception {
    if (vec == null) {
        ClassPathResource resource = new ClassPathResource("/labeled/");
        File file = resource.getFile();
        SentenceIterator iter = UimaSentenceIterator.createWithPath(file.getAbsolutePath());
        // drop any stale serialized cache from a previous run
        new File("cache.ser").delete();
        TokenizerFactory t = new UimaTokenizerFactory();
        vec = new Word2Vec.Builder().minWordFrequency(1).iterations(5).layerSize(100)
                .stopWords(new ArrayList<String>()).useUnknown(true).windowSize(5).iterate(iter)
                .tokenizerFactory(t).build();
        vec.fit();
    }
}
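Once fit() has returned, the trained Word2Vec model can be queried directly. A small follow-up sketch; the query words are assumptions about what the /labeled/ corpus contains:

// hypothetical follow-up to before(): query the fitted model
Collection<String> nearest = vec.wordsNearest("day", 10); // assumes "day" is in the vocab
double sim = vec.similarity("day", "night");              // assumes both words are in the vocab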
Example 2: testHasNext
import org.datavec.api.util.ClassPathResource; // import the package/class this method depends on
@Test
public void testHasNext() throws Exception {
    ClassPathResource reuters5250 = new ClassPathResource("/reuters/5250");
    File f = reuters5250.getFile();
    StreamLineIterator iterator = new StreamLineIterator.Builder(new FileInputStream(f)).setFetchSize(100).build();
    int cnt = 0;
    while (iterator.hasNext()) {
        String line = iterator.nextSentence();
        assertNotEquals(null, line);
        logger.info("Line: " + line);
        cnt++;
    }
    assertEquals(24, cnt);
}
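One nit worth noting: the FileInputStream above is never closed. A sketch of the same loop with try-with-resources, assuming nothing else needs the stream afterwards:

try (FileInputStream fis = new FileInputStream(f)) {
    StreamLineIterator iterator = new StreamLineIterator.Builder(fis).setFetchSize(100).build();
    int cnt = 0;
    while (iterator.hasNext()) {
        assertNotEquals(null, iterator.nextSentence());
        cnt++;
    }
    assertEquals(24, cnt);
}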
Example 3: testLoadedIterator1
import org.datavec.api.util.ClassPathResource; // import the package/class this method depends on
@Test
public void testLoadedIterator1() throws Exception {
    ClassPathResource resource = new ClassPathResource("/big/raw_sentences.txt");
    File file = resource.getFile();
    BasicLineIterator iterator = new BasicLineIterator(file);
    PrefetchingSentenceIterator fetcher =
            new PrefetchingSentenceIterator.Builder(iterator).setFetchSize(1000).build();
    log.info("Phase 1 starting");
    int cnt = 0;
    while (fetcher.hasNext()) {
        String line = fetcher.nextSentence();
        // We could imitate some workload in the current thread via Thread.sleep.
        // There's no need to keep it enabled permanently; uncomment the next line only if
        // you want to exercise this iterator, otherwise the test will take far longer than needed.
        // Thread.sleep(0, 10);
        cnt++;
        if (cnt % 10000 == 0)
            log.info("Line processed: " + cnt);
    }
}
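The test above never asserts anything about cnt. Given that Example 10 below establishes /big/raw_sentences.txt as a 97,162-line corpus, a natural closing assertion would be:

// assumes the same /big/raw_sentences.txt corpus measured in Example 10
assertEquals(97162, cnt);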
Example 4: testNextDocument
import org.datavec.api.util.ClassPathResource; // import the package/class this method depends on
/**
 * Checks the actual number of documents retrieved by the DocumentIterator.
 * @throws Exception
 */
@Test
public void testNextDocument() throws Exception {
    ClassPathResource reuters5250 = new ClassPathResource("/reuters/5250");
    File f = reuters5250.getFile();
    DocumentIterator iter = new FileDocumentIterator(f.getAbsolutePath());
    log.info(f.getAbsolutePath());
    int cnt = 0;
    while (iter.hasNext()) {
        InputStream stream = iter.nextDocument();
        stream.close();
        cnt++;
    }
    assertEquals(24, cnt);
}
Example 5: trainParagraghVecModel
import org.datavec.api.util.ClassPathResource; // import the package/class this method depends on
public void trainParagraghVecModel(String locationToSave) throws FileNotFoundException {
    ClassPathResource resource = new ClassPathResource("/paragraphVectors/paragraphVectorTraining.txt");
    File file = resource.getFile();
    SentenceIterator iter = new BasicLineIterator(file);
    AbstractCache<VocabWord> cache = new AbstractCache<VocabWord>();
    TokenizerFactory t = new DefaultTokenizerFactory();
    t.setTokenPreProcessor(new CommonPreprocessor());
    /*
     * If you don't have a LabelAwareIterator handy, you can use a synchronized labels generator:
     * it will be used to label each document/sequence/line with its own label.
     * If you do have a LabelAwareIterator ready, you can provide it instead, for your in-house labels.
     */
    LabelsSource source = new LabelsSource("DOC_");
    ParagraphVectors vec = new ParagraphVectors.Builder()
            .minWordFrequency(1)
            .iterations(100)
            .epochs(1)
            .layerSize(50)
            .learningRate(0.02)
            .labelsSource(source)
            .windowSize(5)
            .iterate(iter)
            .trainWordVectors(true)
            .vocabCache(cache)
            .tokenizerFactory(t)
            .sampling(0)
            .build();
    vec.fit();
    WordVectorSerializer.writeParagraphVectors(vec, locationToSave);
}
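Having written the model to disk, the natural counterpart is reading it back. A minimal sketch using the matching reader in WordVectorSerializer; note that a tokenizer factory typically needs to be re-attached before inferring vectors for new text:

// load the model persisted by trainParagraghVecModel(...)
ParagraphVectors restored = WordVectorSerializer.readParagraphVectors(locationToSave);
restored.setTokenizerFactory(t); // same DefaultTokenizerFactory setup as above
INDArray inferred = restored.inferVector("some unseen sentence"); // embed new text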
Example 6: before
import org.datavec.api.util.ClassPathResource; // import the package/class this method depends on
@Before
public void before() throws Exception {
    ClassPathResource resource = new ClassPathResource("/raw_sentences.txt");
    File file = resource.getFile();
    iter = new LineSentenceIterator(file);
    iter.setPreProcessor(new SentencePreProcessor() {
        @Override
        public String preProcess(String sentence) {
            return sentence.toLowerCase();
        }
    });
}
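The preprocessed iterator can then be consumed like any other SentenceIterator; a trivial usage sketch:

// every sentence returned below has been lower-cased by the preprocessor
while (iter.hasNext()) {
    String sentence = iter.nextSentence();
    // ... hand off to a tokenizer or model here
}
iter.reset(); // rewind if the iterator is to be reused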
Example 7: testPerformance1
import org.datavec.api.util.ClassPathResource; // import the package/class this method depends on
@Test
public void testPerformance1() throws Exception {
    ClassPathResource resource = new ClassPathResource("/big/raw_sentences.txt");
    File file = resource.getFile();
    BasicLineIterator iterator = new BasicLineIterator(file);
    PrefetchingSentenceIterator fetcher = new PrefetchingSentenceIterator.Builder(new BasicLineIterator(file))
            .setFetchSize(500000).build();
    long time01 = System.currentTimeMillis();
    int cnt0 = 0;
    while (iterator.hasNext()) {
        iterator.nextSentence();
        cnt0++;
    }
    long time02 = System.currentTimeMillis();
    long time11 = System.currentTimeMillis();
    int cnt1 = 0;
    while (fetcher.hasNext()) {
        fetcher.nextSentence();
        cnt1++;
    }
    long time12 = System.currentTimeMillis();
    log.info("Basic iterator: " + (time02 - time01));
    log.info("Prefetched iterator: " + (time12 - time11));
    long difference = (time12 - time11) - (time02 - time01);
    log.info("Difference: " + difference);
    // On a small corpus the time difference can fluctuate a lot,
    // but it can still serve as a rough effectiveness measurement.
    assertTrue(difference < 150);
}
Example 8: testUpdateCoords
import org.datavec.api.util.ClassPathResource; // import the package/class this method depends on
@Test
@Ignore
public void testUpdateCoords() throws Exception {
    Nd4j.ENFORCE_NUMERICAL_STABILITY = true;
    Nd4j.factory().setDType(DataBuffer.Type.DOUBLE);
    Nd4j.getRandom().setSeed(123);
    BarnesHutTsne b = new BarnesHutTsne.Builder().stopLyingIteration(250).theta(0.5).learningRate(500)
            .useAdaGrad(false).numDimension(2).build();
    ClassPathResource resource = new ClassPathResource("/mnist2500_X.txt");
    File f = resource.getFile();
    INDArray data = Nd4j.readNumpy(f.getAbsolutePath(), " ").get(NDArrayIndex.interval(0, 100),
            NDArrayIndex.interval(0, 784));
    ClassPathResource labels = new ClassPathResource("mnist2500_labels.txt");
    List<String> labelsList = IOUtils.readLines(labels.getInputStream()).subList(0, 100);
    b.fit(data);
    b.saveAsFile(labelsList, "coords.csv");
    // String coords = client.target("http://localhost:8080").path("api").path("update")
    //                .request().accept(MediaType.APPLICATION_JSON)
    ////                .post(Entity.entity(new UrlResource("http://localhost:8080/api/coords.csv"), MediaType.APPLICATION_JSON))
    //                .readEntity(String.class);
    // ObjectMapper mapper = new ObjectMapper();
    // List<String> testLines = mapper.readValue(coords, List.class);
    // List<String> lines = IOUtils.readLines(new FileInputStream("coords.csv"));
    // assertEquals(testLines, lines);
    throw new RuntimeException("Not implemented");
}
Example 9: main
import org.datavec.api.util.ClassPathResource; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
    ClassPathResource srcFile = new ClassPathResource("/raw_sentences.txt");
    File file = srcFile.getFile();
    SentenceIterator iter = new BasicLineIterator(file);
    TokenizerFactory tFact = new DefaultTokenizerFactory();
    tFact.setTokenPreProcessor(new CommonPreprocessor());
    LabelsSource labelFormat = new LabelsSource("LINE_");
    ParagraphVectors vec = new ParagraphVectors.Builder()
            .minWordFrequency(1)
            .iterations(5)
            .epochs(1)
            .layerSize(100)
            .learningRate(0.025)
            .labelsSource(labelFormat)
            .windowSize(5)
            .iterate(iter)
            .trainWordVectors(false)
            .tokenizerFactory(tFact)
            .sampling(0)
            .build();
    vec.fit();
    // 'out' below is presumably a static import of java.lang.System.out
    double similar1 = vec.similarity("LINE_9835", "LINE_12492");
    out.println("Comparing lines 9836 & 12493 ('This is my house .'/'This is my world .') Similarity = " + similar1);
    double similar2 = vec.similarity("LINE_3720", "LINE_16392");
    out.println("Comparing lines 3721 & 16393 ('This is my way .'/'This is my work .') Similarity = " + similar2);
    double similar3 = vec.similarity("LINE_6347", "LINE_3720");
    out.println("Comparing lines 6348 & 3721 ('This is my case .'/'This is my way .') Similarity = " + similar3);
    double dissimilar1 = vec.similarity("LINE_3720", "LINE_9852");
    out.println("Comparing lines 3721 & 9853 ('This is my way .'/'We now have one .') Similarity = " + dissimilar1);
    double dissimilar2 = vec.similarity("LINE_3720", "LINE_3719");
    out.println("Comparing lines 3721 & 3720 ('This is my way .'/'At first he says no .') Similarity = " + dissimilar2);
}
Developer: PacktPublishing; Project: Machine-Learning-End-to-Endguide-for-Java-developers; Lines of code: 46; Source file: ClassifyBySimilarity.java
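Beyond comparing labels seen during training, a fitted ParagraphVectors model can also embed unseen text. A short sketch; the sentence is arbitrary, and nearestLabels is assumed to be available in the dl4j version these examples target:

// infer a vector for a sentence that was never a labeled training line
INDArray unseen = vec.inferVector("This is my life ."); // hypothetical input
Collection<String> closest = vec.nearestLabels(unseen, 5); // closest LINE_ labels
out.println("Nearest lines: " + closest);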
Example 10: testParagraphVectorsVocabBuilding1
import org.datavec.api.util.ClassPathResource; // import the package/class this method depends on
/**
 * This test checks how the vocab is built from the provided SentenceIterator, without labels.
 *
 * @throws Exception
 */
@Test
public void testParagraphVectorsVocabBuilding1() throws Exception {
    ClassPathResource resource = new ClassPathResource("/big/raw_sentences.txt");
    File file = resource.getFile(); //.getParentFile();
    SentenceIterator iter = new BasicLineIterator(file); //UimaSentenceIterator.createWithPath(file.getAbsolutePath());
    int numberOfLines = 0;
    while (iter.hasNext()) {
        iter.nextSentence();
        numberOfLines++;
    }
    iter.reset();
    InMemoryLookupCache cache = new InMemoryLookupCache(false);
    TokenizerFactory t = new DefaultTokenizerFactory();
    t.setTokenPreProcessor(new CommonPreprocessor());
    // LabelsSource source = new LabelsSource("DOC_");
    ParagraphVectors vec = new ParagraphVectors.Builder().minWordFrequency(1).iterations(5).layerSize(100)
            // .labelsGenerator(source)
            .windowSize(5).iterate(iter).vocabCache(cache).tokenizerFactory(t).build();
    vec.buildVocab();
    LabelsSource source = vec.getLabelsSource();
    // VocabCache cache = vec.getVocab();
    log.info("Number of lines in corpus: " + numberOfLines);
    assertEquals(numberOfLines, source.getLabels().size());
    assertEquals(97162, source.getLabels().size());
    assertNotEquals(null, cache);
    assertEquals(97406, cache.numWords());
    // the proper number of words for minWordFrequency = 1 is 244
    assertEquals(244, cache.numWords() - source.getLabels().size());
}
Example 11: testParagraphVectorsDM
import org.datavec.api.util.ClassPathResource; // import the package/class this method depends on
@Test
public void testParagraphVectorsDM() throws Exception {
    ClassPathResource resource = new ClassPathResource("/big/raw_sentences.txt");
    File file = resource.getFile();
    SentenceIterator iter = new BasicLineIterator(file);
    AbstractCache<VocabWord> cache = new AbstractCache.Builder<VocabWord>().build();
    TokenizerFactory t = new DefaultTokenizerFactory();
    t.setTokenPreProcessor(new CommonPreprocessor());
    LabelsSource source = new LabelsSource("DOC_");
    ParagraphVectors vec = new ParagraphVectors.Builder().minWordFrequency(1).iterations(2).seed(119).epochs(3)
            .layerSize(100).learningRate(0.025).labelsSource(source).windowSize(5).iterate(iter)
            .trainWordVectors(true).vocabCache(cache).tokenizerFactory(t).negativeSample(0)
            .useHierarchicSoftmax(true).sampling(0).workers(1).usePreciseWeightInit(true)
            .sequenceLearningAlgorithm(new DM<VocabWord>()).build();
    vec.fit();
    int cnt1 = cache.wordFrequency("day");
    int cnt2 = cache.wordFrequency("me");
    assertNotEquals(1, cnt1);
    assertNotEquals(1, cnt2);
    assertNotEquals(cnt1, cnt2);
    double simDN = vec.similarity("day", "night");
    log.info("day/night similarity: {}", simDN);
    double similarity1 = vec.similarity("DOC_9835", "DOC_12492");
    log.info("9835/12492 similarity: " + similarity1);
    // assertTrue(similarity1 > 0.2d);
    double similarity2 = vec.similarity("DOC_3720", "DOC_16392");
    log.info("3720/16392 similarity: " + similarity2);
    // assertTrue(similarity2 > 0.2d);
    double similarity3 = vec.similarity("DOC_6347", "DOC_3720");
    log.info("6347/3720 similarity: " + similarity3);
    // assertTrue(similarity3 > 0.6d);
    double similarityX = vec.similarity("DOC_3720", "DOC_9852");
    log.info("3720/9852 similarity: " + similarityX);
    assertTrue(similarityX < 0.5d);
    // now test DM inference
    INDArray original = vec.getWordVectorMatrix("DOC_16392").dup();
    INDArray inferredA1 = vec.inferVector("This is my work");
    INDArray inferredB1 = vec.inferVector("This is my work .");
    double cosAO1 = Transforms.cosineSim(inferredA1.dup(), original.dup());
    double cosAB1 = Transforms.cosineSim(inferredA1.dup(), inferredB1.dup());
    log.info("Cos O/A: {}", cosAO1);
    log.info("Cos A/B: {}", cosAB1);
}
Example 12: testParagraphVectorsDBOW
import org.datavec.api.util.ClassPathResource; // import the package/class this method depends on
@Test
public void testParagraphVectorsDBOW() throws Exception {
    ClassPathResource resource = new ClassPathResource("/big/raw_sentences.txt");
    File file = resource.getFile();
    SentenceIterator iter = new BasicLineIterator(file);
    AbstractCache<VocabWord> cache = new AbstractCache.Builder<VocabWord>().build();
    TokenizerFactory t = new DefaultTokenizerFactory();
    t.setTokenPreProcessor(new CommonPreprocessor());
    LabelsSource source = new LabelsSource("DOC_");
    ParagraphVectors vec = new ParagraphVectors.Builder().minWordFrequency(1).iterations(5).seed(119).epochs(1)
            .layerSize(100).learningRate(0.025).labelsSource(source).windowSize(5).iterate(iter)
            .trainWordVectors(true).vocabCache(cache).tokenizerFactory(t).negativeSample(0)
            .allowParallelTokenization(true).useHierarchicSoftmax(true).sampling(0).workers(2)
            .usePreciseWeightInit(true).sequenceLearningAlgorithm(new DBOW<VocabWord>()).build();
    vec.fit();
    int cnt1 = cache.wordFrequency("day");
    int cnt2 = cache.wordFrequency("me");
    assertNotEquals(1, cnt1);
    assertNotEquals(1, cnt2);
    assertNotEquals(cnt1, cnt2);
    double simDN = vec.similarity("day", "night");
    log.info("day/night similarity: {}", simDN);
    double similarity1 = vec.similarity("DOC_9835", "DOC_12492");
    log.info("9835/12492 similarity: " + similarity1);
    // assertTrue(similarity1 > 0.2d);
    double similarity2 = vec.similarity("DOC_3720", "DOC_16392");
    log.info("3720/16392 similarity: " + similarity2);
    // assertTrue(similarity2 > 0.2d);
    double similarity3 = vec.similarity("DOC_6347", "DOC_3720");
    log.info("6347/3720 similarity: " + similarity3);
    // assertTrue(similarity3 > 0.6d);
    double similarityX = vec.similarity("DOC_3720", "DOC_9852");
    log.info("3720/9852 similarity: " + similarityX);
    assertTrue(similarityX < 0.5d);
    // now test DBOW inference
    INDArray original = vec.getWordVectorMatrix("DOC_16392").dup();
    INDArray inferredA1 = vec.inferVector("This is my work");
    INDArray inferredB1 = vec.inferVector("This is my work .");
    INDArray inferredC1 = vec.inferVector("This is my day");
    INDArray inferredD1 = vec.inferVector("This is my night");
    log.info("A: {}", Arrays.toString(inferredA1.data().asFloat()));
    log.info("C: {}", Arrays.toString(inferredC1.data().asFloat()));
    assertNotEquals(inferredA1, inferredC1);
    double cosAO1 = Transforms.cosineSim(inferredA1.dup(), original.dup());
    double cosAB1 = Transforms.cosineSim(inferredA1.dup(), inferredB1.dup());
    double cosAC1 = Transforms.cosineSim(inferredA1.dup(), inferredC1.dup());
    double cosCD1 = Transforms.cosineSim(inferredD1.dup(), inferredC1.dup());
    log.info("Cos O/A: {}", cosAO1);
    log.info("Cos A/B: {}", cosAB1);
    log.info("Cos A/C: {}", cosAC1);
    log.info("Cos C/D: {}", cosCD1);
}
Example 13: testParagraphVectorsWithWordVectorsModelling1
import org.datavec.api.util.ClassPathResource; // import the package/class this method depends on
@Test
public void testParagraphVectorsWithWordVectorsModelling1() throws Exception {
    ClassPathResource resource = new ClassPathResource("/big/raw_sentences.txt");
    File file = resource.getFile();
    SentenceIterator iter = new BasicLineIterator(file);
    // InMemoryLookupCache cache = new InMemoryLookupCache(false);
    AbstractCache<VocabWord> cache = new AbstractCache.Builder<VocabWord>().build();
    TokenizerFactory t = new DefaultTokenizerFactory();
    t.setTokenPreProcessor(new CommonPreprocessor());
    LabelsSource source = new LabelsSource("DOC_");
    ParagraphVectors vec = new ParagraphVectors.Builder().minWordFrequency(1).iterations(3).epochs(1).layerSize(100)
            .learningRate(0.025).labelsSource(source).windowSize(5).iterate(iter).trainWordVectors(true)
            .vocabCache(cache).tokenizerFactory(t).sampling(0).build();
    vec.fit();
    int cnt1 = cache.wordFrequency("day");
    int cnt2 = cache.wordFrequency("me");
    assertNotEquals(1, cnt1);
    assertNotEquals(1, cnt2);
    assertNotEquals(cnt1, cnt2);
    /*
     * We have a few lines that contain pretty similar words.
     * These sentences should be fairly close to each other in vector space.
     */
    // line 3721: This is my way .
    // line 6348: This is my case .
    // line 9836: This is my house .
    // line 12493: This is my world .
    // line 16393: This is my work .
    // This one is a special sentence that has nothing in common with the sentences above:
    // line 9853: We now have one .
    assertTrue(vec.hasWord("DOC_3720"));
    double similarityD = vec.similarity("day", "night");
    log.info("day/night similarity: " + similarityD);
    double similarityW = vec.similarity("way", "work");
    log.info("way/work similarity: " + similarityW);
    double similarityH = vec.similarity("house", "world");
    log.info("house/world similarity: " + similarityH);
    double similarityC = vec.similarity("case", "way");
    log.info("case/way similarity: " + similarityC);
    double similarity1 = vec.similarity("DOC_9835", "DOC_12492");
    log.info("9835/12492 similarity: " + similarity1);
    // assertTrue(similarity1 > 0.7d);
    double similarity2 = vec.similarity("DOC_3720", "DOC_16392");
    log.info("3720/16392 similarity: " + similarity2);
    // assertTrue(similarity2 > 0.7d);
    double similarity3 = vec.similarity("DOC_6347", "DOC_3720");
    log.info("6347/3720 similarity: " + similarity3);
    // assertTrue(similarity2 > 0.7d);
    // The likelihood in this case should be significantly lower; however, since the corpus
    // is small and weight initialization is random, this test CAN occasionally fail.
    double similarityX = vec.similarity("DOC_3720", "DOC_9852");
    log.info("3720/9852 similarity: " + similarityX);
    assertTrue(similarityX < 0.5d);
    double sim119 = vec.similarityToLabel("This is my case .", "DOC_6347");
    double sim120 = vec.similarityToLabel("This is my case .", "DOC_3720");
    log.info("1/2: " + sim119 + "/" + sim120);
    //assertEquals(similarity3, sim119, 0.001);
}
Example 14: testGlove1
import org.datavec.api.util.ClassPathResource; // import the package/class this method depends on
@Ignore
@Test
public void testGlove1() throws Exception {
    logger.info("Max available memory: " + Runtime.getRuntime().maxMemory());
    ClassPathResource resource = new ClassPathResource("big/raw_sentences.txt");
    File file = resource.getFile();
    BasicLineIterator underlyingIterator = new BasicLineIterator(file);
    TokenizerFactory t = new DefaultTokenizerFactory();
    t.setTokenPreProcessor(new CommonPreprocessor());
    SentenceTransformer transformer =
            new SentenceTransformer.Builder().iterator(underlyingIterator).tokenizerFactory(t).build();
    AbstractSequenceIterator<VocabWord> sequenceIterator =
            new AbstractSequenceIterator.Builder<>(transformer).build();
    VectorsConfiguration configuration = new VectorsConfiguration();
    configuration.setWindow(5);
    configuration.setLearningRate(0.06);
    configuration.setLayersSize(100);
    SequenceVectors<VocabWord> vectors = new SequenceVectors.Builder<VocabWord>(configuration)
            .iterate(sequenceIterator).iterations(1).epochs(45)
            .elementsLearningAlgorithm(new GloVe.Builder<VocabWord>().shuffle(true).symmetric(true)
                    .learningRate(0.05).alpha(0.75).xMax(100.0).build())
            .resetModel(true).trainElementsRepresentation(true).trainSequencesRepresentation(false).build();
    vectors.fit();
    double sim = vectors.similarity("day", "night");
    logger.info("Day/night similarity: " + sim);
    sim = vectors.similarity("day", "another");
    logger.info("Day/another similarity: " + sim);
    sim = vectors.similarity("night", "year");
    logger.info("Night/year similarity: " + sim);
    sim = vectors.similarity("night", "me");
    logger.info("Night/me similarity: " + sim);
    sim = vectors.similarity("day", "know");
    logger.info("Day/know similarity: " + sim);
    sim = vectors.similarity("best", "police");
    logger.info("Best/police similarity: " + sim);
    Collection<String> labels = vectors.wordsNearest("day", 10);
    logger.info("Nearest labels to 'day': " + labels);
    sim = vectors.similarity("day", "night");
    assertTrue(sim > 0.6d);
}
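If the learned GloVe vectors are worth keeping, they can be written out with WordVectorSerializer. A sketch under the assumption that the writeSequenceVectors overload taking a VocabWordFactory exists in the dl4j version these examples target; the output path is hypothetical:

// persist the learned element vectors (API availability is an assumption, verify against your dl4j version)
WordVectorSerializer.writeSequenceVectors(vectors, new VocabWordFactory(), "glove_vectors.seq");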
Example 15: testDocumentIterator
import org.datavec.api.util.ClassPathResource; // import the package/class this method depends on
@Test
public void testDocumentIterator() throws Exception {
    ClassPathResource reuters5250 = new ClassPathResource("/reuters/5250");
    File f = reuters5250.getFile();
    DocumentIterator iter = new FileDocumentIterator(f.getAbsolutePath());
    InputStream doc = iter.nextDocument();
    TokenizerFactory t = new DefaultTokenizerFactory();
    Tokenizer next = t.create(doc);
    // expected leading tokens of the first reuters document
    String[] list = "PEARSON CONCENTRATES ON FOUR SECTORS".split(" ");
    int count = 0;
    while (next.hasMoreTokens() && count < list.length) {
        String token = next.nextToken();
        assertEquals(list[count++], token);
    }
    doc.close();
}