This article collects typical usage examples of the Java method org.deeplearning4j.text.sentenceiterator.SentenceIterator.hasNext. If you are wondering what SentenceIterator.hasNext does or how to use it, the curated code examples below may help. You can also explore the enclosing class org.deeplearning4j.text.sentenceiterator.SentenceIterator
for further usage details.
The following presents 4 code examples of SentenceIterator.hasNext, sorted by popularity by default.
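Before the examples, here is a minimal, self-contained sketch of the typical hasNext()/nextSentence() loop. The use of BasicLineIterator, the corpus.txt path, and the class name are illustrative assumptions, not taken from the examples below.
import java.io.File;
import org.deeplearning4j.text.sentenceiterator.BasicLineIterator;
import org.deeplearning4j.text.sentenceiterator.SentenceIterator;

public class SentenceIteratorLoopSketch {
    public static void main(String[] args) throws Exception {
        // "corpus.txt" is a placeholder path; one sentence per line
        SentenceIterator iter = new BasicLineIterator(new File("corpus.txt"));
        while (iter.hasNext()) {                   // hasNext() reports whether another sentence is available
            String sentence = iter.nextSentence(); // fetch the next sentence
            System.out.println(sentence);          // process the sentence here
        }
        iter.finish();                             // release underlying resources
    }
}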
Example 1: getBagOfWordsWithCounts
import org.deeplearning4j.text.sentenceiterator.SentenceIterator; // import the package/class this method depends on

private static LinkedHashMap<String, Integer> getBagOfWordsWithCounts(Language language) {
    HashMap<String, Integer> bagOfWords = new HashMap<>();
    List<String> sentences = getSentencesFromLanguage(language);
    SentenceIterator iter = new CollectionSentenceIterator(new Pan15SentencePreProcessor(), sentences);
    // Count word occurrences across all sentences
    while (iter.hasNext()) {
        String sentence = iter.nextSentence();
        for (String word : sentence.split("\\s+")) {
            word = normalize(word);
            // Skip empty tokens and single punctuation characters
            if (Objects.equals(word, "") || (word.length() == 1 && word.matches("\\p{Punct}"))) continue;
            bagOfWords.put(word, bagOfWords.getOrDefault(word, 0) + 1);
        }
    }
    // Keep only the VEC_LENGTH most frequent words, in descending order of count
    LinkedHashMap<String, Integer> sorted = new LinkedHashMap<>();
    final int[] count = {0};
    bagOfWords.entrySet().stream()
            .sorted(Map.Entry.comparingByValue(Collections.reverseOrder()))
            .forEach(entry -> {
                if (count[0] < VEC_LENGTH) sorted.put(entry.getKey(), entry.getValue());
                count[0]++;
            });
    return sorted;
}
Example 2: testVocab
import org.deeplearning4j.text.sentenceiterator.SentenceIterator; // import the package/class this method depends on

@Test
public void testVocab() throws Exception {
    File inputFile = new ClassPathResource("big/raw_sentences.txt").getFile();
    SentenceIterator iter = new BasicLineIterator(inputFile);
    Set<String> set = new HashSet<>();
    int lines = 0;
    int cnt = 0;
    // t (a TokenizerFactory) and log are fields of the enclosing test class
    while (iter.hasNext()) {
        Tokenizer tok = t.create(iter.nextSentence());
        for (String token : tok.getTokens()) {
            if (token == null || token.isEmpty() || token.trim().isEmpty())
                continue;
            cnt++;
            if (!set.contains(token))
                set.add(token);
        }
        lines++;
    }
    log.info("Total number of tokens: [" + cnt + "], lines: [" + lines + "], set size: [" + set.size() + "]");
    log.info("Set:\n" + set);
}
Example 3: testParagraphVectorsVocabBuilding1
import org.deeplearning4j.text.sentenceiterator.SentenceIterator; // import the package/class this method depends on

/**
 * This test checks how the vocab is built from the provided SentenceIterator, without labels.
 *
 * @throws Exception
 */
@Test
public void testParagraphVectorsVocabBuilding1() throws Exception {
    ClassPathResource resource = new ClassPathResource("/big/raw_sentences.txt");
    File file = resource.getFile(); //.getParentFile();
    SentenceIterator iter = new BasicLineIterator(file); //UimaSentenceIterator.createWithPath(file.getAbsolutePath());

    // Count the number of sentences in the corpus
    int numberOfLines = 0;
    while (iter.hasNext()) {
        iter.nextSentence();
        numberOfLines++;
    }
    iter.reset();

    InMemoryLookupCache cache = new InMemoryLookupCache(false);

    TokenizerFactory t = new DefaultTokenizerFactory();
    t.setTokenPreProcessor(new CommonPreprocessor());

    // LabelsSource source = new LabelsSource("DOC_");

    ParagraphVectors vec = new ParagraphVectors.Builder().minWordFrequency(1).iterations(5).layerSize(100)
            // .labelsGenerator(source)
            .windowSize(5).iterate(iter).vocabCache(cache).tokenizerFactory(t).build();

    vec.buildVocab();

    LabelsSource source = vec.getLabelsSource();

    //VocabCache cache = vec.getVocab();
    log.info("Number of lines in corpus: " + numberOfLines);
    // One document label is generated per line of the corpus
    assertEquals(numberOfLines, source.getLabels().size());
    assertEquals(97162, source.getLabels().size());

    assertNotEquals(null, cache);
    assertEquals(97406, cache.numWords());

    // proper number of words for minWordsFrequency = 1 is 244
    assertEquals(244, cache.numWords() - source.getLabels().size());
}
Example 4: getLASI
import org.deeplearning4j.text.sentenceiterator.SentenceIterator; // import the package/class this method depends on

protected LabelAwareSentenceIterator getLASI(final SentenceIterator iterator, final List<String> labels) {
    iterator.reset();

    // Wrap a plain SentenceIterator so that each sentence gets a label from the list, cycled round-robin
    return new LabelAwareSentenceIterator() {
        private AtomicInteger cnt = new AtomicInteger(0);

        @Override
        public String currentLabel() {
            return labels.get(cnt.incrementAndGet() % labels.size());
        }

        @Override
        public List<String> currentLabels() {
            return Collections.singletonList(currentLabel());
        }

        @Override
        public String nextSentence() {
            return iterator.nextSentence();
        }

        @Override
        public boolean hasNext() {
            return iterator.hasNext();
        }

        @Override
        public void reset() {
            iterator.reset();
        }

        @Override
        public void finish() {
            iterator.finish();
        }

        @Override
        public SentencePreProcessor getPreProcessor() {
            return iterator.getPreProcessor();
        }

        @Override
        public void setPreProcessor(SentencePreProcessor preProcessor) {
            iterator.setPreProcessor(preProcessor);
        }
    };
}
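As a hypothetical usage note, the wrapper returned by getLASI could be consumed like this from within the same class; the corpus path, the label values, and the method name iterateWithLabels are assumptions for illustration only, not part of the example above.
// Sketch only: assumes this method lives in the class that defines getLASI(...)
protected void iterateWithLabels() throws Exception {
    SentenceIterator base = new BasicLineIterator(new File("corpus.txt")); // placeholder corpus
    LabelAwareSentenceIterator labeled = getLASI(base, Arrays.asList("POSITIVE", "NEGATIVE"));
    while (labeled.hasNext()) {
        String sentence = labeled.nextSentence();
        String label = labeled.currentLabel(); // labels cycle through the list round-robin
        // the (sentence, label) pair can now be fed to a label-aware model
    }
    labeled.finish();
}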