当前位置: 首页>>代码示例>>Java>>正文


Java Sentence类代码示例

本文整理汇总了Java中org.cleartk.token.type.Sentence的典型用法代码示例。如果您正苦于以下问题:Java Sentence类的具体用法?Java Sentence怎么用?Java Sentence使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


Sentence类属于org.cleartk.token.type包,在下文中一共展示了Sentence类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: mpAnalyzerTest

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Builds seven tokens with POS tags, runs {@code mpAnalyzer} over them, and checks the lemma
 * stored on each token against the expected sequence.
 */
@Test
public void mpAnalyzerTest() throws Exception {
  this.jCas.reset();
  tokenBuilder = new TokenBuilder<Token, Sentence>(Token.class, Sentence.class, "pos", "stem");

  // The same string is passed twice to buildTokens, followed by one POS tag per word.
  String words = "jump jumping jumped jumper happy happier happiest";
  String posTags = "VBP VBG VBP NN JJ JJ JJ";
  this.tokenBuilder.buildTokens(jCas, words, words, posTags);

  mpAnalyzer.process(jCas);

  List<String> lemmas = new ArrayList<String>();
  for (Token analyzed : JCasUtil.select(this.jCas, Token.class)) {
    lemmas.add(analyzed.getLemma());
  }

  List<String> expectedLemmas = Arrays.asList(
      "jump jump jumped jumper happy happier happiest".split(" "));
  Assert.assertEquals(expectedLemmas, lemmas);
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:21,代码来源:MpAnalyzerTest.java

示例2: testPeriod

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Indexes two sentences over a short text, runs the tokenizer, and checks that
 * {@code getToken(0)} through {@code getToken(15)} cover the expected words and periods.
 */
@Test
public void testPeriod() throws Exception {
	String text = "The sides was so steep and the bushes so thick. We tramped and clumb. ";
	jCas.setDocumentText(text);
	new Sentence(jCas, 0, 47).addToIndexes();
	new Sentence(jCas, 48, 70).addToIndexes();
	SimplePipeline.runPipeline(jCas, tokenizer);

	// Expected covered text of each token, in the order getToken is queried.
	String[] expectedTokens = {
			"The", "sides", "was", "so", "steep", "and", "the", "bushes", "so", "thick", ".",
			"We", "tramped", "and", "clumb", "."
	};
	for (int index = 0; index < expectedTokens.length; index++) {
		assertEquals(expectedTokens[index], getToken(index).getCoveredText());
	}
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:27,代码来源:TokenizerTest.java

示例3: createSentences

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Sets the CAS document text to the given lines joined by newlines, and indexes one
 * {@link Sentence} per line that contains at least one non-whitespace character. Each sentence
 * spans the line with its leading and trailing whitespace excluded.
 *
 * @param lines
 *          the lines of the document, without line terminators
 */
private void createSentences(String... lines) {
  this.jCas.setDocumentText(Joiner.on("\n").join(lines));

  int lineOffset = 0; // document offset of the first character of the current line
  for (String line : lines) {
    int lineLength = line.length();

    // index of the first non-whitespace character (lineLength if there is none)
    int first = 0;
    for (; first < lineLength && Character.isWhitespace(line.charAt(first)); first++) {
      // scanning only
    }

    // index just past the last non-whitespace character (0 if there is none)
    int pastLast = lineLength;
    for (; pastLast > 0 && Character.isWhitespace(line.charAt(pastLast - 1)); pastLast--) {
      // scanning only
    }

    boolean blankLine = first == lineLength || pastLast == 0;
    if (!blankLine) {
      new Sentence(this.jCas, lineOffset + first, lineOffset + pastLast).addToIndexes();
    }

    // skip over this line plus the newline that joined it to the next one
    lineOffset += lineLength + 1;
  }
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:24,代码来源:TokenizerTest.java

示例4: posTaggerTest

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Builds the tokens of one sentence, runs {@code posTagger} over them, and checks the POS tag
 * stored on each token. The test first calls {@code assumeBigMemoryTestsEnabled()}.
 */
@Test
public void posTaggerTest() throws Exception {
  this.assumeBigMemoryTestsEnabled();
  this.logger.info(BIG_MEMORY_TEST_MESSAGE);

  initDefaultModel();
  this.jCas.reset();
  tokenBuilder = new TokenBuilder<Token, Sentence>(Token.class, Sentence.class, "pos", "stem");

  // The same string is passed to buildTokens twice.
  String sentenceText = "The brown fox jumped quickly over the lazy dog .";
  this.tokenBuilder.buildTokens(this.jCas, sentenceText, sentenceText);
  SimplePipeline.runPipeline(jCas, posTagger);

  // Only POS tags are verified here; this test makes no assertion about lemmas.
  List<String> tags = new ArrayList<String>();
  for (Token tagged : JCasUtil.select(this.jCas, Token.class)) {
    tags.add(tagged.getPos());
  }

  List<String> expectedTags = Arrays.asList("DT JJ NN VBD RB IN DT JJ NN .".split(" "));
  Assert.assertEquals(expectedTags, tags);
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:27,代码来源:PosTaggerTest.java

示例5: createSentences

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Joins the given lines with newlines to form the CAS document text, then adds a
 * {@link Sentence} annotation for every line that is not empty or all whitespace. The
 * annotation covers the line from its first to its last non-whitespace character.
 *
 * @param lines
 *          the document lines, one sentence candidate per line
 */
private void createSentences(String ... lines) {
  this.jCas.setDocumentText(Joiner.on("\n").join(lines));

  int documentOffset = 0;
  for (String text : lines) {
    final int size = text.length();

    int head = 0;
    while (head < size && Character.isWhitespace(text.charAt(head))) {
      head++;
    }

    int tail = size;
    while (tail > 0 && Character.isWhitespace(text.charAt(tail - 1))) {
      tail--;
    }

    if (!(head == size || tail == 0)) {
      Sentence annotation = new Sentence(this.jCas, documentOffset + head, documentOffset + tail);
      annotation.addToIndexes();
    }

    // advance past the line and the single newline separator that follows it
    documentOffset = documentOffset + size + 1;
  }
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:24,代码来源:TokenizerAndTokenAnnotatorTest.java

示例6: getDescription

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Creates a description of the OpenNLP POS tagger engine, configured with the ClearTK
 * {@link Sentence} and {@link Token} type names and the {@code "pos"} feature name.
 * <p>
 * The model is loaded from the resource {@code /models/<languageCode>-pos-maxent.bin}, resolved
 * through {@code PosTagger.class}.
 *
 * @param languageCode
 *          the language code used to build the model resource name
 * @return the engine description
 * @throws ResourceInitializationException
 *           if no model resource exists for the given language code, or if the description
 *           cannot be created
 */
public static AnalysisEngineDescription getDescription(String languageCode)
    throws ResourceInitializationException {
  String modelPath = String.format("/models/%s-pos-maxent.bin", languageCode);

  // getResource returns null for a missing resource; report that explicitly instead of
  // letting the later toString() call fail with an uninformative NullPointerException.
  java.net.URL modelUrl = PosTagger.class.getResource(modelPath);
  if (modelUrl == null) {
    throw new ResourceInitializationException(new IllegalArgumentException(
        "No POS tagger model found at resource path " + modelPath
            + " for language code: " + languageCode));
  }

  return AnalysisEngineFactory.createEngineDescription(
      opennlp.uima.postag.POSTagger.class,
      opennlp.uima.util.UimaUtil.MODEL_PARAMETER,
      ExternalResourceFactory.createExternalResourceDescription(
          POSModelResourceImpl.class,
          modelUrl.toString()),
      opennlp.uima.util.UimaUtil.SENTENCE_TYPE_PARAMETER,
      Sentence.class.getName(),
      opennlp.uima.util.UimaUtil.TOKEN_TYPE_PARAMETER,
      Token.class.getName(),
      opennlp.uima.util.UimaUtil.POS_FEATURE_PARAMETER,
      "pos");
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:17,代码来源:PosTagger.java

示例7: test1

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Segments the test1.txt fixture and checks the covered text of the first four
 * {@link Sentence} annotations, selected by index.
 */
@Test
public void test1() throws Exception {
  this.sentenceSegmentFile("src/test/resources/data/sentence/test1.txt");

  String[] expectedSentences = {
      "aaaa aaaa aaaa aaaa",
      "bbbb",
      "ccc cccc ccc cccc",
      "dddddd ddd."
  };
  for (int index = 0; index < expectedSentences.length; index++) {
    Sentence found = JCasUtil.selectByIndex(jCas, Sentence.class, index);
    assertEquals(expectedSentences[index], found.getCoveredText());
  }
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:17,代码来源:SentenceAnnotatorTest.java

示例8: testWindowClassNames

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Runs the sentence annotator with {@code PARAM_WINDOW_CLASS_NAMES} set to the Sentence type,
 * after indexing one Sentence "window" over characters 0-30 of the text. Expects three
 * Sentence annotations afterwards: the window itself followed by the two sentences inside it.
 */
@Test
public void testWindowClassNames() throws Exception {
  this.jCas.setDocumentText("I bought a lamp. I love lamp. Lamps are great!");
  Sentence window = new Sentence(this.jCas, 0, 30);
  window.addToIndexes();

  String[] windowClassNames = { "org.cleartk.token.type.Sentence" };
  AnalysisEngineDescription description = SentenceAnnotator.getDescription();
  ConfigurationParameterFactory.addConfigurationParameter(
      description,
      SentenceAnnotator.PARAM_WINDOW_CLASS_NAMES,
      windowClassNames);

  AnalysisEngine annotator = AnalysisEngineFactory.createEngine(description);
  annotator.process(this.jCas);
  annotator.collectionProcessComplete();

  Collection<Sentence> found = JCasUtil.select(this.jCas, Sentence.class);
  assertEquals(3, found.size());

  Iterator<Sentence> remaining = found.iterator();
  assertEquals(window, remaining.next());
  assertEquals("I bought a lamp.", remaining.next().getCoveredText());
  assertEquals("I love lamp.", remaining.next().getCoveredText());
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:24,代码来源:SentenceAnnotatorTest.java

示例9: convert

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Builds a ClearTK <em>Predicate</em> annotation, together with its <em>SemanticArgument</em>
 * annotations, from this Propbank entry and adds the predicate to the indexes.
 * <p>
 * The predicate's begin and end offsets are copied from the annotation produced by converting
 * this entry's terminal.
 * 
 * @param view
 *          the view in which the annotations are created
 * @param topNode
 *          the top node annotation of the corresponding Treebank parse
 * @param sentence
 *          the sentence annotation to attach to the predicate
 * @return the newly created <em>Predicate</em> annotation
 */
public Predicate convert(JCas view, TopTreebankNode topNode, Sentence sentence) {
  Predicate predicate = new Predicate(view);

  // The span of the predicate is taken from the converted terminal annotation.
  predicate.setAnnotation(this.terminal.convert(view, topNode));
  predicate.setBegin(predicate.getAnnotation().getBegin());
  predicate.setEnd(predicate.getAnnotation().getEnd());

  predicate.setSentence(sentence);
  predicate.setPropTxt(this.propTxt);
  predicate.setFrameSet(this.frameSet);
  predicate.setBaseForm(this.baseForm);

  // Convert every label first so the feature-structure array can be sized exactly.
  List<Argument> arguments = new ArrayList<Argument>();
  for (Proplabel label : this.proplabels) {
    arguments.add(label.convert(view, topNode));
  }
  FSArray argumentArray = new FSArray(view, arguments.size());
  predicate.setArguments(argumentArray);
  FSCollectionFactory.fillArrayFS(predicate.getArguments(), arguments);

  predicate.addToIndexes();
  return predicate;
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:33,代码来源:Propbank.java

示例10: process

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Writes the tokens of the given CAS to a {@code <name>.pos} file in the output directory,
 * where {@code <name>} is the file name taken from the CAS's view URI.
 * <p>
 * Each sentence becomes one line; each token in it is written as {@code text/pos} followed by
 * a space.
 *
 * @param jCas
 *          the CAS whose sentences and tokens are written
 * @throws AnalysisEngineProcessException
 *           if the output file cannot be opened for writing
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
  String id = new File(ViewUriUtil.getURI(jCas)).getName();
  PrintWriter outputWriter;
  try {
    outputWriter = new PrintWriter(new File(this.outputDir, id + ".pos"));
  } catch (FileNotFoundException e) {
    throw new AnalysisEngineProcessException(e);
  }
  try {
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
      for (Token token : JCasUtil.selectCovered(jCas, Token.class, sentence)) {
        outputWriter.print(token.getCoveredText());
        outputWriter.print('/');
        outputWriter.print(token.getPos());
        outputWriter.print(' ');
      }
      outputWriter.println();
    }
  } finally {
    // Close in finally so the file handle is released even if reading annotations throws.
    outputWriter.close();
  }
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:21,代码来源:ExamplePosPlainTextWriter.java

示例11: main

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Command-line entry point: reads the files of the input directory, runs sentence annotation
 * on each, and passes the {@link Sentence} annotations to a {@code LineWriter} configured
 * with the output file.
 *
 * @param args
 *          command-line arguments, parsed into {@code Options}
 */
public static void main(String[] args) throws Exception {
  Options cli = CliFactory.parseArguments(Options.class, args);

  CollectionReader documents =
      UriCollectionReader.getCollectionReaderFromDirectory(cli.getInputDirectory());

  // Pipeline stages: load document text, annotate sentences, write the annotations out.
  AggregateBuilder pipeline = new AggregateBuilder();
  pipeline.add(UriToDocumentTextAnnotator.getDescription());
  pipeline.add(SentenceAnnotator.getDescription());
  pipeline.add(
      AnalysisEngineFactory.createEngineDescription(
          LineWriter.class,
          LineWriter.PARAM_OUTPUT_FILE_NAME,
          cli.getOutputFile(),
          LineWriter.PARAM_OUTPUT_ANNOTATION_CLASS_NAME,
          Sentence.class.getName()));

  SimplePipeline.runPipeline(documents, pipeline.createAggregateDescription());
  System.out.println("results written to " + cli.getOutputFile());
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:20,代码来源:Docs2Sentences.java

示例12: initZmusExtractor

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Creates the zero-mean/unit-stddev extractor that wraps the sentence-count and token-count
 * feature extractors, loading its data from {@code zmusUri} when that URI is set.
 *
 * @return the configured extractor
 * @throws IOException
 *           declared for the {@code load} call
 */
private ZeroMeanUnitStddevExtractor<String, DocumentAnnotation> initZmusExtractor()
    throws IOException {
  // The features to be normalized: number of sentences and number of tokens.
  CountAnnotationExtractor<DocumentAnnotation> sentenceCount =
      new CountAnnotationExtractor<DocumentAnnotation>(Sentence.class);
  CountAnnotationExtractor<DocumentAnnotation> tokenCount =
      new CountAnnotationExtractor<DocumentAnnotation>(Token.class);
  CombinedExtractor1<DocumentAnnotation> counts =
      new CombinedExtractor1<DocumentAnnotation>(sentenceCount, tokenCount);

  ZeroMeanUnitStddevExtractor<String, DocumentAnnotation> normalizer =
      new ZeroMeanUnitStddevExtractor<String, DocumentAnnotation>(ZMUS_EXTRACTOR_KEY, counts);

  if (this.zmusUri == null) {
    return normalizer;
  }
  normalizer.load(this.zmusUri);
  return normalizer;
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:17,代码来源:DocumentClassificationAnnotator.java

示例13: initMinMaxExtractor

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Creates the min-max normalization extractor that wraps the sentence-count and token-count
 * feature extractors, loading its data from {@code minmaxUri} when that URI is set.
 *
 * @return the configured extractor
 * @throws IOException
 *           declared for the {@code load} call
 */
private MinMaxNormalizationExtractor<String, DocumentAnnotation> initMinMaxExtractor()
    throws IOException {
  // The features to be normalized: number of sentences and number of tokens.
  CountAnnotationExtractor<DocumentAnnotation> sentenceCount =
      new CountAnnotationExtractor<DocumentAnnotation>(Sentence.class);
  CountAnnotationExtractor<DocumentAnnotation> tokenCount =
      new CountAnnotationExtractor<DocumentAnnotation>(Token.class);
  CombinedExtractor1<DocumentAnnotation> counts =
      new CombinedExtractor1<DocumentAnnotation>(sentenceCount, tokenCount);

  MinMaxNormalizationExtractor<String, DocumentAnnotation> normalizer =
      new MinMaxNormalizationExtractor<String, DocumentAnnotation>(MINMAX_EXTRACTOR_KEY, counts);

  if (this.minmaxUri == null) {
    return normalizer;
  }
  normalizer.load(this.minmaxUri);
  return normalizer;
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:17,代码来源:DocumentClassificationAnnotator.java

示例14: getSubordinateEvents

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Collects the events covered by the sentence, other than the source itself, whose treebank
 * path from the source (with leaves removed by {@code noLeavesPath}) matches
 * {@code SUBORDINATE_PATH_PATTERN}.
 *
 * @param jCas
 *          the CAS holding the events and treebank nodes
 * @param source
 *          the event from which paths are measured
 * @param sentence
 *          the sentence whose covered events are considered
 * @return the matching events, in the order they were selected; empty if none match
 */
private List<Event> getSubordinateEvents(JCas jCas, Event source, Sentence sentence) {
  List<Event> subordinates = new ArrayList<Event>();
  TreebankNode sourceNode = TreebankNodeUtil.selectMatchingLeaf(jCas, source);

  for (Event candidate : JCasUtil.selectCovered(jCas, Event.class, sentence)) {
    if (candidate.equals(source)) {
      continue;
    }

    // A path can only be computed when both events have a matching leaf node.
    TreebankNode candidateNode = TreebankNodeUtil.selectMatchingLeaf(jCas, candidate);
    if (sourceNode == null || candidateNode == null) {
      continue;
    }

    String path = noLeavesPath(TreebankNodeUtil.getPath(sourceNode, candidateNode));
    if (SUBORDINATE_PATH_PATTERN.matcher(path).matches()) {
      subordinates.add(candidate);
    }
  }
  return subordinates;
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:17,代码来源:TemporalLinkEventToSubordinatedEventAnnotator.java

示例15: getSourceTargetPairs

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Pairs the main event of each sentence with the main event of the sentence that follows it.
 * A pair is produced only when {@code getMainEvent} returns a non-null event for both
 * sentences.
 *
 * @param jCas
 *          the CAS whose sentences are walked in selection order
 * @return the source/target pairs; empty when there are fewer than two sentences
 */
@Override
protected List<SourceTargetPair> getSourceTargetPairs(JCas jCas) {
  List<SourceTargetPair> pairs = Lists.newArrayList();

  Iterator<Sentence> remaining = JCasUtil.select(jCas, Sentence.class).iterator();
  if (!remaining.hasNext()) {
    return pairs;
  }

  Sentence previous = remaining.next();
  while (remaining.hasNext()) {
    Sentence current = remaining.next();
    Event source = getMainEvent(jCas, previous);
    Event target = getMainEvent(jCas, current);
    if (source != null && target != null) {
      pairs.add(new SourceTargetPair(source, target));
    }
    previous = current;
  }
  return pairs;
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:17,代码来源:TemporalLinkMainEventToNextSentenceMainEventAnnotator.java


注:本文中的org.cleartk.token.type.Sentence类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。