本文整理汇总了Java中de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence.addToIndexes方法的典型用法代码示例。如果您正苦于以下问题：Java Sentence.addToIndexes方法的具体用法？Java Sentence.addToIndexes怎么用？Java Sentence.addToIndexes使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence的用法示例。
在下文中一共展示了Sentence.addToIndexes方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getNext
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Loads the next file of {@code documents} into the given CAS, treating every line of the
 * file as one {@link Sentence}.
 * <p>
 * The document text is the concatenation of all lines, each terminated by {@code '\n'};
 * sentence offsets exclude that terminator. The document language is always set to
 * {@code "en"}.
 *
 * @param aJcas the CAS to fill
 * @throws IOException if the file cannot be read
 * @throws CollectionException declared by the signature; not thrown directly by this body
 */
@Override
public void getNext(JCas aJcas) throws IOException, CollectionException {
    File f = documents.get(i);
    StringBuilder sb = new StringBuilder();
    int start = 0;
    int inds = 0;
    LineIterator it = FileUtils.lineIterator(f);
    try {
        while (it.hasNext()) {
            String line = it.nextLine();
            Sentence sent = new Sentence(aJcas, start, start + line.length());
            sent.addToIndexes();
            // +1 accounts for the '\n' appended after every line
            start = start + line.length() + 1;
            sb.append(line).append('\n');
            // progress output every 10000 lines
            if (inds % 10000 == 0) {
                System.out.println("R" + inds);
            }
            // the counter was never advanced before, so the progress line was printed
            // for every single input line
            inds++;
        }
    }
    finally {
        // release the underlying file handle even if reading fails half-way
        it.close();
    }
    aJcas.setDocumentText(sb.toString());
    // had to add english as default language, one could also add another configuration parameter
    aJcas.setDocumentLanguage("en");
    i++;
}
示例2: testSelectImplicitComponent
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Checks that {@code JCasUtil2.selectOverlapping} does not report a zero-length
 * {@code ArgumentComponent} placed at the start offset of a sentence.
 */
@Test
public void testSelectImplicitComponent()
    throws Exception
{
    // sentence reaching from the begin of tokenThis to the end of tokenIs
    Sentence added = new Sentence(jCas, this.tokenThis.getBegin(), this.tokenIs.getEnd());
    added.addToIndexes();

    // first sentence found in the index
    Sentence s = JCasUtil.select(jCas, Sentence.class).iterator().next();

    // the implicit (zero-length) component sits at [0, 0]; the sentence starts at 0 as well,
    // yet the component is expected to be ignored
    ArgumentComponent zeroLength = new ArgumentComponent(jCas, 0, 0);
    zeroLength.addToIndexes();

    int overlapping = JCasUtil2.selectOverlapping(ArgumentComponent.class, s, jCas).size();
    assertEquals(0, overlapping);
}
示例3: convertSentences
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Creates one {@link Sentence} annotation per entry of the corpus' sentences layer.
 * Each sentence spans from the begin of its first token to the end of its last token,
 * using the already converted tokens in {@code aTokens} (looked up by token ID).
 *
 * @param aJCas the CAS receiving the annotations
 * @param aCorpusData the corpus whose sentences layer is read; nothing happens if the layer
 *            is {@code null}
 * @param aTokens converted tokens keyed by the ID of the corresponding corpus token
 */
private void convertSentences(JCas aJCas, TextCorpus aCorpusData,
        Map<String, Token> aTokens)
{
    if (aCorpusData.getSentencesLayer() != null) {
        for (int idx = 0; idx < aCorpusData.getSentencesLayer().size(); idx++) {
            eu.clarin.weblicht.wlfxb.tc.api.Token[] members = aCorpusData.getSentencesLayer()
                    .getTokens(aCorpusData.getSentencesLayer().getSentence(idx));
            eu.clarin.weblicht.wlfxb.tc.api.Token firstMember = members[0];
            eu.clarin.weblicht.wlfxb.tc.api.Token lastMember = members[members.length - 1];

            int begin = aTokens.get(firstMember.getID()).getBegin();
            int end = aTokens.get(lastMember.getID()).getEnd();

            Sentence converted = new Sentence(aJCas, begin, end);
            converted.addToIndexes();
        }
    }
    // else: no layer to read from
}
示例4: createSentence
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Appends one sentence line to {@code coveredText} and adds a {@link Sentence} annotation
 * for the span {@code [aBegin, aEnd)}.
 *
 * @param aJCas the CAS receiving the annotation
 * @param aLine the sentence text to append
 * @param aBegin begin offset of the sentence
 * @param aEnd end offset of the sentence
 * @param aPrevEnd end offset of the previous sentence
 */
private void createSentence(JCas aJCas, String aLine, int aBegin, int aEnd, int aPrevEnd)
{
    // If the next sentence immediately follows the last one without any space or line break
    // in between, then we need to chop off again the linebreak that we added at the end of the
    // last sentence - otherwise offsets will be off on a round-trip.
    if (aPrevEnd == aBegin && coveredText.length() > 0
            && (coveredText.charAt(coveredText.length() - 1) == '\n')) {
        coveredText.deleteCharAt(coveredText.length() - 1);
    }

    // If more than one character lies between the previous sentence and this one, fill the
    // gap with spaces. One position is left out because a line terminator is appended after
    // every sentence. The padding is produced in one bulk operation instead of appending
    // character by character through a temporary builder.
    if (aPrevEnd + 1 < aBegin) {
        char[] pad = new char[aBegin - aPrevEnd - 1];
        java.util.Arrays.fill(pad, ' ');
        coveredText.append(pad);
    }
    coveredText.append(aLine).append(LF);

    Sentence sentence = new Sentence(aJCas, aBegin, aEnd);
    sentence.addToIndexes();
}
示例5: splitSentences
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Segments the document text of the given CAS into sentences using a
 * {@link BreakIterator} sentence instance for {@link Locale#US}. Every segment is passed
 * through {@code trim}; segments reported as empty by {@code isEmpty} are skipped, all
 * others are added to the CAS as {@link Sentence} annotations.
 *
 * @param aJCas the CAS whose document text is segmented
 */
public static void splitSentences(JCas aJCas)
{
    BreakIterator boundaries = BreakIterator.getSentenceInstance(Locale.US);
    boundaries.setText(aJCas.getDocumentText());

    int from = boundaries.first();
    for (int to = boundaries.next(); to != BreakIterator.DONE; to = boundaries.next()) {
        int[] bounds = { from, to };
        // trim adjusts the two offsets in place
        trim(aJCas.getDocumentText(), bounds);
        boolean hasContent = !isEmpty(bounds[0], bounds[1]);
        if (hasContent) {
            Sentence segment = new Sentence(aJCas, bounds[0], bounds[1]);
            segment.addToIndexes(aJCas);
        }
        from = to;
    }
}
示例6: terminateSentence
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Closes the given sentence at the end of {@code token}, adds it to the indexes and logs
 * its text and offsets at level FINE.
 *
 * @param sentence the sentence to finish; its begin offset must already be set
 * @param token the token whose end offset becomes the sentence end
 * @param docText the document text collected so far, used only for the log message
 */
private void terminateSentence(Sentence sentence, Token token, StringBuffer docText)
{
    sentence.setEnd(token.getEnd());
    sentence.addToIndexes();

    int begin = sentence.getBegin();
    int end = sentence.getEnd();
    String message = "Sentence:[" + docText.substring(begin, end) + "]\t" + begin + "\t" + end;
    logger.log(Level.FINE, message);
}
示例7: setUp
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Builds the test CAS: a six-token document with four {@link Sentence} annotations and two
 * {@code Premise} annotations, and prints the covered text of each for inspection.
 */
@Before
public void setUp()
    throws Exception
{
    jCas = JCasFactory.createJCas();
    jCas.setDocumentText("s0t0 s0t2 s1t0 s2t0 s2t0 s3t0");
    jCas.setDocumentLanguage("en");

    // sentence spans as { begin, end }
    int[][] sentenceSpans = { { 0, 9 }, { 10, 14 }, { 15, 24 }, { 25, 29 } };
    Sentence[] sentences = new Sentence[sentenceSpans.length];
    for (int k = 0; k < sentenceSpans.length; k++) {
        sentences[k] = new Sentence(jCas, sentenceSpans[k][0], sentenceSpans[k][1]);
        sentences[k].addToIndexes();
    }

    // first premise spans the first two sentences, second premise equals the last sentence
    Premise firstPremise = new Premise(jCas, 0, 14);
    firstPremise.addToIndexes();
    Premise secondPremise = new Premise(jCas, 25, 29);
    secondPremise.addToIndexes();

    for (Sentence sentence : sentences) {
        System.out.println("'" + sentence.getCoveredText() + "'");
    }
    System.out.println("p1: '" + firstPremise.getCoveredText() + "'");
    System.out.println("p2: '" + secondPremise.getCoveredText() + "'");
}
示例8: posTag
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Tags a single, already tokenized sentence.
 * <p>
 * The tokens are joined with single spaces into the document text of a fresh CAS, each
 * token gets a {@link Token} annotation, the whole text gets one {@link Sentence}
 * annotation, and {@code callFlexTag} is run on the CAS.
 *
 * @param sentence the tokens of the sentence, in order
 * @return the {@code PosValue} of every {@link POS} annotation found in the CAS afterwards
 * @throws UIMAException if the CAS cannot be created or processing fails
 */
public List<String> posTag(List<String> sentence)
    throws UIMAException
{
    JCas jCas = JCasFactory.createJCas();

    StringBuilder text = new StringBuilder();
    int tokenCount = sentence.size();
    for (int idx = 0; idx < tokenCount; idx++) {
        // single space between tokens, none after the last one
        if (idx > 0) {
            text.append(" ");
        }
        int begin = text.length();
        text.append(sentence.get(idx));
        Token annotated = new Token(jCas, begin, text.length());
        annotated.addToIndexes();
    }

    String docText = text.toString();
    jCas.setDocumentText(docText);
    Sentence whole = new Sentence(jCas, 0, docText.length());
    whole.addToIndexes();

    callFlexTag(jCas);

    List<String> posTags = new ArrayList<String>();
    for (POS pos : JCasUtil.select(jCas, POS.class)) {
        posTags.add(pos.getPosValue());
    }
    return posTags;
}
示例9: tagSentences
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Tags several already tokenized sentences in one CAS.
 * <p>
 * Tokens are written to the document text separated by single spaces (both inside a
 * sentence and between sentences), every token gets a {@link Token} annotation and every
 * input list gets a {@link Sentence} annotation covering its tokens. The document text is
 * set exactly as built, so all recorded offsets stay valid; previously tokens were glued
 * together without separators and the text was trimmed after the offsets had been fixed.
 *
 * @param sentences the sentences, each given as its list of tokens
 * @return the result of {@code extractTags} on the processed CAS
 * @throws Exception if CAS creation or processing fails
 */
public List<List<String>> tagSentences(List<List<String>> sentences)
    throws Exception
{
    JCas jCas = JCasFactory.createJCas();
    StringBuilder sb = new StringBuilder();
    for (List<String> sentence : sentences) {
        // separate this sentence from whatever has been written before
        if (sb.length() > 0) {
            sb.append(' ');
        }
        int sentStart = sb.length();
        boolean firstToken = true;
        for (String token : sentence) {
            // separate tokens inside the sentence, no trailing separator
            if (!firstToken) {
                sb.append(' ');
            }
            firstToken = false;
            int start = sb.length();
            int end = start + token.length();
            Token t = new Token(jCas, start, end);
            t.addToIndexes();
            sb.append(token);
        }
        int sentEnd = sb.length();
        Sentence s = new Sentence(jCas, sentStart, sentEnd);
        s.addToIndexes();
    }
    // no trim(): removing characters here would invalidate the offsets recorded above
    jCas.setDocumentText(sb.toString());
    flexTagEngine.process(jCas);
    return extractTags(jCas);
}
示例10: createSentence
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Adds the sentence layer to the CAS: one {@link Sentence} annotation is created for every
 * entry of {@code firstTokenInSentence}. Nothing is created for an empty list.
 * <p>
 * Offsets are taken from the tokens in {@code tokensStored}, which are looked up under the
 * key {@code "t_" + number}.
 *
 * @param aJCas
 *            the CAS receiving the annotations
 * @param firstTokenInSentence
 *            token numbers marking the sentences. NOTE(review): for every sentence but the
 *            last, the END offset is taken from the token referenced by the NEXT entry
 *            (i + 1) -- confirm this matches what the list actually contains.
 * @param tokensStored
 *            tokens keyed by {@code "t_" + number}; the token under
 *            {@code "t_" + tokensStored.size()} supplies the end of the final sentence
 *            (presumably the last token of the document -- confirm keys are 1-based and
 *            contiguous)
 */
private void createSentence(JCas aJCas, List<Integer> firstTokenInSentence,
Map<String, Token> tokensStored)
{
for (int i = 0; i < firstTokenInSentence.size(); i++) {
Sentence outSentence = new Sentence(aJCas);
// Last sentence, but not the only sentence in the document (i != 0): it runs up to the
// end of the token keyed "t_" + tokensStored.size().
// NOTE(review): begin is the END offset of the referenced token (no + 1), whereas the
// middle-sentence branch below uses getEnd() + 1 -- confirm the difference is intended.
if (i == firstTokenInSentence.size() - 1 && i != 0) {
outSentence.setBegin(tokensStored.get("t_" + firstTokenInSentence.get(i)).getEnd());
outSentence.setEnd(tokensStored.get("t_" + (tokensStored.size())).getEnd());
outSentence.addToIndexes();
break;
}
// The only sentence in the document: from the begin of its referenced token to the end
// of the token keyed "t_" + tokensStored.size().
if (i == firstTokenInSentence.size() - 1 && i == 0) {
outSentence.setBegin(tokensStored.get("t_" + firstTokenInSentence.get(i))
.getBegin());
outSentence.setEnd(tokensStored.get("t_" + (tokensStored.size())).getEnd());
outSentence.addToIndexes();
}
// First of several sentences: from the begin of its referenced token to the end of the
// token referenced by the next list entry.
else if (i == 0) {
outSentence.setBegin(tokensStored.get("t_" + firstTokenInSentence.get(i))
.getBegin());
outSentence.setEnd(tokensStored.get("t_" + firstTokenInSentence.get(i + 1))
.getEnd());
outSentence.addToIndexes();
}
// Middle sentence: starts one character after the end of its referenced token and ends
// at the end of the token referenced by the next list entry.
else {
outSentence.setBegin(
tokensStored.get("t_" + firstTokenInSentence.get(i)).getEnd() + 1);
outSentence
.setEnd(tokensStored.get("t_" + firstTokenInSentence.get(i + 1)).getEnd());
outSentence.addToIndexes();
}
}
}
示例11: createSentence
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Creates a {@link Sentence} for the given span after passing it through {@code trim}.
 *
 * @param aJCas the CAS receiving the annotation
 * @param aBegin begin offset before trimming
 * @param aEnd end offset before trimming
 * @return the sentence that was added to the indexes, or {@code null} if {@code isEmpty}
 *         reports the trimmed span as empty
 */
protected Sentence createSentence(final JCas aJCas, final int aBegin,
        final int aEnd)
{
    final int[] bounds = { aBegin, aEnd };
    // trim adjusts the two offsets in place
    trim(aJCas.getDocumentText(), bounds);

    if (isEmpty(bounds[0], bounds[1])) {
        return null;
    }

    Sentence created = new Sentence(aJCas, bounds[0], bounds[1]);
    created.addToIndexes(aJCas);
    return created;
}
示例12: convert
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Writes one crowdsourced text sequence into the CAS as a single sentence.
 * <p>
 * Every sequence unit becomes a {@link Token} followed by a space, plus a
 * {@code TextClassificationUnit} and a {@code TextClassificationOutcome} over the same span.
 * The whole sequence is covered by one {@link Sentence} and one
 * {@code TextClassificationSequence}, running from the builder position at entry to the
 * end of the last token. A line break is appended and the builder is closed afterwards.
 *
 * @param aJCas the CAS to fill
 * @param crowdsourceTextSequence the sequence whose units are converted
 * @throws IOException declared by the signature; not thrown directly by this body
 */
private void convert(JCas aJCas, CrowdsourceTextSequence crowdsourceTextSequence)
    throws IOException
{
    JCasBuilder doc = new JCasBuilder(aJCas);
    int sentenceBegin = doc.getPosition();
    // stays equal to sentenceBegin if the sequence has no units
    int sentenceEnd = sentenceBegin;

    // Tokens, NER-IOB
    for (CrowdsourceSequenceUnit sequenceUnit : crowdsourceTextSequence
            .getCrowdsourceSequenceUnits()) {
        String word = sequenceUnit.getText();

        // Read token
        Token token = doc.add(word, Token.class);
        sentenceEnd = token.getEnd();
        doc.add(" ");

        TextClassificationUnit unit = new TextClassificationUnit(aJCas, token.getBegin(),
                token.getEnd());
        unit.setSuffix("unit" + sequenceUnit.getId() + "_" + sequenceUnit.getText());
        unit.addToIndexes();

        TextClassificationOutcome outcome = new TextClassificationOutcome(aJCas,
                token.getBegin(), token.getEnd());
        outcome.setOutcome(sequenceUnit.useThisLabel);
        outcome.addToIndexes();
        // the tokens were previously also collected in a local list that was never read
    }

    // Sentence
    Sentence sentence = new Sentence(aJCas, sentenceBegin, sentenceEnd);
    sentence.addToIndexes();
    TextClassificationSequence sequence = new TextClassificationSequence(aJCas, sentenceBegin,
            sentenceEnd);
    sequence.addToIndexes();

    // One sentence per line.
    doc.add("\n");
    doc.close();
}
示例13: getNext
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Fills the CAS with all {@code sequences}: every sequence becomes one {@link Sentence},
 * every entry of a sequence becomes one {@link Token} with an attached {@link POS}.
 * <p>
 * Each entry is expected to have the form {@code "<token> <tag>"}; runs of spaces are
 * collapsed and the text after the last space is taken as the tag. Tokens and sequences
 * are joined with single spaces in the document text. If {@code lowerCase} is set, tokens
 * are lower-cased before being written.
 *
 * @param aJCas the CAS to fill
 * @throws IOException declared by the signature; not thrown directly by this body
 * @throws CollectionException if the POS mapping provider cannot be configured
 */
public void getNext(JCas aJCas)
    throws IOException, CollectionException
{
    DocumentMetaData md = new DocumentMetaData(aJCas);
    md.setDocumentTitle("");
    md.setDocumentId("" + (instanceId++));
    md.setLanguage(language);
    md.addToIndexes();

    try {
        posMappingProvider.configure(aJCas.getCas());
    }
    catch (AnalysisEngineProcessException e1) {
        throw new CollectionException(e1);
    }

    StringBuilder documentText = new StringBuilder();
    int seqStart = 0;
    for (int k = 0; k < sequences.size(); k++) {
        List<String> sequence = sequences.get(k);
        for (int i = 0; i < sequence.size(); i++) {
            String pairs = sequence.get(i).replaceAll(" +", " ");
            int idxLastSpace = pairs.lastIndexOf(" ");
            String token = pairs.substring(0, idxLastSpace);
            String tag = pairs.substring(idxLastSpace + 1);

            if (lowerCase) {
                token = token.toLowerCase();
            }

            // Take the start offset BEFORE appending. The former computation subtracted
            // the length measured before lower-casing, which is wrong whenever
            // lower-casing changes the number of characters of the token.
            int tokStart = documentText.length();
            documentText.append(token);
            int tokEnd = documentText.length();

            Token t = new Token(aJCas, tokStart, tokEnd);
            t.addToIndexes();

            // single space between tokens, none after the last token of a sequence
            if (i + 1 < sequence.size()) {
                documentText.append(" ");
            }

            Type posTag = posMappingProvider.getTagType(tag);
            POS pos = (POS) aJCas.getCas().createAnnotation(posTag, t.getBegin(),
                    t.getEnd());
            pos.setPosValue(tag);
            pos.addToIndexes();
            t.setPos(pos);
        }

        Sentence sentence = new Sentence(aJCas, seqStart, documentText.length());
        sentence.addToIndexes();

        // single space between sequences, none after the last one
        if (k + 1 < sequences.size()) {
            documentText.append(" ");
        }
        seqStart = documentText.length();
    }
    aJCas.setDocumentText(documentText.toString());
}
示例14: testSelectOverlapping
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Checks that an {@code ArgumentComponent} crossing a sentence boundary is found by
 * {@code JCasUtil2.selectOverlapping} for BOTH sentences, although neither
 * {@code selectCovered} nor {@code selectCovering} reports it for either of them.
 */
@Test
public void testSelectOverlapping()
    throws Exception
{
    {
        Sentence s1 = new Sentence(jCas);
        s1.setBegin(this.tokenThis.getBegin());
        s1.setEnd(this.tokenIs.getEnd());
        s1.addToIndexes();

        Sentence s2 = new Sentence(jCas);
        s2.setBegin(this.tokenA.getBegin());
        s2.setEnd(this.tokenDot.getEnd());
        s2.addToIndexes();
    }

    List<Sentence> sentences = new ArrayList<Sentence>(
            JCasUtil.select(jCas, Sentence.class));
    assertEquals(2, sentences.size());

    // annotation that covers "is" and "a" (each from different sentence)
    ArgumentComponent argumentComponent = new ArgumentComponent(jCas);
    argumentComponent.setBegin(tokenIs.getBegin());
    argumentComponent.setEnd(this.tokenA.getEnd());
    argumentComponent.addToIndexes();

    Collection<ArgumentComponent> argumentComponents = JCasUtil
            .select(jCas, ArgumentComponent.class);
    assertEquals(1, argumentComponents.size());
    ArgumentComponent component = argumentComponents.iterator().next();

    List<Token> coveredTokens = JCasUtil.selectCovered(Token.class, component);
    assertEquals(2, coveredTokens.size());
    assertEquals(this.tokenIs.getBegin(), coveredTokens.get(0).getBegin());
    assertEquals(this.tokenA.getBegin(), coveredTokens.get(1).getBegin());

    Sentence sent1 = sentences.get(0);
    // was get(0) as well, so the second sentence was never actually checked
    Sentence sent2 = sentences.get(1);

    // !!! selectCovered won't find the ArgumentComponent annotation, as it crosses
    // the boundaries!!
    assertEquals(0, JCasUtil.selectCovered(ArgumentComponent.class, sent1).size());
    assertEquals(0, JCasUtil.selectCovering(ArgumentComponent.class, sent1).size());
    assertEquals(0, JCasUtil.selectCovered(ArgumentComponent.class, sent2).size());
    assertEquals(0, JCasUtil.selectCovering(ArgumentComponent.class, sent2).size());

    // now we select overlapping -- we get the same component from both sentences
    assertEquals(1, JCasUtil2.selectOverlapping(ArgumentComponent.class, sent1, jCas).size());
    assertEquals(1, JCasUtil2.selectOverlapping(ArgumentComponent.class, sent2, jCas).size());

    // and this is indeed the "component" that overlaps both sentences
    assertEquals(component,
            JCasUtil2.selectOverlapping(ArgumentComponent.class, sent1, jCas).iterator()
                    .next());
    assertEquals(component,
            JCasUtil2.selectOverlapping(ArgumentComponent.class, sent2, jCas).iterator()
                    .next());
}
示例15: terminateSentence
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Finishes a {@link Sentence} annotation: sets its end offset to the end of the given
 * token and adds the sentence to the indexes.
 *
 * @param sentence the current {@link Sentence} annotation
 * @param token the last {@link Token} in the sentence
 */
private void terminateSentence(Sentence sentence, Token token) {
    final int lastTokenEnd = token.getEnd();
    sentence.setEnd(lastTokenEnd);
    sentence.addToIndexes();
}