本文整理匯總了Java中opennlp.tools.util.Span.getStart方法的典型用法代碼示例。如果您正苦於以下問題:Java Span.getStart方法的具體用法?Java Span.getStart怎麽用?Java Span.getStart使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類opennlp.tools.util.Span
的用法示例。
在下文中一共展示了Span.getStart方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。
示例1: incrementToken
import opennlp.tools.util.Span; //導入方法依賴的package包/類
@Override
public final boolean incrementToken() throws IOException {
    // Lazily run sentence detection on the first call.
    if (sentences == null) {
        fillSentences();
    }
    // Every detected sentence has already been emitted.
    if (tokenOffset >= sentences.length) {
        return false;
    }
    clearAttributes();
    final Span current = sentences[tokenOffset++];
    final int begin = current.getStart();
    final int finish = current.getEnd();
    // Emit the sentence text as a single token with char offsets.
    termAtt.copyBuffer(inputSentence, begin, finish - begin);
    posIncrAtt.setPositionIncrement(1);
    offsetAtt.setOffset(begin, finish);
    return true;
}
示例2: addSpan
import opennlp.tools.util.Span; //導入方法依賴的package包/類
/**
 * Registers a labelled span. A zero-length span (start == end) is stored as a
 * single position carrying both start and end labels; a normal span records
 * its two boundaries separately.
 */
public void addSpan(Span span, CSList<Character> labels) throws Exception {
    final int begin = span.getStart();
    final int finish = span.getEnd();
    if (begin != finish) {
        // Ordinary span: record each boundary on its own.
        addStart(begin, labels);
        addEnd(finish, labels);
        return;
    }
    // Zero-length span: both boundaries land on the same position.
    LabelledPosition lpos = this.get(begin);
    if (lpos != null) {
        // Position already labelled by another span.
        lpos.IsSingleSpan = false;
    } else {
        lpos = new LabelledPosition();
        lpos.IsSingleSpan = true;
        this.put(begin, lpos);
    }
    lpos.IsStart = true;
    lpos.IsEnd = true;
    LabelledPosition.addNewLabels(lpos.StartLabels, labels);
    LabelledPosition.addNewLabels(lpos.EndLabels, labels);
}
示例3: createAll
import opennlp.tools.util.Span; //導入方法依賴的package包/類
/**
 * Runs the NE finder over a tokenized sentence and returns a parallel array
 * of NER tags (null for tokens outside any entity span).
 *
 * @param words the sentence tokens
 * @return per-token entity type array, same length as {@code words}
 */
public String[] createAll(String[] words) {
    Span[] nerSpans;
    // NameFinderME is not thread-safe; serialize access and reset its
    // adaptive data so one call cannot bias the next.
    synchronized (nameFinder) {
        nerSpans = nameFinder.find(words);
        nameFinder.clearAdaptiveData();
    }
    String[] nerTags = new String[words.length];
    if (nerSpans.length == 0) {
        return nerTags;
    }
    for (Span tagged : nerSpans) {
        // BUG FIX: the original read nerSpans[0].getType() once and stamped
        // that type on every span, mislabelling sentences containing more
        // than one entity type. Each span must use its own type.
        String tag = tagged.getType();
        for (int j = tagged.getStart(); j < tagged.getEnd(); j++) {
            nerTags[j] = tag;
        }
    }
    return nerTags;
}
示例4: containsContext
import opennlp.tools.util.Span; //導入方法依賴的package包/類
/**
 * @return True if the tree contains the entire context span (char based span).
 *         Overlaps are returned false.
 */
private boolean containsContext(Tree full, Tree subtree, MentionContext mc) {
    // Character offset of the subtree within the full tree.
    final int offset = TreeOperator.inorderTraverse(full, subtree);
    final Span span = mc.getSpan();
    // The mention must not start before the subtree does.
    if (span.getStart() < offset) {
        return false;
    }
    // ...and must end within the subtree's character extent.
    final int length = TreeOperator.treeStringLength(subtree);
    return span.getEnd() <= offset + length;
}
示例5: getEntities
import opennlp.tools.util.Span; //導入方法依賴的package包/類
@Override
public List<String> getEntities(EntityType entityCat, String text)
{
    // Pick the finder model matching the requested entity category.
    final NameFinderME finder = getNameFinderModel(entityCat);
    final List<String> results = new ArrayList<String>();
    final String[] tokens = tokenizer.tokenize(text);
    for (Span span : finder.find(tokens)) {
        // Rebuild the entity surface form by joining its tokens.
        final StringBuilder joined = new StringBuilder();
        for (int idx = span.getStart(); idx < span.getEnd(); idx++) {
            joined.append(tokens[idx]).append(' ');
        }
        joined.deleteCharAt(joined.length() - 1); // drop trailing space
        results.add(joined.toString());
    }
    return results;
}
示例6: performAnnotation
import opennlp.tools.util.Span; //導入方法依賴的package包/類
@Override
void performAnnotation(RawTextAnnotation annotation)
throws MissingRequiredAnnotationException {
    // Run the NE finder per sentence and attach the raw-text annotation to
    // every token covered by a detected name span.
    for (SentenceAnnotation sentence : annotation.getAnnotationsFor(SentenceAnnotation.class)) {
        final List<TokenAnnotation> tokenAnns = sentence.getAnnotationsFor(TokenAnnotation.class);
        final List<String> tokenStrings =
            AnnotationUtils.getStringTokensFromTokenAnnotationList(tokenAnns);
        for (Span span : nameFinder.find(AnnotationUtils.ListToArray(tokenStrings))) {
            for (int idx = span.getStart(); idx < span.getEnd(); idx++) {
                tokenAnns.get(idx).addAnnotation(annotation);
            }
        }
    }
    // Reset adaptive data after the whole document so this call does not
    // influence the next document's results.
    nameFinder.clearAdaptiveData();
}
示例7: annotate
import opennlp.tools.util.Span; //導入方法依賴的package包/類
/**
 * Recursively mirrors an OpenNLP parse tree into GATE "SyntaxTreeNode"
 * annotations. Children are annotated first so their annotation IDs can be
 * stored on the parent under the "consists" feature.
 *
 * @param p the parse node to annotate
 * @param sentStart document offset of the sentence start; the parse span is
 *        relative to the sentence, so it is added to get document offsets
 * @return the GATE annotation ID for this node, or -1 for token ("TK") nodes,
 *         which are skipped
 * @throws gate.util.InvalidOffsetException if the computed offsets fall
 *         outside the document content
 */
private Integer annotate(Parse p, Long sentStart)
throws gate.util.InvalidOffsetException {
List<Integer> childIDs = new ArrayList<Integer>();
Parse[] children = p.getChildren();
// Depth-first: annotate children before this node so their IDs exist.
for(Parse cp : children) {
Integer childID = annotate(cp, sentStart);
if(childID >= 0) childIDs.add(childID);
}
String type = p.getType();
// Token-level nodes are not annotated; signal that with -1.
if(type.equals("TK")) return -1;
Span span = p.getSpan();
// Convert the sentence-relative span to document offsets.
Long start = sentStart + span.getStart();
Long end = sentStart + span.getEnd();
FeatureMap fm = gate.Factory.newFeatureMap();
String text = document.getContent().getContent(start, end).toString();
fm.put("text", text);
fm.put("cat", p.getType());
// Link to the already-created child annotations, if any.
if(!childIDs.isEmpty()) fm.put("consists", childIDs);
return annotations.add(start, end, "SyntaxTreeNode", fm);
}
示例8: getTokensWithMultiWords
import opennlp.tools.util.Span; //導入方法依賴的package包/類
/**
 * Get input text and join the multiwords found in the dictionary object.
 *
 * @param tokens
 *          the input text
 * @return the output text with the joined multiwords, each multiword glued
 *         with "#"
 */
public final String[] getTokensWithMultiWords(final String[] tokens) {
    final List<String> working = new ArrayList<String>(Arrays.asList(tokens));
    // Number of positions the list has shrunk so far; used to translate
    // original-array span indexes into current-list indexes.
    int removed = 0;
    for (final Span mwSpan : multiWordsToSpans(tokens)) {
        final int from = mwSpan.getStart() - removed;
        final int to = mwSpan.getEnd() - removed;
        final List<String> slice = working.subList(from, to);
        removed += slice.size() - 1;
        // Join the covered tokens into one "#"-separated multiword...
        final String joined = Joiner.on("#").join(slice);
        // ...then replace the covered tokens with it, in place.
        slice.clear();
        working.add(from, joined);
    }
    return working.toArray(new String[working.size()]);
}
示例9: getMultiWordSpans
import opennlp.tools.util.Span; //導入方法依賴的package包/類
/**
 * Creates the multiword spans. It gets an initial list of spans (one per
 * token) and creates a multiword span when a multiword is detected.
 *
 * Note the two coordinate systems: {@code wfs} is indexed with the original
 * span offsets, while {@code tokenSpans} shrinks as spans are merged and is
 * indexed with counter-adjusted offsets.
 *
 * @param tokens
 *          the list of tokens
 * @param wfs
 *          the list of WFs (never shrunk; original indexes remain valid)
 * @param tokenSpans
 *          the list of initial token spans, modified in place
 */
private void getMultiWordSpans(final String[] tokens, final List<WF> wfs,
    final List<ixa.kaflib.Span<WF>> tokenSpans) {
final Span[] multiWordSpans = this.multiWordMatcher
    .multiWordsToSpans(tokens);
// Tracks how many positions tokenSpans has shrunk so far.
int counter = 0;
for (final Span mwSpan : multiWordSpans) {
// Translate original-token indexes to the current tokenSpans indexes.
final Integer fromIndex = mwSpan.getStart() - counter;
final Integer toIndex = mwSpan.getEnd() - counter;
// add to the counter the length of the span removed
counter = counter + tokenSpans.subList(fromIndex, toIndex).size() - 1;
// create multiword targets and Span
// NOTE: wfs is indexed with the ORIGINAL (unshifted) span offsets.
final List<WF> wfTargets = wfs
    .subList(mwSpan.getStart(), mwSpan.getEnd());
final ixa.kaflib.Span<WF> multiWordSpan = KAFDocument
    .newWFSpan(wfTargets);
// remove the token Spans to be replaced by the multiword span
tokenSpans.subList(fromIndex, toIndex).clear();
// add the new Span containing several WFs (multiWordSpan)
// the counter is used to allow matching the spans to the
// tokenSpans list indexes
tokenSpans.add(fromIndex, multiWordSpan);
}
}
示例10: isInQuotes
import opennlp.tools.util.Span; //導入方法依賴的package包/類
/**
 * Returns true when the span starts inside an open quotation: an odd number
 * of quote marks occur before the span's start position. Assumes Quotes is
 * ordered by start offset.
 */
public boolean isInQuotes(Span span) {
    boolean inside = false;
    for (int idx = 0; idx < Quotes.size(); idx++) {
        if (Quotes.get(idx).getStart() >= span.getStart()) {
            break; // quotes are ordered; none further back can precede us
        }
        // Each preceding quote mark toggles open/closed state.
        inside = !inside;
    }
    return inside;
}
示例11: chunk
import opennlp.tools.util.Span; //導入方法依賴的package包/類
/**
 * Chunks a tagged sentence and keeps only confident, multi-token chunks that
 * contain no proper nouns.
 *
 * @param tokens sentence tokens
 * @param tags POS tags parallel to tokens
 * @param probs per-token tagger probabilities parallel to tokens
 * @return the surviving chunks
 */
public List<Chunk> chunk(List<String> tokens, List<String> tags, List<Double> probs) {
    final String[] tokenArr = tokens.toArray(new String[tokens.size()]);
    final String[] tagArr = tags.toArray(new String[tags.size()]);
    final List<Chunk> result = new ArrayList<>();
    for (final Span span : chunker.chunkAsSpans(tokenArr, tagArr)) {
        final int from = span.getStart();
        final int to = span.getEnd();
        // Reject the chunk if any covered token is below threshold on
        // either the chunker's or the upstream tagger's probability.
        boolean confident = true;
        for (int idx = from; idx < to; idx++) {
            if (chunker.probs()[idx] < threshold || probs.get(idx) < threshold) {
                confident = false;
                break;
            }
        }
        // Keep only confident spans of at least two tokens.
        if (!confident || to - from <= 1) {
            continue;
        }
        final List<String> spanTags = tags.subList(from, to);
        // Chunks containing proper nouns are excluded.
        if (spanTags.contains(NNP.name()) || spanTags.contains(NNPS.name())) {
            continue;
        }
        result.add(new Chunk(tokens.subList(from, to), spanTags));
    }
    return result;
}
示例12: getFacetedOrderedTagging
import opennlp.tools.util.Span; //導入方法依賴的package包/類
/**
 * Gets the faceted ordered tagging: runs of mutually intersecting spans are
 * collapsed into one span whose type is the "|"-joined list of their types.
 * The merged span keeps the bounds of the first span in the run. Input is
 * assumed ordered — TODO confirm with callers.
 *
 * @param spans the spans
 * @return the faceted ordered tagging
 */
private static Span[] getFacetedOrderedTagging(List<Span> spans) {
    final List<Span> merged = new ArrayList<Span>();
    Span previous = null;
    String facetType = "";
    int begin = 0;
    int finish = 0;
    for (final Span current : spans) {
        if (previous == null) {
            // First span starts the pending run.
            facetType = current.getType();
            begin = current.getStart();
            finish = current.getEnd();
        } else if (previous.intersects(current)) {
            // Overlap with the previous span: stack the type onto the run.
            facetType += "|" + current.getType();
        } else {
            // No overlap: flush the pending run and start a new one.
            merged.add(new Span(begin, finish, facetType));
            facetType = current.getType();
            begin = current.getStart();
            finish = current.getEnd();
        }
        previous = current;
    }
    // Flush the final pending run, if any span was seen.
    if (!facetType.isEmpty())
        merged.add(new Span(begin, finish, facetType));
    return merged.toArray(new Span[merged.size()]);
}
示例13: chunk
import opennlp.tools.util.Span; //導入方法依賴的package包/類
/**
 * Chunks a sentence with OpenNLP and applies some correction rules
 * (merging possessive "'s" chunks into the preceding phrase).
 *
 * @param words the sentence words; index 0 is a Root placeholder and is
 *        ignored, so chunker indexes are shifted by +1 when mapping back
 *
 * @return the resulting chunk phrases
 *
 * @throws Exception if the chunker model cannot be loaded
 */
public static List<ChunkPhrase> chunk(List<Word> words) throws Exception {
List<ChunkPhrase> phrases = new ArrayList<ChunkPhrase>();
int wordsCount = words.size();
String[] toks = new String[wordsCount - 1]; // skip the first word (Root placeholder)
String[] tags = new String[wordsCount - 1];
for (int i = 1; i < words.size(); i++) {
toks[i - 1] = words.get(i).getName();
tags[i - 1] = words.get(i).getPos();
}
// Run OpenNLP chunking over the tokens/tags (Root excluded).
ChunkerModel chunkerModel;
try {
chunkerModel = ModelLoader.getChunkerModel();
} catch (Exception e) {
log.error("Failed to load chunk model!", e);
throw e;
}
ChunkerME chunkerME = new ChunkerME(chunkerModel);
Span[] spans = chunkerME.chunkAsSpans(toks, tags);
for (Span span : spans) {
// +1 converts chunker coordinates back to the words list (Root at 0).
Word word = words.get(span.getStart() + 1);
if ("'s".equals(word.getName())) {
// Correction rule: fold a possessive chunk into the previous phrase.
// NOTE(review): assumes a preceding phrase exists; a sentence starting
// with "'s" would throw IndexOutOfBoundsException — confirm upstream.
ChunkPhrase prePhrase = phrases.get(phrases.size() - 1);
prePhrase.setRightIndex(span.getEnd());
prePhrase.getWords().addAll(words.subList(span.getStart() + 1, span.getEnd() + 1));
phrases.set(phrases.size() - 1, prePhrase);
} else {
ChunkPhrase chunkPhrase = new ChunkPhrase(span.getStart() + 1, span.getEnd(), new ArrayList<Word>(words.subList(span.getStart() + 1, span.getEnd() + 1)));
phrases.add(chunkPhrase);
}
}
return phrases;
}
示例14: companyNames
import opennlp.tools.util.Span; //導入方法依賴的package包/類
/**
 * Finds organization names in a tokenized text.
 *
 * @param tokens the tokenized text
 * @return the detected organization name strings (deduplicated)
 */
public static Set<String> companyNames(String tokens[]) {
    final Set<String> names = new HashSet<String>();
    for (final Span span : organizationFinder.find(tokens)) {
        // Join the span's tokens, then tidy spacing around commas.
        final StringBuilder buf = new StringBuilder();
        for (int idx = span.getStart(); idx < span.getEnd(); idx++) {
            buf.append(tokens[idx]).append(' ');
        }
        names.add(buf.toString().trim().replaceAll(" ,", ","));
    }
    return names;
}
示例15: locationNames
import opennlp.tools.util.Span; //導入方法依賴的package包/類
/**
 * Finds location names in a tokenized text.
 *
 * @param tokens the tokenized text
 * @return the detected location name strings (deduplicated)
 */
public static Set<String> locationNames(String tokens[]) {
    final Set<String> names = new HashSet<String>();
    for (final Span span : locationFinder.find(tokens)) {
        // Join the span's tokens, then tidy spacing around commas.
        final StringBuilder buf = new StringBuilder();
        for (int idx = span.getStart(); idx < span.getEnd(); idx++) {
            buf.append(tokens[idx]).append(' ');
        }
        names.add(buf.toString().trim().replaceAll(" ,", ","));
    }
    return names;
}