本文整理汇总了Java中ixa.kaflib.WF类的典型用法代码示例。如果您正苦于以下问题:Java WF类的具体用法?Java WF怎么用?Java WF使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
WF类属于ixa.kaflib包,在下文中一共展示了WF类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: slot2opinionsFromAnnotations
import ixa.kaflib.WF; //导入依赖的package包/类
/**
 * Reads a NAF file whose entity layer carries ATE annotations and prints
 * them in Semeval-absa 2015 format.
 *
 * @param naf path to the NAF file to read
 * @throws ParserConfigurationException
 * @throws Exception
 */
public void slot2opinionsFromAnnotations(String naf) throws ParserConfigurationException, Exception
{
    KAFDocument kaf = KAFDocument.createFromFile(new File(naf));
    int opinionId = 0;
    for (Entity entity : kaf.getEntities())
    {
        opinionId++;
        // Opinion span: from the first WF of the first term up to the end
        // (offset + length) of the last WF of the last term.
        List<Term> terms = entity.getTerms();
        WF firstWf = terms.get(0).getWFs().get(0);
        List<WF> lastTermWfs = terms.get(terms.size() - 1).getWFs();
        WF lastWf = lastTermWfs.get(lastTermWfs.size() - 1);
        int offsetFrom = firstWf.getOffset();
        int offsetTo = lastWf.getOffset() + lastWf.getLength();
        // Polarity and category are exported empty; the sentence id is the
        // xpath of the first word form.
        Opinion op = new Opinion("o" + opinionId, entity.getStr(), offsetFrom,
                offsetTo, "", "", firstWf.getXpath());
        this.addOpinion(op);
    }
    print2Semeval2015format("EliXa_Arun.xml");
}
示例2: filterTerms
import ixa.kaflib.WF; //导入依赖的package包/类
/**
 * Filters an iterable of terms, dropping leading determiners/prepositions
 * and any term whose word forms are all symbols or all match the exclusion
 * pattern. Terms are processed in offset order.
 */
public static List<Term> filterTerms(final Iterable<Term> terms) {
    final List<Term> kept = Lists.newArrayList();
    boolean leading = true;
    for (final Term term : Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(terms)) {
        final char posTag = Character.toUpperCase(term.getPos().charAt(0));
        // Skip determiners (D) and prepositions (P) while no term has been kept yet.
        if (leading && (posTag == 'D' || posTag == 'P')) {
            continue;
        }
        // Keep the term if at least one of its word forms is a known symbol
        // or does NOT match the exclusion pattern.
        boolean accept = false;
        for (final WF wf : term.getWFs()) {
            final String form = wf.getForm();
            if (SYMBOLS.contains(form) || !WF_EXCLUSION_PATTERN.matcher(form).matches()) {
                accept = true;
                break;
            }
        }
        if (accept) {
            kept.add(term);
            leading = false;
        }
    }
    return kept;
}
示例3: getText
import ixa.kaflib.WF; //导入依赖的package包/类
/**
 * Reconstructs the document text from the word-form layer: word forms are
 * concatenated in document order, padding with spaces so that each form
 * starts at its recorded character offset.
 *
 * @return the reconstructed text
 */
public String getText(){
    // Use StringBuilder instead of repeated String concatenation, which is
    // O(n^2) over the document length.
    final StringBuilder text = new StringBuilder();
    int textOffset = 0;
    for (WF wordForm : kaf.getWFs()) {
        // Pad with spaces up to this token's offset (no-op when the running
        // offset already matches, mirroring the original guard).
        while (textOffset < wordForm.getOffset()) {
            text.append(' ');
            textOffset += 1;
        }
        text.append(wordForm.getForm());
        textOffset += wordForm.getLength();
    }
    return text.toString();
}
示例4: addTimex
import ixa.kaflib.WF; //导入依赖的package包/类
/**
 * Creates a TIMEX3 annotation of the given type and value and anchors it to
 * the word forms of the given sentence that fall inside [begin, end).
 */
public void addTimex(int sentence, int begin, int end, String value, String type){
    ixa.kaflib.Timex3 timex = kaf.newTimex3(type);
    timex.setValue(value);
    List<WF> spanWfs = new ArrayList<WF>();
    for (WF wf : kaf.getWFsBySent(sentence)){
        int start = wf.getOffset();
        int stop = start + wf.getLength();
        // A WF belongs to the span when it starts inside the timex, or when
        // the identified timex is a substring of this single WF.
        if ((start >= begin && start < end) || (start < begin && stop >= end)){
            spanWfs.add(wf);
        }
    }
    timex.setSpan(KAFDocument.newWFSpan(spanWfs));
}
示例5: nafToBARREntities
import ixa.kaflib.WF; //导入依赖的package包/类
/**
 * Converts the entity layer of a NAF file into BARR tab-separated lines:
 * DOCUMENT_ID SECTION INIT END ANNOTATED_TEXT TYPE
 * e.g.: 72280 A 207 211 TDAH SHORT
 *
 * @param inputNAF path to the NAF file
 * @return the tab-separated entity lines, trimmed
 * @throws IOException
 */
public static String nafToBARREntities(String inputNAF) throws IOException {
    KAFDocument kaf = KAFDocument.createFromFile(Paths.get(inputNAF).toFile());
    StringBuilder out = new StringBuilder();
    for (Entity entity : kaf.getEntities()) {
        List<Term> terms = entity.getTerms();
        WF firstWf = terms.get(0).getWFs().get(0);
        List<WF> lastTermWfs = terms.get(terms.size() - 1).getWFs();
        WF lastWf = lastTermWfs.get(lastTermWfs.size() - 1);
        int fromOffset = firstWf.getOffset();
        int toOffset = lastWf.getOffset() + lastWf.getLength();
        // The xpath encodes "document#section", e.g. "100005#T".
        String[] xpathElems = firstWf.getXpath().split("#");
        out.append(xpathElems[0]).append("\t").append(xpathElems[1]).append("\t")
                .append(fromOffset).append("\t").append(toOffset).append("\t")
                .append(entity.getStr()).append("\t").append(entity.getType())
                .append("\n");
    }
    return out.toString().trim();
}
示例6: textToNAF
import ixa.kaflib.WF; //导入依赖的package包/类
/**
 * Takes a text file (one sentence per line, space-tokenized) and puts the
 * contents in a NAF document, creating the WF elements.
 *
 * @param inputFile the input file
 * @throws IOException
 */
public static void textToNAF(final Path inputFile) throws IOException {
    final KAFDocument kaf = new KAFDocument("en", "v1.naf");
    int sentCount = 0;
    int paraCount = 1;
    for (final String sentence : Files.readAllLines(inputFile)) {
        sentCount = sentCount + 1;
        for (final String token : sentence.split(" ")) {
            if (token.equals(RuleBasedSegmenter.PARAGRAPH)) {
                ++paraCount;
                // TODO sentences without end markers; crap rule:
                // keep the sentence counter at least as large as the
                // paragraph counter.
                while (paraCount > sentCount) {
                    ++sentCount;
                }
            } else {
                // TODO add offset (currently always 0)
                final WF wf = kaf.newWF(0, token, sentCount);
                wf.setPara(paraCount);
            }
        }
    }
}
示例7: getMultiWordSpans
import ixa.kaflib.WF; //导入依赖的package包/类
/**
 * Creates the multiword spans. It gets an initial list of spans (one per
 * token) and creates a multiword span when a multiword is detected.
 * Mutates {@code tokenSpans} in place: each detected multiword replaces the
 * run of single-token spans it covers with one combined span.
 *
 * @param tokens
 *          the list of tokens
 * @param wfs
 *          the list of WFs (assumed parallel to {@code tokens} — TODO confirm)
 * @param tokenSpans
 *          the list of initial token spans (one per token on entry)
 */
private void getMultiWordSpans(final String[] tokens, final List<WF> wfs,
    final List<ixa.kaflib.Span<WF>> tokenSpans) {
  // Detect multiword matches over the raw token array.
  final Span[] multiWordSpans = this.multiWordMatcher
      .multiWordsToSpans(tokens);
  // counter tracks how many list positions have been removed so far, so
  // token-array indexes can be mapped onto the shrinking tokenSpans list.
  int counter = 0;
  for (final Span mwSpan : multiWordSpans) {
    // Translate token-array indexes into current tokenSpans indexes.
    final Integer fromIndex = mwSpan.getStart() - counter;
    final Integer toIndex = mwSpan.getEnd() - counter;
    // add to the counter the length of the span removed
    counter = counter + tokenSpans.subList(fromIndex, toIndex).size() - 1;
    // create multiword targets and Span (WF targets come from the original,
    // unshifted wfs list, so the raw span bounds are used here)
    final List<WF> wfTargets = wfs
        .subList(mwSpan.getStart(), mwSpan.getEnd());
    final ixa.kaflib.Span<WF> multiWordSpan = KAFDocument
        .newWFSpan(wfTargets);
    // remove the token Spans to be replaced by the multiword span
    tokenSpans.subList(fromIndex, toIndex).clear();
    // add the new Span containing several WFs (multiWordSpan)
    // the counter is used to allow matching the spans to the
    // tokenSpans list indexes
    tokenSpans.add(fromIndex, multiWordSpan);
  }
}
示例8: chunkToKAF
import ixa.kaflib.WF; //导入依赖的package包/类
/**
 * Runs the chunker over every sentence of the document, adding a chunk
 * annotation per detected chunk, and returns the serialized document.
 */
public String chunkToKAF(KAFDocument kaf) throws IOException {
    for (List<WF> sentence : kaf.getSentences()) {
        int size = sentence.size();
        String[] tokens = new String[size];
        String[] tokenIds = new String[size];
        String[] posTags = new String[size];
        // Gather the form, id and POS tag (morphofeat) of each token.
        for (int i = 0; i < size; i++) {
            WF wf = sentence.get(i);
            tokens[i] = wf.getForm();
            tokenIds[i] = wf.getId();
            posTags[i] = kaf.getTermsBySent(wf.getSent()).get(i).getMorphofeat();
        }
        for (Span chunk : chunker.chunk(tokens, posTags)) {
            // TODO use new functions and proper heads
            List<Term> chunkTerms = kaf.getTermsFromWFs(Arrays.asList(
                    Arrays.copyOfRange(tokenIds, chunk.getStart(), chunk.getEnd())));
            // Head is taken as the last term of the chunk.
            kaf.createChunk(chunkTerms.get(chunkTerms.size() - 1), chunk.getType(), chunkTerms);
        }
    }
    return kaf.toString();
}
示例9: getChunks
import ixa.kaflib.WF; //导入依赖的package包/类
/**
 * Runs the chunker over every sentence of the document and collects one
 * ChunkSample (tokens, POS tags, chunk tags) per sentence.
 */
private List<ChunkSample> getChunks(KAFDocument kaf)
    throws IOException {
    List<ChunkSample> samples = new ArrayList<ChunkSample>();
    for (List<WF> sentence : kaf.getSentences()) {
        int size = sentence.size();
        String[] tokens = new String[size];
        String[] posTags = new String[size];
        // Gather the form and POS tag (morphofeat) of each token.
        for (int i = 0; i < size; i++) {
            WF wf = sentence.get(i);
            tokens[i] = wf.getForm();
            posTags[i] = kaf.getTermsBySent(wf.getSent()).get(i).getMorphofeat();
        }
        samples.add(new ChunkSample(tokens, posTags, chunker.chunkToString(tokens, posTags)));
    }
    return samples;
}
示例10: tokenizeToKAF
import ixa.kaflib.WF; //导入依赖的package包/类
/**
 * Tokenize document to NAF.
 *
 * @param kaf
 *          the incoming naf document
 * @throws IOException
 *           if io problems
 */
public void tokenizeToKAF(final KAFDocument kaf) throws IOException {
    int sentCount = 0;
    int paraCount = 1;
    final List<List<Token>> tokenizedSentences =
            toker.tokenize(segmenter.segmentSentence());
    for (final List<Token> sentence : tokenizedSentences) {
        sentCount = sentCount + 1;
        for (final Token token : sentence) {
            if (token.getTokenValue().equals(RuleBasedSegmenter.PARAGRAPH)) {
                ++paraCount;
                // TODO debug this: bump the sentence counter when it falls
                // behind the paragraph counter.
                if (sentCount < paraCount) {
                    ++sentCount;
                }
            } else {
                final WF wf = kaf.newWF(token.startOffset(), token.getTokenValue(),
                        sentCount);
                wf.setLength(token.tokenLength());
                wf.setPara(paraCount);
            }
        }
    }
}
示例11: tokensToKAF
import ixa.kaflib.WF; //导入依赖的package包/类
/**
 * Read already tokenized text (one sentence per line) and builds a NAF
 * document.
 *
 * @param breader
 *          the reader
 * @param kaf
 *          the naf document
 * @throws IOException
 *           if io problems
 */
public static void tokensToKAF(final Reader breader, final KAFDocument kaf)
    throws IOException {
    int sentCount = 0;
    int paraCount = 1;
    for (final String sentence : CharStreams.readLines(breader)) {
        sentCount = sentCount + 1;
        for (final String token : sentence.split(" ")) {
            if (token.equals(RuleBasedSegmenter.PARAGRAPH)) {
                ++paraCount;
                // TODO sentences without end markers; crap rule:
                // keep the sentence counter at least as large as the
                // paragraph counter.
                while (paraCount > sentCount) {
                    ++sentCount;
                }
            } else {
                // TODO add offset (currently always 0)
                final WF wf = kaf.newWF(0, token, sentCount);
                wf.setPara(paraCount);
            }
        }
    }
}
示例12: endOf
import ixa.kaflib.WF; //导入依赖的package包/类
/**
 * Returns the end character offset of a term: the offset of its last word
 * form plus that form's length, except for the tokenizer artifacts
 * "-LSB-", "-RSB-" and "''", which count as a single character.
 */
private static int endOf(final Term term) {
    final List<WF> wfs = term.getWFs();
    final WF last = wfs.get(wfs.size() - 1);
    final String form = last.getForm();
    final boolean artifact =
            form.equals("-LSB-") || form.equals("-RSB-") || form.equals("''");
    return last.getOffset() + (artifact ? 1 : last.getLength());
}
示例13: getText
import ixa.kaflib.WF; //导入依赖的package包/类
/**
 * Returns the space-joined word forms of the given terms in offset order.
 * Proper nouns (morphofeat starting with "NNP") keep their casing; all
 * other forms are lower-cased.
 */
public static String getText(final Iterable<Term> terms) {
    final StringBuilder text = new StringBuilder();
    boolean first = true;
    for (final Term term : Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(terms)) {
        final boolean keepCase = term.getMorphofeat().startsWith("NNP");
        for (final WF wf : term.getWFs()) {
            if (!first) {
                text.append(" ");
            }
            final String form = wf.getForm();
            text.append(keepCase ? form : form.toLowerCase());
            first = false;
        }
    }
    return text.toString();
}
示例14: getEnd
import ixa.kaflib.WF; //导入依赖的package包/类
/**
 * Returns the end character offset of a term: the offset of its last word
 * form plus that form's length, except for the tokenizer artifacts
 * "-LSB-", "-RSB-" and "''", which count as a single character.
 */
public static int getEnd(final Term term) {
    final List<WF> wfs = term.getWFs();
    final WF last = wfs.get(wfs.size() - 1);
    final String form = last.getForm();
    if (form.equals("-LSB-") || form.equals("-RSB-") || form.equals("''")) {
        return last.getOffset() + 1;
    }
    return last.getOffset() + last.getLength();
}
示例15: applyEntityRemoveOverlaps
import ixa.kaflib.WF; //导入依赖的package包/类
/**
 * Removes redundant entities from the document: an entity is dropped when
 * its span is contained in another entity's span, or when any of its word
 * forms overlaps a TIMEX3 annotation. Iterates over a snapshot
 * (ImmutableList.copyOf) so removal is safe during iteration.
 */
private void applyEntityRemoveOverlaps(final KAFDocument document) {
    // Consider all the entities in the document
    outer: for (final Entity entity : ImmutableList.copyOf(document.getEntities())) {
        for (final Term term : entity.getTerms()) {
            // Remove entities whose span is contained in the span of another entity
            for (final Entity entity2 : document.getEntitiesByTerm(term)) {
                if (entity2 != entity && entity2.getTerms().containsAll(entity.getTerms())) {
                    document.removeAnnotation(entity);
                    if (LOGGER.isDebugEnabled()) {
                        LOGGER.debug("Removed " + NAFUtils.toString(entity)
                                + " overlapping with " + NAFUtils.toString(entity2));
                    }
                    // Entity is gone; skip its remaining terms.
                    continue outer;
                }
            }
            // Remove entities whose span overlaps with the span of some timex
            for (final WF wf : term.getWFs()) {
                final List<Timex3> timex = document.getTimeExsByWF(wf);
                if (!timex.isEmpty()) {
                    document.removeAnnotation(entity);
                    if (LOGGER.isDebugEnabled()) {
                        // NOTE(review): `timex` is a List<Timex3>; confirm
                        // NAFUtils.toString has a List overload, otherwise this
                        // logs the first element's container oddly.
                        LOGGER.debug("Removed " + NAFUtils.toString(entity)
                                + " overlapping with TIMEX3 '" + NAFUtils.toString(timex));
                    }
                    continue outer;
                }
            }
        }
    }
}