

Java WF Class Code Examples

This article collects typical usage examples of the Java class ixa.kaflib.WF. If you are wondering what the WF class is for, how to use it, or where to find usage examples, the selected code samples below should help.


The WF class belongs to the ixa.kaflib package. A total of 15 code examples of the WF class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.

Example 1: slot2opinionsFromAnnotations

import ixa.kaflib.WF; // import the required package/class
/**
 * Read NAF file containing ATE annotations in the entity layer and print them in Semeval-absa 2015 format
 * 
 * @param naf
 * @throws ParserConfigurationException
 * @throws Exception
 */
public void slot2opinionsFromAnnotations(String naf) throws ParserConfigurationException, Exception 
{		
	int oId = 0;
	KAFDocument kaf = KAFDocument.createFromFile(new File(naf));
		
	for (Entity e : kaf.getEntities())
	{
		oId++;
		//create and add opinion to the structure
		String polarity ="";
		String cat = "";
		String trgt = e.getStr();
		int offsetFrom = e.getTerms().get(0).getWFs().get(0).getOffset();
		List<WF> entWFs = e.getTerms().get(e.getTerms().size()-1).getWFs();
		int offsetTo = entWFs.get(entWFs.size()-1).getOffset()+entWFs.get(entWFs.size()-1).getLength();
		String sId = e.getTerms().get(0).getWFs().get(0).getXpath();
		Opinion op = new Opinion("o"+oId, trgt, offsetFrom, offsetTo, polarity, cat, sId);
		this.addOpinion(op);
	}				
	print2Semeval2015format("EliXa_Arun.xml");
}
 
Developer: Elhuyar, Project: Elixa, Lines: 29, Source: CorpusReader.java

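The offset arithmetic above (first WF of the first term for the start, last WF of the last term for the end) recurs in example 5 below. A minimal sketch of that pattern as a stand-alone helper, assuming only the KAFDocument/Entity/Term/WF accessors already used in this example (the class and method names are invented):

import java.util.List;

import ixa.kaflib.Entity;
import ixa.kaflib.WF;

public final class EntitySpans {

  /** Hypothetical helper: character span [from, to) covered by an entity's word forms. */
  public static int[] charSpan(Entity e) {
    // start offset: first WF of the entity's first term
    WF first = e.getTerms().get(0).getWFs().get(0);
    // end offset: last WF of the entity's last term, plus its length
    List<WF> lastWFs = e.getTerms().get(e.getTerms().size() - 1).getWFs();
    WF last = lastWFs.get(lastWFs.size() - 1);
    return new int[] { first.getOffset(), last.getOffset() + last.getLength() };
  }
}
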
Example 2: filterTerms

import ixa.kaflib.WF; // import the required package/class
public static List<Term> filterTerms(final Iterable<Term> terms) {
    final List<Term> result = Lists.newArrayList();
    boolean atBeginning = true;
    for (final Term term : Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(terms)) {
        final char pos = Character.toUpperCase(term.getPos().charAt(0));
        if (atBeginning && (pos == 'D' || pos == 'P')) {
            continue;
        }
        for (final WF word : term.getWFs()) {
            final String text = word.getForm();
            if (SYMBOLS.contains(text) || !WF_EXCLUSION_PATTERN.matcher(text).matches()) {
                result.add(term);
                atBeginning = false;
                break;
            }
        }
    }
    return result;
}
 
Developer: dkmfbk, Project: pikes, Lines: 20, Source: NAFUtils.java

Example 3: getText

import ixa.kaflib.WF; // import the required package/class
public String getText() {
    String text = "";
    int textOffset = 0;
    List<WF> wordForms = kaf.getWFs();
    for (int i = 0; i < wordForms.size(); i++) {
        WF wordForm = wordForms.get(i);
        // pad with spaces until the running offset reaches the WF's recorded offset
        if (textOffset != wordForm.getOffset()) {
            while (textOffset < wordForm.getOffset()) {
                text += " ";
                textOffset += 1;
            }
        }
        text += wordForm.getForm();
        textOffset += wordForm.getLength();
    }
    return text;
}
 
Developer: ixa-ehu, Project: ixa-heideltime, Lines: 18, Source: NAFWrapper.java

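Because the loop above pads with one space per missing character, it reconstructs the original spacing of the text from each WF's offset and length. A minimal stand-alone sketch of the same idea, assuming a KAFDocument loaded as in example 5 and using a StringBuilder instead of string concatenation (the method name is invented):

import ixa.kaflib.KAFDocument;
import ixa.kaflib.WF;

public static String reconstructText(KAFDocument kaf) {
  StringBuilder text = new StringBuilder();
  int cursor = 0;
  for (WF wf : kaf.getWFs()) {
    // pad with spaces until the cursor reaches the WF's recorded offset
    while (cursor < wf.getOffset()) {
      text.append(' ');
      cursor++;
    }
    text.append(wf.getForm());
    cursor += wf.getLength();
  }
  return text.toString();
}
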
Example 4: addTimex

import ixa.kaflib.WF; // import the required package/class
public void addTimex(int sentence,int begin, int end, String value, String type){
	ixa.kaflib.Timex3 time = kaf.newTimex3(type);
	time.setValue(value);
	
	List<WF> wfs = kaf.getWFsBySent(sentence);
	List<WF> wfSpan = new ArrayList<WF>();
	for (WF wf:wfs){
		int offset = wf.getOffset();
		if (offset >= begin && offset < end){
			wfSpan.add(wf);
		}
		else{
			//check if the identified timex is a substring of the wf
			int endoff = offset + wf.getLength();
			if (offset < begin && endoff >= end){
				wfSpan.add(wf);
			}
		}
	}
	time.setSpan(KAFDocument.newWFSpan(wfSpan));
}
 
Developer: ixa-ehu, Project: ixa-heideltime, Lines: 22, Source: NAFWrapper.java

Example 5: nafToBARREntities

import ixa.kaflib.WF; // import the required package/class
public static String nafToBARREntities(String inputNAF) throws IOException {
  // DOCUMENT_ID SECTION INIT END ANNOTATED_TEXT TYPE
  // 72280 A 207 211 TDAH SHORT
  StringBuilder sb = new StringBuilder();
  Path kafPath = Paths.get(inputNAF);
  KAFDocument kaf = KAFDocument.createFromFile(kafPath.toFile());
  List<Entity> entities = kaf.getEntities();
  for (Entity entity : entities) {
    String type = entity.getType();
    String annotation = entity.getStr();
    int fromOffset = entity.getTerms().get(0).getWFs().get(0).getOffset();
    List<WF> targetWFs = entity.getTerms().get(entity.getTerms().size() - 1)
        .getWFs();
    int toOffset = targetWFs.get(targetWFs.size() - 1).getOffset()
        + targetWFs.get(targetWFs.size() - 1).getLength();
    // 100005#T
    String xpath = entity.getTerms().get(0).getWFs().get(0).getXpath();
    String[] xpathElems = xpath.split("#");
    String section = xpathElems[1];
    String document = xpathElems[0];
    sb.append(document).append("\t").append(section).append("\t")
        .append(fromOffset).append("\t").append(toOffset).append("\t")
        .append(annotation).append("\t").append(type).append("\n");
  }
  return sb.toString().trim();
}
 
Developer: ragerri, Project: ixa-pipe-convert, Lines: 27, Source: MarkytFormat.java

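A hypothetical usage of the method above, writing the resulting tab-separated entity list to a file (the input NAF path and output file name are invented for illustration; both calls can throw IOException):

import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;

String tsv = MarkytFormat.nafToBARREntities("/path/to/document.naf");
Files.write(Paths.get("entities.tsv"), tsv.getBytes(StandardCharsets.UTF_8));
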
Example 6: textToNAF

import ixa.kaflib.WF; // import the required package/class
/**
 * Takes a text file and put the contents in a NAF document. It creates the WF
 * elements.
 * 
 * @param inputFile
 * @throws IOException
 */
public static void textToNAF(final Path inputFile) throws IOException {
  KAFDocument kaf = new KAFDocument("en", "v1.naf");
  int noSents = 0;
  int noParas = 1;
  final List<String> sentences = Files.readAllLines(inputFile);
  for (final String sentence : sentences) {
    noSents = noSents + 1;
    final String[] tokens = sentence.split(" ");
    for (final String token : tokens) {
      if (token.equals(RuleBasedSegmenter.PARAGRAPH)) {
        ++noParas;
        // TODO sentences without end markers;
        // crap rule
        while (noParas > noSents) {
          ++noSents;
        }
      } else {
        // TODO add offset
        final WF wf = kaf.newWF(0, token, noSents);
        wf.setPara(noParas);
        // wf.setSent(noSents);
      }
    }
  }
}
 
Developer: ragerri, Project: ixa-pipe-convert, Lines: 33, Source: Convert.java

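The "TODO add offset" marker above means every WF is created with offset 0; the same applies to example 11 below. A minimal sketch of one way to track a running character offset, reusing the variables of the method above and assuming (which the original code does not state) that tokens are separated by single spaces:

int curOffset = 0; // running character offset under the single-space assumption
for (final String sentence : sentences) {
  noSents = noSents + 1;
  for (final String token : sentence.split(" ")) {
    if (token.equals(RuleBasedSegmenter.PARAGRAPH)) {
      ++noParas;
      continue;
    }
    // pass the running offset instead of 0
    final WF wf = kaf.newWF(curOffset, token, noSents);
    wf.setPara(noParas);
    curOffset += token.length() + 1; // +1 for the assumed separator after the token
  }
}
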
Example 7: getMultiWordSpans

import ixa.kaflib.WF; // import the required package/class
/**
 * Creates the multiword spans. It gets an initial list of spans (one per
 * token) and creates a multiword span when a multiword is detected.
 * 
 * @param tokens
 *          the list of tokens
 * @param wfs
 *          the list of WFs
 * @param tokenSpans
 *          the list of initial token spans
 */
private void getMultiWordSpans(final String[] tokens, final List<WF> wfs,
    final List<ixa.kaflib.Span<WF>> tokenSpans) {
  final Span[] multiWordSpans = this.multiWordMatcher
      .multiWordsToSpans(tokens);
  int counter = 0;
  for (final Span mwSpan : multiWordSpans) {
    final Integer fromIndex = mwSpan.getStart() - counter;
    final Integer toIndex = mwSpan.getEnd() - counter;
    // add to the counter the length of the span removed
    counter = counter + tokenSpans.subList(fromIndex, toIndex).size() - 1;
    // create multiword targets and Span
    final List<WF> wfTargets = wfs
        .subList(mwSpan.getStart(), mwSpan.getEnd());
    final ixa.kaflib.Span<WF> multiWordSpan = KAFDocument
        .newWFSpan(wfTargets);
    // remove the token Spans to be replaced by the multiword span
    tokenSpans.subList(fromIndex, toIndex).clear();
    // add the new Span containing several WFs (multiWordSpan)
    // the counter is used to allow matching the spans to the
    // tokenSpans list indexes
    tokenSpans.add(fromIndex, multiWordSpan);
  }
}
 
Developer: ixa-ehu, Project: ixa-pipe-pos, Lines: 35, Source: Annotate.java

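The counter bookkeeping above is the subtle part: each replacement shrinks the tokenSpans list, so later multiword indices must be shifted left. A small hypothetical trace with plain strings standing in for Span&lt;WF&gt; objects (the token values and the detected multiword are invented):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class MultiWordSpanDemo {
  public static void main(String[] args) {
    // Strings stand in for Span<WF>; one multiword detected over tokens [0, 2).
    List<String> tokenSpans = new ArrayList<>(Arrays.asList("Nueva", "York", "es", "grande"));
    int counter = 0;
    int start = 0, end = 2;                                       // detected multiword span
    int fromIndex = start - counter;
    int toIndex = end - counter;
    counter += tokenSpans.subList(fromIndex, toIndex).size() - 1; // counter == 1
    tokenSpans.subList(fromIndex, toIndex).clear();               // drop the two single-token spans
    tokenSpans.add(fromIndex, "Nueva York");                      // insert the merged span
    // a later multiword starting at original index 2 maps to 2 - counter == 1 here
    System.out.println(tokenSpans);                               // [Nueva York, es, grande]
  }
}
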
Example 8: chunkToKAF

import ixa.kaflib.WF; // import the required package/class
public String chunkToKAF(KAFDocument kaf) throws IOException {
  List<List<WF>> sentences = kaf.getSentences();
  for (List<WF> sentence : sentences) {
    /* Get an array of token forms from a list of WF objects. */
    String posTags[] = new String[sentence.size()];
    String tokens[] = new String[sentence.size()];
    String[] tokenIds = new String[sentence.size()];
    for (int i = 0; i < sentence.size(); i++) {
      tokens[i] = sentence.get(i).getForm();
      tokenIds[i] = sentence.get(i).getId();
      List<Term> terms = kaf.getTermsBySent(sentence.get(i).getSent());
      posTags[i] = terms.get(i).getMorphofeat();
    }
    Span[] chunks = chunker.chunk(tokens, posTags);
    for (int i = 0; i < chunks.length; i++) {
      String type = chunks[i].getType();
      Integer start_index = chunks[i].getStart();
      Integer end_index = chunks[i].getEnd();
      // TODO use new functions and proper heads
      List<Term> chunkTerms = kaf.getTermsFromWFs(Arrays.asList(Arrays.copyOfRange(tokenIds, start_index, end_index)));
      kaf.createChunk(chunkTerms.get(chunkTerms.size()-1), type, chunkTerms);        
    }
  }
  return kaf.toString();
}
 
Developer: ixa-ehu, Project: ixa-pipe-chunk, Lines: 26, Source: Annotate.java

Example 9: getChunks

import ixa.kaflib.WF; // import the required package/class
private List<ChunkSample> getChunks(KAFDocument kaf)
            throws IOException {
  List<ChunkSample> chunkList = new ArrayList<ChunkSample>();
  List<List<WF>> sentences = kaf.getSentences();
  for (List<WF> sentence : sentences) {
    /* Get an array of token forms from a list of WF objects. */
    String posTags[] = new String[sentence.size()];
    String tokens[] = new String[sentence.size()];
    for (int i = 0; i < sentence.size(); i++) {
      tokens[i] = sentence.get(i).getForm();
      List<Term> terms = kaf.getTermsBySent(sentence.get(i).getSent());
      posTags[i] = terms.get(i).getMorphofeat();
    }
    String[] chunks = chunker.chunkToString(tokens, posTags);
    ChunkSample chunkSample = new ChunkSample(tokens,posTags,chunks);
    chunkList.add(chunkSample);
  }
  return chunkList;
}
 
Developer: ixa-ehu, Project: ixa-pipe-chunk, Lines: 20, Source: Annotate.java

Example 10: tokenizeToKAF

import ixa.kaflib.WF; // import the required package/class
/**
 * Tokenize document to NAF.
 * 
 * @param kaf
 *          the incoming naf document
 * @throws IOException
 *           if io problems
 */
public void tokenizeToKAF(final KAFDocument kaf) throws IOException {

  int noSents = 0;
  int noParas = 1;

  final String[] sentences = segmenter.segmentSentence();
  final List<List<Token>> tokens = toker.tokenize(sentences);
  for (final List<Token> tokenizedSentence : tokens) {
    noSents = noSents + 1;
    for (final Token token : tokenizedSentence) {
      if (token.getTokenValue().equals(RuleBasedSegmenter.PARAGRAPH)) {
        ++noParas;
        // TODO debug this
        if (noSents < noParas) {
          ++noSents;
        }
      } else {
        final WF wf = kaf.newWF(token.startOffset(), token.getTokenValue(),
            noSents);
        wf.setLength(token.tokenLength());
        wf.setPara(noParas);
      }
    }
  }
}
 
Developer: ixa-ehu, Project: ixa-pipe-tok, Lines: 34, Source: Annotate.java

Example 11: tokensToKAF

import ixa.kaflib.WF; // import the required package/class
/**
 * Read already tokenized text (one sentence per line) and builds a NAF
 * document.
 * 
 * @param breader
 *          the reader
 * @param kaf
 *          the naf document
 * @throws IOException
 *           if io problems
 */
public static void tokensToKAF(final Reader breader, final KAFDocument kaf)
    throws IOException {
  int noSents = 0;
  int noParas = 1;
  final List<String> sentences = CharStreams.readLines(breader);
  for (final String sentence : sentences) {
    noSents = noSents + 1;
    final String[] tokens = sentence.split(" ");
    for (final String token : tokens) {
      if (token.equals(RuleBasedSegmenter.PARAGRAPH)) {
        ++noParas;
        // TODO sentences without end markers;
        // crap rule
        while (noParas > noSents) {
          ++noSents;
        }
      } else {
        // TODO add offset
        final WF wf = kaf.newWF(0, token, noSents);
        wf.setPara(noParas);
        // wf.setSent(noSents);
      }
    }
  }
}
 
Developer: ixa-ehu, Project: ixa-pipe-tok, Lines: 37, Source: Annotate.java

Example 12: endOf

import ixa.kaflib.WF; // import the required package/class
private static int endOf(final Term term) {
    final List<WF> wfs = term.getWFs();
    final WF wf = wfs.get(wfs.size() - 1);
    final String str = wf.getForm();
    if (str.equals("-LSB-") || str.equals("-RSB-") || str.equals("''")) {
        return wf.getOffset() + 1;
    }
    return wf.getOffset() + wf.getLength();
}
 
Developer: dkmfbk, Project: pikes, Lines: 10, Source: NafRenderUtils.java

Example 13: getText

import ixa.kaflib.WF; // import the required package/class
public static String getText(final Iterable<Term> terms) {
    final StringBuilder builder = new StringBuilder();
    boolean atBeginning = true;
    for (final Term term : Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(terms)) {
        final boolean properNoun = term.getMorphofeat().startsWith("NNP");
        for (final WF word : term.getWFs()) {
            builder.append(atBeginning ? "" : " ");
            builder.append(properNoun ? word.getForm() : word.getForm().toLowerCase());
            atBeginning = false;
        }
    }
    return builder.toString();
}
 
Developer: dkmfbk, Project: pikes, Lines: 14, Source: NAFUtils.java

Example 14: getEnd

import ixa.kaflib.WF; // import the required package/class
public static int getEnd(final Term term) {
    final List<WF> wfs = term.getWFs();
    final WF wf = wfs.get(wfs.size() - 1);
    final String str = wf.getForm();
    if (str.equals("-LSB-") || str.equals("-RSB-") || str.equals("''")) {
        return wf.getOffset() + 1;
    }
    return wf.getOffset() + wf.getLength();
}
 
Developer: dkmfbk, Project: pikes, Lines: 10, Source: NAFUtils.java

Example 15: applyEntityRemoveOverlaps

import ixa.kaflib.WF; // import the required package/class
private void applyEntityRemoveOverlaps(final KAFDocument document) {

    // Consider all the entities in the document
    outer: for (final Entity entity : ImmutableList.copyOf(document.getEntities())) {
        for (final Term term : entity.getTerms()) {

            // Remove entities whose span is contained in the span of another entity
            for (final Entity entity2 : document.getEntitiesByTerm(term)) {
                if (entity2 != entity && entity2.getTerms().containsAll(entity.getTerms())) {
                    document.removeAnnotation(entity);
                    if (LOGGER.isDebugEnabled()) {
                        LOGGER.debug("Removed " + NAFUtils.toString(entity)
                                + " overlapping with " + NAFUtils.toString(entity2));
                    }
                    continue outer;
                }
            }

            // Remove entities whose span overlaps with the span of some timex
            for (final WF wf : term.getWFs()) {
                final List<Timex3> timex = document.getTimeExsByWF(wf);
                if (!timex.isEmpty()) {
                    document.removeAnnotation(entity);
                    if (LOGGER.isDebugEnabled()) {
                        LOGGER.debug("Removed " + NAFUtils.toString(entity)
                                + " overlapping with TIMEX3 " + NAFUtils.toString(timex));
                    }
                    continue outer;
                }
            }
        }
    }
}
 
Developer: dkmfbk, Project: pikes, Lines: 34, Source: NAFFilter.java


Note: The ixa.kaflib.WF class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many programmers, and copyright remains with the original authors; please consult each project's License before distributing or using the code, and do not reproduce this article without permission.