当前位置: 首页>>代码示例>>Java>>正文


Java Token.getLemma方法代码示例

本文整理汇总了Java中de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token.getLemma方法的典型用法代码示例。如果您正苦于以下问题:Java Token.getLemma方法的具体用法?Java Token.getLemma怎么用?Java Token.getLemma使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token的用法示例。


在下文中一共展示了Token.getLemma方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: PToken

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入方法依赖的package包/类
public PToken(Token t) {
	this.text = t.getCoveredText();
	this.pos = t.getPos() != null ? t.getPos().getPosValue() : null;
	this.lemma = t.getLemma() != null ? t.getLemma().getValue() : null;
	List<NamedEntity> nes = JCasUtil.selectCovered(NamedEntity.class, t);
	if (nes.size() > 0)
		this.neTag = nes.get(0).getValue();

	DocumentMetaData meta = (DocumentMetaData) t.getCAS().getDocumentAnnotation();
	this.documentId = meta.getDocumentId();
	this.start = t.getBegin();
	this.end = t.getEnd();
	this.docLength = t.getCAS().getDocumentText().length();
}
 
开发者ID:UKPLab,项目名称:ijcnlp2017-cmaps,代码行数:15,代码来源:PToken.java

示例2: PToken

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入方法依赖的package包/类
/**
 * Creates a plain token record from a DKPro {@code Token} annotation.
 * <p>
 * The covered text is cleaned before it is stored: every character matching the regex
 * class {@code \p{C}} is replaced by a space, then a single replacement pass turns each
 * pair of spaces into one space (longer runs are shortened, not fully collapsed). POS,
 * stem and lemma values are {@code null} when the corresponding annotation is not attached.
 *
 * @param t
 *            the token annotation to copy from
 */
public PToken(Token t) {
	String withoutControlChars = t.getCoveredText().replaceAll("\\p{C}", " ");
	this.text = withoutControlChars.replaceAll("  ", " ");

	// POS, stem and lemma are optional features of the token
	if (t.getPos() == null) {
		this.pos = null;
	} else {
		this.pos = t.getPos().getPosValue();
	}
	if (t.getStem() == null) {
		this.stem = null;
	} else {
		this.stem = t.getStem().getValue();
	}
	if (t.getLemma() == null) {
		this.lemma = null;
	} else {
		this.lemma = t.getLemma().getValue();
	}

	DocumentMetaData docMeta = (DocumentMetaData) t.getCAS().getDocumentAnnotation();
	this.documentId = docMeta.getDocumentId();

	this.start = t.getBegin();
	this.end = t.getEnd();
	this.docLength = t.getCAS().getDocumentText().length();
}
 
开发者ID:UKPLab,项目名称:emnlp2017-cmapsum-corpus,代码行数:14,代码来源:PToken.java

示例3: writeLemmas

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入方法依赖的package包/类
/**
 * Copies the lemma of every CAS token into a newly created lemmas layer of the text corpus.
 * <p>
 * Does nothing (apart from a debug message) when the CAS contains no {@code Lemma}
 * annotation. Otherwise each token selected from the CAS is paired by position with the
 * token at the same index in the corpus tokens layer; tokens without a lemma are skipped
 * but still advance the index.
 *
 * @param aJCas
 *            the CAS to read tokens and lemmas from
 * @param aTextCorpus
 *            the text corpus that receives the lemmas layer; its tokens layer must already
 *            exist
 * @param aTokensBeginPositionMap
 *            not used by this method
 */
private void writeLemmas(JCas aJCas, TextCorpus aTextCorpus,
        Map<Integer, eu.clarin.weblicht.wlfxb.tc.api.Token> aTokensBeginPositionMap)
{
    boolean hasLemmas = JCasUtil.exists(aJCas, Lemma.class);
    if (!hasLemmas) {
        // Nothing to write: leave the corpus without a lemmas layer
        getLogger().debug("Layer [" + TextCorpusLayerTag.LEMMAS.getXmlName() + "]: empty");
        return;
    }

    // The tokens layer is expected to be present already
    TokensLayer corpusTokens = aTextCorpus.getTokensLayer();

    LemmasLayer corpusLemmas = aTextCorpus.createLemmasLayer();
    getLogger().debug("Layer [" + TextCorpusLayerTag.LEMMAS.getXmlName() + "]: created");

    // Position of the current CAS token, used to look up its counterpart in the tokens layer
    int tokenIndex = 0;
    for (Token casToken : select(aJCas, Token.class)) {
        Lemma lemma = casToken.getLemma();
        if (lemma != null && corpusLemmas != null) {
            corpusLemmas.addLemma(lemma.getValue(), corpusTokens.getToken(tokenIndex));
        }
        tokenIndex++;
    }
}
 
开发者ID:webanno,项目名称:webanno,代码行数:29,代码来源:TcfWriter.java

示例4: setWordNetFeatures

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入方法依赖的package包/类
/**
 * Adds WordNet based features for a token, using the Most Frequent Sense heuristic (the
 * first sense listed for the token's lemma and part of speech).
 * <p>
 * Nothing is added when the token has no POS or no lemma, when the Penn POS tag cannot be
 * mapped to a WordNet POS, or when the dictionary has no entry for the lemma. Otherwise the
 * following features (each name prefixed with {@code featurePrefix}) are added:
 * {@code wnLexicalFilename}, {@code sense0} .. {@code sense3} (sense ids of the first four
 * synsets visited while following first hypernyms upwards), {@code wnGranularity} (number
 * of synsets visited) and {@code senseTop} (sense id of the synset where the walk stopped).
 * <p>
 * Adapted from code by Nils Reiter.
 *
 * @author afried, Annemarie Friedrich
 *
 * @param token
 *            the token whose POS and lemma are looked up
 * @param classAnnot
 *            the annotation that receives the features
 * @param jCas
 *            the JCas passed through to {@code FeaturesUtil.addFeature}
 * @param featurePrefix
 *            prefix prepended to every feature name
 * @param wordnet
 *            the WordNet dictionary used for the lookups
 */
public static void setWordNetFeatures(Token token, ClassificationAnnotation classAnnot, JCas jCas,
		String featurePrefix, IDictionary wordnet) {
	// Both a POS tag and a lemma are required for the dictionary lookup
	if (token.getPos() == null || token.getLemma() == null) {
		return;
	}

	POS pos = WordNetUtils.getPOSFromPenn(token.getPos().getPosValue());
	if (pos == null) {
		return;
	}

	IIndexWord iw = wordnet.getIndexWord(token.getLemma().getValue(), pos);
	if (iw == null) {
		return;
	}

	// Most Frequent Sense heuristic: take the first sense listed for the index word
	ISynsetID mfs = iw.getWordIDs().get(0).getSynsetID();
	ISynset synset = wordnet.getSynset(mfs);

	// lexical filename:
	FeaturesUtil.addFeature(featurePrefix + "wnLexicalFilename", synset.getLexicalFile().getName(),
			jCas, classAnnot);

	int gran = 0;
	ISynset curr = synset;
	// Synsets already visited; stops the walk if the hypernym chain loops back on itself
	Set<ISynset> seen = new HashSet<ISynset>();
	while (!seen.contains(curr) && !curr.getRelatedSynsets(Pointer.HYPERNYM).isEmpty()) {
		seen.add(curr);
		if (gran <= 3) {
			// The substring operation removes the leading 'SID-'
			String senseId = curr.getID().toString().substring(4);
			FeaturesUtil.addFeature(featurePrefix + "sense" + gran, senseId, jCas, classAnnot);
		}
		// Always follow the first hypernym
		curr = wordnet.getSynset(curr.getRelatedSynsets(Pointer.HYPERNYM).get(0));
		gran++;
	}
	FeaturesUtil.addFeature(featurePrefix + "wnGranularity", Integer.toString(gran), jCas,
			classAnnot);

	// curr now refers to the synset where the walk stopped (the top sense, unless a cycle
	// ended the walk early)
	FeaturesUtil.addFeature(featurePrefix + "senseTop", curr.getID().toString().substring(4), jCas,
			classAnnot);
}
 
开发者ID:annefried,项目名称:sitent,代码行数:54,代码来源:WordNetUtils.java

示例5: repair

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入方法依赖的package包/类
/**
 * Removes sentences and tokens whose begin offset is not smaller than their end offset.
 * <p>
 * For each such token, the attached lemma, POS and stem annotations (when present) are
 * removed from the indexes as well. Every removal is reported as an INFO message; a failure
 * to obtain the JCas is logged and reported as an ERROR message.
 *
 * @param aProject
 *            not used by this method
 * @param aCas
 *            the CAS to repair
 * @param aMessages
 *            list that receives one message per performed removal or error
 */
@Override
public void repair(Project aProject, CAS aCas, List<LogMessage> aMessages)
{
    try {
        // Work on snapshots: annotations are removed from the indexes inside the loops, and
        // modifying an index while iterating over it directly is not safe.
        List<Sentence> sentences = new java.util.ArrayList<Sentence>(
                select(aCas.getJCas(), Sentence.class));
        for (Sentence s : sentences) {
            if (s.getBegin() >= s.getEnd()) {
                s.removeFromIndexes();
                aMessages.add(new LogMessage(this, LogLevel.INFO,
                        "Removed sentence with illegal span: %s", s));
            }
        }

        List<Token> tokens = new java.util.ArrayList<Token>(
                select(aCas.getJCas(), Token.class));
        for (Token t : tokens) {
            if (t.getBegin() >= t.getEnd()) {
                // Remove the annotations hanging off the token before the token itself
                Lemma lemma = t.getLemma();
                if (lemma != null) {
                    lemma.removeFromIndexes();
                    aMessages.add(new LogMessage(this, LogLevel.INFO,
                            "Removed lemma attached to token with illegal span: %s", t));
                }

                POS pos = t.getPos();
                if (pos != null) {
                    pos.removeFromIndexes();
                    aMessages.add(new LogMessage(this, LogLevel.INFO,
                            "Removed POS attached to token with illegal span: %s", t));
                }

                Stem stem = t.getStem();
                if (stem != null) {
                    stem.removeFromIndexes();
                    aMessages.add(new LogMessage(this, LogLevel.INFO,
                            "Removed stem attached to token with illegal span: %s", t));
                }

                t.removeFromIndexes();
                aMessages.add(new LogMessage(this, LogLevel.INFO,
                        "Removed token with illegal span: %s", t));
            }
        }

    }
    catch (CASException e) {
        log.error("Unabled to access JCas", e);
        // The placeholder is needed so the exception message actually appears in the report
        aMessages.add(new LogMessage(this, LogLevel.ERROR, "Unabled to access JCas: %s",
                e.getMessage()));
    }
}
 
开发者ID:webanno,项目名称:webanno,代码行数:49,代码来源:RemoveZeroSizeTokensAndSentencesRepair.java

示例6: getTokenLemma

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入方法依赖的package包/类
/**
 * Produces the lemma of a token, falling back to the token's covered text when no lemma
 * is attached.
 * <p>
 * Only the fallback text is converted to lower case; an existing lemma value is returned
 * exactly as stored. Lower-casing uses {@link java.util.Locale#ROOT} so the result does not
 * depend on the default locale of the JVM.
 * 
 * @param tok
 *          the token to inspect
 * @return the lemma value if the token has a lemma, otherwise the lower-cased covered text
 */
public String getTokenLemma(Token tok) {
  Lemma l = tok.getLemma();
  // For some weird reason, Clear NLP lemma is sometimes NULL
  if (l != null) {
    return l.getValue();
  }
  return tok.getCoveredText().toLowerCase(java.util.Locale.ROOT);
}
 
开发者ID:oaqa,项目名称:knn4qa,代码行数:13,代码来源:ExtractTextReps.java


注:本文中的de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token.getLemma方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。