当前位置: 首页>>代码示例>>Java>>正文


Java Token.getPos方法代码示例

本文整理汇总了Java中de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token.getPos方法的典型用法代码示例。如果您正苦于以下问题:Java Token.getPos方法的具体用法?Java Token.getPos怎么用?Java Token.getPos使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token的用法示例。


在下文中一共展示了Token.getPos方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: process

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入方法依赖的package包/类
/**
 * Attaches a {@code Lemma} annotation to every {@code Token} in the CAS.
 *
 * <p>When a token carries a POS annotation, the lemma is obtained from
 * {@code morphology.lemmatize(..)} using the token text and its POS value; otherwise
 * {@code morphology.stem(..)} is applied to the token text. If neither yields a value, the
 * token text itself is used as the lemma.
 *
 * @param aJCas the CAS whose tokens are processed
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas)
	throws AnalysisEngineProcessException
{
	// Roughly mirrors what MorphaAnnotator does.
	for (Token t : select(aJCas, Token.class)) {
		String surface = t.getCoveredText();

		// Without a POS annotation only the tag-independent stemmer can be used.
		String lemmaValue = (t.getPos() == null)
				? morphology.stem(surface)
				: morphology.lemmatize(new WordTag(surface, t.getPos().getPosValue())).lemma();

		// Fall back to the surface form when no lemma was produced.
		if (lemmaValue == null) {
			lemmaValue = surface;
		}

		// The lemma annotation covers exactly the span of its token.
		Lemma annotation = new Lemma(aJCas, t.getBegin(), t.getEnd());
		annotation.setValue(lemmaValue);
		annotation.addToIndexes();
		t.setLemma(annotation);
	}
}
 
开发者ID:tudarmstadt-lt,项目名称:sentiment,代码行数:25,代码来源:StanfordLemmatizer.java

示例2: PToken

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入方法依赖的package包/类
/**
 * Copies the relevant information of a {@link Token} into this object's fields: covered text,
 * POS value, lemma value, named-entity tag, document id, offsets and document length.
 *
 * <p>POS and lemma are set to {@code null} when the token has no such annotation. The
 * named-entity tag is only assigned when at least one {@code NamedEntity} is covered by the
 * token; in that case the value of the first one is used.
 *
 * @param t the token to copy from
 */
public PToken(Token t) {
	this.text = t.getCoveredText();
	this.pos = t.getPos() == null ? null : t.getPos().getPosValue();
	this.lemma = t.getLemma() == null ? null : t.getLemma().getValue();

	// Only the first covered named entity contributes its tag.
	List<NamedEntity> coveredEntities = JCasUtil.selectCovered(NamedEntity.class, t);
	if (!coveredEntities.isEmpty()) {
		this.neTag = coveredEntities.get(0).getValue();
	}

	// The document annotation is expected to be a DocumentMetaData instance (unchecked cast).
	DocumentMetaData documentMeta = (DocumentMetaData) t.getCAS().getDocumentAnnotation();
	this.documentId = documentMeta.getDocumentId();

	this.start = t.getBegin();
	this.end = t.getEnd();
	this.docLength = t.getCAS().getDocumentText().length();
}
 
开发者ID:UKPLab,项目名称:ijcnlp2017-cmaps,代码行数:15,代码来源:PToken.java

示例3: PToken

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入方法依赖的package包/类
/**
 * Copies the relevant information of a {@link Token} into this object's fields: cleaned text,
 * POS value, stem value, lemma value, document id, offsets and document length.
 *
 * <p>POS, stem and lemma are set to {@code null} when the token has no such annotation.
 *
 * @param t the token to copy from
 */
public PToken(Token t) {
	// Characters matching the regex class \p{C} become blanks; afterwards each pair of
	// blanks is replaced by a single blank (one pass only, so longer runs are merely shortened).
	String withoutControlChars = t.getCoveredText().replaceAll("\\p{C}", " ");
	this.text = withoutControlChars.replaceAll("  ", " ");

	this.pos = t.getPos() == null ? null : t.getPos().getPosValue();
	this.stem = t.getStem() == null ? null : t.getStem().getValue();
	this.lemma = t.getLemma() == null ? null : t.getLemma().getValue();

	// The document annotation is expected to be a DocumentMetaData instance (unchecked cast).
	DocumentMetaData documentMeta = (DocumentMetaData) t.getCAS().getDocumentAnnotation();
	this.documentId = documentMeta.getDocumentId();

	this.start = t.getBegin();
	this.end = t.getEnd();
	this.docLength = t.getCAS().getDocumentText().length();
}
 
开发者ID:UKPLab,项目名称:emnlp2017-cmapsum-corpus,代码行数:14,代码来源:PToken.java

示例4: addDependentFeature

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入方法依赖的package包/类
/**
 * Adds definiteness and bare-plural features for the head of a mention to a segment.
 *
 * <p>The features are named {@code main_verb_<featName>_def}, {@code main_verb_<featName>_indef}
 * and {@code main_verb_<featName>_barePlural}; each value is {@code "true"} or {@code "false"}.
 * Nothing is added when no head is found, the head has no POS annotation, or the head is not a
 * noun or pronoun according to {@code NounPhraseFeatures.isNounOrPronoun}.
 *
 * @param mr           the mention annotation whose head is inspected
 * @param jCas         the CAS containing the annotations
 * @param childNodeMap dependencies per token, passed on to the {@code NounPhraseFeatures} helpers
 * @param segment      the segment that receives the features
 * @param featName     name fragment inserted into the feature names
 */
private void addDependentFeature(Annotation mr, JCas jCas, HashMap<Token, Set<Dependency>> childNodeMap,
		Segment segment, String featName) {
	Token head = SitEntUimaUtils.getHead(mr, jCas);

	// A missing head or a head without POS tag is rare (the original author suspected a
	// parser error or an out-of-memory condition); there is nothing to extract then.
	if (head == null || head.getPos() == null) {
		return;
	}

	// Only nouns and pronouns carry the features computed below.
	if (!NounPhraseFeatures.isNounOrPronoun(head)) {
		return;
	}

	String determinerType = NounPhraseFeatures.getDeterminerType(jCas, head, childNodeMap, false);
	String nounType = NounPhraseFeatures.getNounType(head);

	// Definite: definite/demonstrative/definite-quantifier determiner, or a proper noun/pronoun.
	boolean definite = determinerType.matches("def|demon|quantDef")
			|| nounType.matches("proper|pronoun");
	boolean indefinite = determinerType.matches("indef|quantIndef");

	FeaturesUtil.addFeature("main_verb_" + featName + "_def", String.valueOf(definite), jCas, segment);
	FeaturesUtil.addFeature("main_verb_" + featName + "_indef", String.valueOf(indefinite), jCas, segment);

	boolean barePlural = NounPhraseFeatures.isBarePlural(jCas, head, childNodeMap);
	FeaturesUtil.addFeature("main_verb_" + featName + "_barePlural", String.valueOf(barePlural), jCas,
			segment);
}
 
开发者ID:annefried,项目名称:sitent,代码行数:35,代码来源:MathewKatzFeaturesAnnotator.java

示例5: writePosTags

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入方法依赖的package包/类
/**
 * Writes the part-of-speech tags found in the CAS into a new POS tags layer of the text corpus.
 *
 * <p>If the CAS contains no {@code POS} annotation, only a debug message is logged. Otherwise
 * the layer is created with the name of the first {@code TagsetDescription} declared for the
 * {@code POS} layer, or {@code "STTS"} when there is none, and one tag is added for each token
 * that has a POS annotation.
 *
 * @param aJCas                   the CAS to read tokens and POS annotations from
 * @param aTextCorpus             the corpus that receives the POS tags layer; its tokens layer
 *                                must already exist
 * @param aTokensBeginPositionMap not referenced by this method
 */
private void writePosTags(JCas aJCas, TextCorpus aTextCorpus,
        Map<Integer, eu.clarin.weblicht.wlfxb.tc.api.Token> aTokensBeginPositionMap)
{
    // Without any POS annotation there is nothing to export.
    if (!JCasUtil.exists(aJCas, POS.class)) {
        getLogger().debug("Layer [" + TextCorpusLayerTag.POSTAGS.getXmlName() + "]: empty");
        return;
    }

    // The tokens layer must already exist at this point.
    TokensLayer tokensLayer = aTextCorpus.getTokensLayer();

    // Use the first tagset declared for the POS layer, defaulting to STTS.
    String tagsetName = "STTS";
    for (TagsetDescription description : select(aJCas, TagsetDescription.class)) {
        if (description.getLayer().equals(POS.class.getName())) {
            tagsetName = description.getName();
            break;
        }
    }

    PosTagsLayer posLayer = aTextCorpus.createPosTagsLayer(tagsetName);

    getLogger().debug("Layer [" + TextCorpusLayerTag.POSTAGS.getXmlName() + "]: created");

    // CAS tokens are matched to corpus tokens purely by iteration position.
    // NOTE(review): this presumes both token sequences have the same order - confirm.
    int tokenIndex = -1;
    for (Token casToken : select(aJCas, Token.class)) {
        tokenIndex++;

        POS tag = casToken.getPos();
        if (tag == null || posLayer == null) {
            continue;
        }

        posLayer.addTag(tag.getPosValue(), tokensLayer.getToken(tokenIndex));
    }
}
 
开发者ID:webanno,项目名称:webanno,代码行数:38,代码来源:TcfWriter.java

示例6: setWordNetFeatures

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入方法依赖的package包/类
/**
 * Adds WordNet-based features for a token, using the first sense listed for its lemma
 * (the original author describes this as the Most Frequent Sense heuristic).
 * Adapted from code by Nils Reiter.
 *
 * <p>Nothing is added when the token lacks a POS or lemma annotation, when its POS value cannot
 * be mapped by {@code WordNetUtils.getPOSFromPenn}, or when the dictionary has no index word
 * for the lemma. Otherwise the following features (all prefixed with {@code featurePrefix})
 * are written: {@code wnLexicalFilename}, {@code sense0} to {@code sense3} for the first four
 * synsets on the hypernym path, {@code wnGranularity} (number of hypernym steps taken) and
 * {@code senseTop} (the synset at which the walk stopped).
 *
 * @param token         the token to compute features for
 * @param classAnnot    the annotation that receives the features
 * @param jCas          the CAS passed on to {@code FeaturesUtil.addFeature}
 * @param featurePrefix prefix prepended to every feature name
 * @param wordnet       the WordNet dictionary to query
 * @author afried, Annemarie Friedrich
 */
public static void setWordNetFeatures(Token token, ClassificationAnnotation classAnnot, JCas jCas,
		String featurePrefix, IDictionary wordnet) {
	if (token.getPos() == null || token.getLemma() == null) {
		return;
	}

	POS pos = WordNetUtils.getPOSFromPenn(token.getPos().getPosValue());
	if (pos == null) {
		return;
	}

	IIndexWord indexWord = wordnet.getIndexWord(token.getLemma().getValue(), pos);
	if (indexWord == null) {
		return;
	}

	// The first word id is taken as the most frequent sense.
	ISynsetID firstSenseId = indexWord.getWordIDs().get(0).getSynsetID();
	ISynset firstSense = wordnet.getSynset(firstSenseId);

	// lexical filename:
	FeaturesUtil.addFeature(featurePrefix + "wnLexicalFilename", firstSense.getLexicalFile().getName(),
			jCas, classAnnot);

	// Feature names for the first four levels of the hypernym walk.
	String[] levelFeatureNames = { "sense0", "sense1", "sense2", "sense3" };

	int depth = 0;
	ISynset current = firstSense;
	// Guards against cycles in the hypernym relation.
	Set<ISynset> visited = new HashSet<ISynset>();
	while (!visited.contains(current) && !current.getRelatedSynsets(Pointer.HYPERNYM).isEmpty()) {
		visited.add(current);
		// The substring operation removes the leading 'SID-'
		String senseId = current.getID().toString().substring(4);
		if (depth < levelFeatureNames.length) {
			FeaturesUtil.addFeature(featurePrefix + levelFeatureNames[depth], senseId, jCas, classAnnot);
		}
		// Always follow the first hypernym.
		current = wordnet.getSynset(current.getRelatedSynsets(Pointer.HYPERNYM).get(0));
		depth++;
	}

	FeaturesUtil.addFeature(featurePrefix + "wnGranularity", Integer.toString(depth), jCas, classAnnot);

	// 'current' is now the synset where the walk ended: one without hypernyms, or one
	// that had been visited before.
	FeaturesUtil.addFeature(featurePrefix + "senseTop", current.getID().toString().substring(4), jCas,
			classAnnot);
}
 
开发者ID:annefried,项目名称:sitent,代码行数:54,代码来源:WordNetUtils.java

示例7: process

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入方法依赖的package包/类
/**
 * Adds one binary feature {@code segment_pos_<POS>} (value {@code "1"}) to each segment for
 * every token of that segment that carries a POS annotation. Blanks are stripped from the POS
 * value before it is used in the feature name.
 *
 * @param jCas the CAS whose segments are processed
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
	for (Segment segment : JCasUtil.select(jCas, Segment.class)) {

		// The segment's own token list is used rather than a selectCovered(..) lookup.
		Collection<Annotation> listedAnnotations = SitEntUimaUtils.getList(segment.getTokens());
		Collection<Token> segmentTokens = new LinkedList<Token>();
		for (Annotation listed : listedAnnotations) {
			segmentTokens.add((Token) listed);
		}

		// Word and lemma features proved to be impractical, so only POS features are emitted.
		for (Token token : segmentTokens) {
			if (token.getPos() == null) {
				continue;
			}
			String posTag = token.getPos().getPosValue().replaceAll(" ", "");
			FeaturesUtil.addFeature("segment_pos_" + posTag, "1", jCas, segment);

			// TODO: count POS tags instead of binary feature?
		}

		// Dependency-relation features (segment_depRel_<type>) are currently not emitted.
	}
}
 
开发者ID:annefried,项目名称:sitent,代码行数:42,代码来源:PosLemmaDepFeaturesAnnotator.java

示例8: repair

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; //导入方法依赖的package包/类
/**
 * Removes sentences and tokens whose begin offset is not smaller than their end offset.
 *
 * <p>For each such token, the attached lemma, POS and stem annotations (if any) are removed
 * from the indexes before the token itself. Every removal is reported as an INFO message in
 * {@code aMessages}. If the JCas cannot be obtained, the problem is logged and reported as an
 * ERROR message instead of being thrown.
 *
 * @param aProject  not used by this method
 * @param aCas      the CAS to repair
 * @param aMessages receives one message per removed annotation or per failure
 */
@Override
public void repair(Project aProject, CAS aCas, List<LogMessage> aMessages)
{
    try {
        // Iterate over snapshot copies: annotations are removed from the indexes inside the
        // loops, which could otherwise invalidate an iterator backed by the live index.
        for (Sentence s : new java.util.ArrayList<Sentence>(
                select(aCas.getJCas(), Sentence.class))) {
            if (s.getBegin() >= s.getEnd()) {
                s.removeFromIndexes();
                aMessages.add(new LogMessage(this, LogLevel.INFO,
                        "Removed sentence with illegal span: %s", s));
            }
        }

        for (Token t : new java.util.ArrayList<Token>(select(aCas.getJCas(), Token.class))) {
            if (t.getBegin() >= t.getEnd()) {
                // Remove the annotations hanging off the token first so they are not left
                // behind in the indexes once the token is gone.
                Lemma lemma = t.getLemma();
                if (lemma != null) {
                    lemma.removeFromIndexes();
                    aMessages.add(new LogMessage(this, LogLevel.INFO,
                            "Removed lemma attached to token with illegal span: %s", t));
                }

                POS pos = t.getPos();
                if (pos != null) {
                    pos.removeFromIndexes();
                    aMessages.add(new LogMessage(this, LogLevel.INFO,
                            "Removed POS attached to token with illegal span: %s", t));
                }

                Stem stem = t.getStem();
                if (stem != null) {
                    stem.removeFromIndexes();
                    aMessages.add(new LogMessage(this, LogLevel.INFO,
                            "Removed stem attached to token with illegal span: %s", t));
                }

                t.removeFromIndexes();
                aMessages.add(new LogMessage(this, LogLevel.INFO,
                        "Removed token with illegal span: %s", t));
            }
        }

    }
    catch (CASException e) {
        log.error("Unable to access JCas", e);
        // The placeholder makes the exception detail part of the reported message.
        aMessages.add(new LogMessage(this, LogLevel.ERROR, "Unable to access JCas: %s",
                e.getMessage()));
    }
}
 
开发者ID:webanno,项目名称:webanno,代码行数:49,代码来源:RemoveZeroSizeTokensAndSentencesRepair.java


注:本文中的de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token.getPos方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。