当前位置: 首页>>代码示例>>Java>>正文


Java TaggedWord.tag方法代码示例

本文整理汇总了Java中edu.stanford.nlp.ling.TaggedWord.tag方法的典型用法代码示例。如果您正苦于以下问题：Java TaggedWord.tag方法的具体用法？Java TaggedWord.tag怎么用？Java TaggedWord.tag使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类edu.stanford.nlp.ling.TaggedWord的用法示例。


在下文中一共展示了TaggedWord.tag方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: haveEquivalentPOSTags

import edu.stanford.nlp.ling.TaggedWord; //导入方法依赖的package包/类
/**
 * Tells whether two tokens carry part-of-speech tags of the same family.
 * <p>
 * Tags are upper-cased before comparison. A pair is accepted when both tags
 * start with {@code VB}, {@code NN}, {@code RB} or {@code JJ}, or when both
 * tags are exactly {@code MD} or exactly {@code WRB}.
 *
 * @param firstToken  first token to inspect; may be {@code null}
 * @param secondToken second token to inspect; may be {@code null}
 * @return {@code true} when both tokens and both tags are non-null and the
 *         tags belong to the same family, {@code false} otherwise
 **/
private Boolean haveEquivalentPOSTags(TaggedWord firstToken, TaggedWord secondToken){
	// A missing token or a missing tag can never be equivalent to anything.
	if(firstToken == null || secondToken == null){
		return false;
	}
	if(firstToken.tag() == null || secondToken.tag() == null){
		return false;
	}

	String firstTag = firstToken.tag().toUpperCase();
	String secondTag = secondToken.tag().toUpperCase();

	boolean bothVerbs = firstTag.startsWith("VB") && secondTag.startsWith("VB");
	boolean bothModals = firstTag.equals("MD") && secondTag.equals("MD");
	boolean bothNouns = firstTag.startsWith("NN") && secondTag.startsWith("NN");
	boolean bothAdverbs = firstTag.startsWith("RB") && secondTag.startsWith("RB");
	boolean bothWhAdverbs = firstTag.equals("WRB") && secondTag.equals("WRB");
	boolean bothAdjectives = firstTag.startsWith("JJ") && secondTag.startsWith("JJ");

	return bothVerbs || bothModals || bothNouns || bothAdverbs || bothWhAdverbs || bothAdjectives;
}
 
开发者ID:MDEGroup,项目名称:EMFCompare-Semantic-Extension,代码行数:22,代码来源:SemanticDistanceEvaluator.java

示例2: haveEquivalentPOSTags

import edu.stanford.nlp.ling.TaggedWord; //导入方法依赖的package包/类
/**
 * Reports whether the two tokens have POS tags from the same family.
 * <p>
 * After upper-casing, the tags match when they share one of the prefixes
 * {@code VB}, {@code NN}, {@code RB}, {@code JJ}, or when both are exactly
 * {@code MD} or both are exactly {@code WRB}.
 *
 * @param firstToken  first token; may be {@code null}
 * @param secondToken second token; may be {@code null}
 * @return {@code true} for a matching pair, {@code false} otherwise (including
 *         when a token or its tag is {@code null})
 **/
private Boolean haveEquivalentPOSTags(TaggedWord firstToken, TaggedWord secondToken){
	boolean usable = firstToken != null && secondToken != null
			&& firstToken.tag() != null && secondToken.tag() != null;
	if(!usable){
		return false;
	}

	String left = firstToken.tag().toUpperCase();
	String right = secondToken.tag().toUpperCase();

	// Families identified by a shared tag prefix.
	String[] sharedPrefixes = { "VB", "NN", "RB", "JJ" };
	for(String prefix : sharedPrefixes){
		if(left.startsWith(prefix) && right.startsWith(prefix)){
			return true;
		}
	}

	// Families that require the exact same tag on both sides.
	String[] exactTags = { "MD", "WRB" };
	for(String exact : exactTags){
		if(left.equals(exact) && right.equals(exact)){
			return true;
		}
	}
	return false;
}
 
开发者ID:MDEGroup,项目名称:EMFCompare-Semantic-Extension,代码行数:19,代码来源:SemanticDistanceEvaluator.java

示例3: rawParse

import edu.stanford.nlp.ling.TaggedWord; //导入方法依赖的package包/类
/**
 * Renders each tagged word of the sentence as one tab-separated row and
 * passes the rows to {@code parse}.
 * <p>
 * Each row holds, in order: the 1-based token position, the word, its lemma
 * ({@code "_"} when no lemmatizer is configured), the POS tag twice, and a
 * trailing {@code "_"}.
 *
 * @param sentence the tagged words of one sentence, in order
 * @return whatever {@code parse} returns for the generated rows
 * @throws IOException          declared by this method; presumably raised by {@code parse}
 * @throws MaltChainedException declared by this method; presumably raised by {@code parse}
 */
public ConcurrentDependencyGraph rawParse(List<TaggedWord> sentence)
        throws IOException,
        MaltChainedException {
    String[] rows = new String[sentence.size()];
    for (int position = 0; position < sentence.size(); position++) {
        TaggedWord token = sentence.get(position);
        String form = token.word();
        // Fall back to the "_" placeholder when lemmatization is unavailable.
        String lemma = (this.lemmatizer == null)
                ? "_"
                : this.lemmatizer.lemmatize(form);
        String tag = token.tag();

        rows[position] = String.format("%s\t%s\t%s\t%s\t%s\t%s",
                position + 1, form, lemma, tag, tag, "_");
    }
    return parse(rows);
}
 
开发者ID:mojtaba-khallash,项目名称:JHazm,代码行数:18,代码来源:DependencyParser.java

示例4: makeObjects

import edu.stanford.nlp.ling.TaggedWord; //导入方法依赖的package包/类
/**
 * Builds the set of (position, word/tag) pairs for the given tree.
 * <p>
 * The tagged words come from {@code myExtractor(tree)} when {@code useTag}
 * is set, otherwise from {@code tree.taggedYield()}. Each word is paired
 * with its 0-based position in that list.
 *
 * @param tree the tree whose tagged words are collected
 * @return a set of {@code Pair<Integer,WordTag>}, one per tagged word
 */
@Override
protected
Set<?> makeObjects(Tree tree) {
  final List<TaggedWord> taggedWords;
  if (!useTag) {
    taggedWords = tree.taggedYield();
  } else {
    taggedWords = myExtractor(tree);
  }

  Set<Pair<Integer,WordTag>> positionedTags = new HashSet<Pair<Integer,WordTag>>();
  int position = 0;
  for (TaggedWord taggedWord : taggedWords) {
    WordTag wordTag = new WordTag(taggedWord.value(), taggedWord.tag());
    positionedTags.add(new Pair<Integer,WordTag>(Integer.valueOf(position), wordTag));
    position++;
  }

  if (DEBUG_MORE) {
    System.err.println("Tags: " + positionedTags);
  }
  return positionedTags;
}
 
开发者ID:FabianFriedrich,项目名称:Text2Process,代码行数:23,代码来源:TaggingEval.java

示例5: train

import edu.stanford.nlp.ling.TaggedWord; //导入方法依赖的package包/类
/**
 * Updates the seen/unseen counters with a single tagged word.
 * <p>
 * The word is always added to {@code seenCounter}. Once {@code treesRead}
 * exceeds {@code indexToStartUnkCounting}, a word whose seen count is still
 * below 2 is additionally recorded in {@code unSeenCounter} under its
 * signature.
 *
 * @param tw     the tagged word to train on
 * @param loc    position passed through to {@code model.getSignatureIndex}
 * @param weight amount added to each affected counter
 */
public void train(TaggedWord tw, int loc, double weight) {
  IntTaggedWord wordAndTag =
    new IntTaggedWord(tw.word(), tw.tag(), wordIndex, tagIndex);
  IntTaggedWord wordOnly = new IntTaggedWord(wordAndTag.word, nullTag);
  seenCounter.incrementCount(wordOnly, weight);

  // Unknown-word statistics start only some way through the trees;
  // treesRead is 1 based counting.
  if (!(treesRead > indexToStartUnkCounting)) {
    return;
  }
  // Anything counted at least twice is treated as a known word.
  if (!(seenCounter.getCount(wordOnly) < 2)) {
    return;
  }

  // It's an entirely unknown word: record it under its signature.
  int signature = model.getSignatureIndex(wordAndTag.word, loc,
                                          wordIndex.get(wordAndTag.word));
  IntTaggedWord signatureAndTag = new IntTaggedWord(signature, wordAndTag.tag);
  IntTaggedWord tagOnly = new IntTaggedWord(nullWord, wordAndTag.tag);
  IntTaggedWord signatureOnly = new IntTaggedWord(signature, nullTag);

  unSeenCounter.incrementCount(signatureAndTag, weight);
  unSeenCounter.incrementCount(tagOnly, weight);
  unSeenCounter.incrementCount(signatureOnly, weight);
  unSeenCounter.incrementCount(NULL_ITW, weight);
}
 
开发者ID:chbrown,项目名称:stanford-parser,代码行数:28,代码来源:FrenchUnknownWordModelTrainer.java

示例6: process

import edu.stanford.nlp.ling.TaggedWord; //导入方法依赖的package包/类
/**
 * Tags the given text and collects per-document statistics.
 * <p>
 * For every tagged word the total word count and the count of its POS tag
 * are incremented. For words tagged {@code NN} or {@code NNS} the base form
 * returned by {@code morphology.stem} is also counted in a per-tag word bag;
 * if stemming yields {@code null}, the word's own string form is used.
 *
 * @param etextNo identifier of the text; echoed in the log line and in the result
 * @param text    reader supplying the text to tokenize and tag
 * @return a {@code TaggerResult} built from the identifier, the tag counts,
 *         the word bags and the total number of words
 */
@Override
public TaggerResult process(Integer etextNo, Reader text) {
  final DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(text);
  documentPreprocessor.setTokenizerFactory(tokenizerFactory);

  int words = 0;
  final Map<String,Double> tagCounts = new TreeMap<String,Double>();
  final Map<String,Map<String,Integer>> wordBags = new HashMap<>();
  for (List<HasWord> sentence : documentPreprocessor) {
    for (TaggedWord word : tagger.tagSentence(sentence)) {
      // word count
      words++;

      // tag counts
      final String tag = word.tag();
      tagCounts.merge(tag, 1.0, Double::sum);

      // noun word bags (verbs are deliberately not bagged at present)
      if ("NN".equals(tag) || "NNS".equals(tag)) {
        // Get the base form of the word. The null check must happen before
        // toString(); otherwise a null stem result throws instead of falling
        // back to the surface form.
        final Object stemmed = morphology.stem(word);
        final String lemma = (stemmed != null) ? stemmed.toString() : word.toString();

        // increment count in the bag for words of this POS, creating it on first use
        wordBags.computeIfAbsent(tag, key -> new HashMap<>())
                .merge(lemma, 1, Integer::sum);
      }
    }
  }
  System.err.println("Processed: " + etextNo + " " + words + " words");
  return new TaggerResult(etextNo, tagCounts, wordBags, words);
}
 
开发者ID:tmmcguire,项目名称:ashurbanipal,代码行数:39,代码来源:EnglishTagger.java

示例7: haveComparablePOSTags

import edu.stanford.nlp.ling.TaggedWord; //导入方法依赖的package包/类
/**
 * Checks whether the two tokens have comparable POS tags.
 * <p>
 * After upper-casing, a pair is comparable when both tags start with
 * {@code VB}, both start with {@code NN}, or both are exactly {@code MD}.
 *
 * @param firstToken  first token; may be {@code null}
 * @param secondToken second token; may be {@code null}
 * @return {@code true} for a comparable pair, {@code false} otherwise
 *         (including when a token or its tag is {@code null})
 **/
private Boolean haveComparablePOSTags(TaggedWord firstToken, TaggedWord secondToken){
	if(firstToken == null || secondToken == null){
		return false;
	}
	if(firstToken.tag() == null || secondToken.tag() == null){
		return false;
	}

	String firstTag = firstToken.tag().toUpperCase();
	String secondTag = secondToken.tag().toUpperCase();

	boolean bothVerbs = firstTag.startsWith("VB") && secondTag.startsWith("VB");
	boolean bothModals = firstTag.equals("MD") && secondTag.equals("MD");
	boolean bothNouns = firstTag.startsWith("NN") && secondTag.startsWith("NN");

	return bothVerbs || bothModals || bothNouns;
}
 
开发者ID:MDEGroup,项目名称:EMFCompare-Semantic-Extension,代码行数:16,代码来源:SemanticDistanceEvaluator.java

示例8: haveNounPOSTags

import edu.stanford.nlp.ling.TaggedWord; //导入方法依赖的package包/类
/**
 * Checks whether both tokens are tagged as nouns.
 * <p>
 * A token counts as a noun when its upper-cased tag starts with {@code NN}.
 *
 * @param firstToken  first token; may be {@code null}
 * @param secondToken second token; may be {@code null}
 * @return {@code true} only when both tokens and both tags are non-null and
 *         both tags start with {@code NN}
 **/
private Boolean haveNounPOSTags(TaggedWord firstToken, TaggedWord secondToken){
	boolean bothTagged = firstToken != null && secondToken != null
			&& firstToken.tag() != null && secondToken.tag() != null;
	return bothTagged
			&& firstToken.tag().toUpperCase().startsWith("NN")
			&& secondToken.tag().toUpperCase().startsWith("NN");
}
 
开发者ID:MDEGroup,项目名称:EMFCompare-Semantic-Extension,代码行数:12,代码来源:SemanticDistanceEvaluator.java

示例9: haveComparablePOSTags

import edu.stanford.nlp.ling.TaggedWord; //导入方法依赖的package包/类
/**
 * Reports whether two tokens have comparable POS tags: both verbs
 * ({@code VB*}), both nouns ({@code NN*}), or both modals (exactly {@code MD}).
 * Tags are upper-cased before comparison.
 *
 * @param firstToken  first token; may be {@code null}
 * @param secondToken second token; may be {@code null}
 * @return {@code true} for a comparable pair, {@code false} otherwise
 *         (including when a token or its tag is {@code null})
 **/
private Boolean haveComparablePOSTags(TaggedWord firstToken, TaggedWord secondToken){
	boolean usable = firstToken != null && secondToken != null
			&& firstToken.tag() != null && secondToken.tag() != null;
	if(!usable){
		return false;
	}

	String left = firstToken.tag().toUpperCase();
	String right = secondToken.tag().toUpperCase();

	// Verb and noun families are identified by a shared prefix.
	String[] sharedPrefixes = { "VB", "NN" };
	for(String prefix : sharedPrefixes){
		if(left.startsWith(prefix) && right.startsWith(prefix)){
			return true;
		}
	}
	// Modals must match exactly.
	return left.equals("MD") && right.equals("MD");
}
 
开发者ID:MDEGroup,项目名称:EMFCompare-Semantic-Extension,代码行数:13,代码来源:SemanticDistanceEvaluator.java

示例10: haveNounPOSTags

import edu.stanford.nlp.ling.TaggedWord; //导入方法依赖的package包/类
/**
 * Reports whether both tokens carry a noun tag, i.e. a tag whose upper-cased
 * form starts with {@code NN}.
 *
 * @param firstToken  first token; may be {@code null}
 * @param secondToken second token; may be {@code null}
 * @return {@code true} when both are noun-tagged; {@code false} otherwise,
 *         including when a token or its tag is {@code null}
 **/
private Boolean haveNounPOSTags(TaggedWord firstToken, TaggedWord secondToken){
	if(firstToken == null || secondToken == null){
		return false;
	}
	if(firstToken.tag() == null || secondToken.tag() == null){
		return false;
	}
	String firstTag = firstToken.tag().toUpperCase();
	String secondTag = secondToken.tag().toUpperCase();
	return firstTag.startsWith("NN") && secondTag.startsWith("NN");
}
 
开发者ID:MDEGroup,项目名称:EMFCompare-Semantic-Extension,代码行数:9,代码来源:SemanticDistanceEvaluator.java

示例11: countPOSTagsFromTaggedWords

import edu.stanford.nlp.ling.TaggedWord; //导入方法依赖的package包/类
/**
 * Counts how many of the given words fall into each of seven POS buckets.
 * <p>
 * Bucket order: NOUN ({@code NN*}), VERB ({@code VB*}), ADJECTIVE
 * ({@code JJ*}), ADVERB ({@code RB*}), INTERJECTION ({@code UH*}),
 * PUNCTUATION (tags {@code "."} and {@code ":"} only), HASHTAG ({@code HT*}).
 * Words with any other tag are counted towards the total but no bucket.
 *
 * @param taggedWords the tagged words to classify
 * @param normalize   when {@code true}, each bucket is divided by the total
 *                    number of words; for an empty input the buckets stay at
 *                    zero rather than becoming {@code NaN}
 * @return a 7-element array of counts, or of fractions when normalized
 */
private double[] countPOSTagsFromTaggedWords(List<TaggedWord> taggedWords,
    boolean normalize) {
  // 7 = [NOUN, VERB, ADJECTIVE, ADVERB, INTERJECTION, PUNCTUATION, HASHTAG]
  double[] posTags = new double[] { 0d, 0d, 0d, 0d, 0d, 0d, 0d };
  int wordCount = 0;
  for (TaggedWord word : taggedWords) {
    wordCount++;
    String pennTag = word.tag();
    if (pennTag.startsWith("NN")) {
      posTags[0]++;
    } else if (pennTag.startsWith("VB")) {
      posTags[1]++;
    } else if (pennTag.startsWith("JJ")) {
      posTags[2]++;
    } else if (pennTag.startsWith("RB")) {
      posTags[3]++;
    } else if (pennTag.startsWith("UH")) {
      posTags[4]++;
    } else if ((pennTag.equals(".")) || (pennTag.equals(":"))) {
      posTags[5]++;
    } else if (pennTag.startsWith("HT")) {
      posTags[6]++;
    }
  }
  // Guard against 0/0: with no words, dividing would turn every bucket into NaN.
  if (normalize && wordCount > 0) {
    for (int i = 0; i < posTags.length; i++) {
      posTags[i] /= wordCount;
    }
  }
  return posTags;
}
 
开发者ID:millecker,项目名称:storm-apps,代码行数:32,代码来源:POSFeatureVectorGenerator.java

示例12: getNonStemmedWordTagsFromTree

import edu.stanford.nlp.ling.TaggedWord; //导入方法依赖的package包/类
/**
 * Collects the word/tag pairs of a tree's tagged yield, without stemming.
 *
 * @param t the tree whose tagged yield is read
 * @return one {@code WordTag} per tagged word, in yield order
 */
private static List<WordTag> getNonStemmedWordTagsFromTree(Tree t) {
  List<WordTag> result = Generics.newArrayList();
  Sentence<TaggedWord> taggedYield = t.taggedYield();
  for (TaggedWord tagged : taggedYield) {
    result.add(new WordTag(tagged.word(), tagged.tag()));
  }
  return result;
}
 
开发者ID:FabianFriedrich,项目名称:Text2Process,代码行数:10,代码来源:CollocationFinder.java

示例13: listToEvents

import edu.stanford.nlp.ling.TaggedWord; //导入方法依赖的package包/类
/**
 * Converts tagged words into their {@code IntTaggedWord} form, built from
 * each word and tag together with {@code wordIndex} and {@code tagIndex}.
 *
 * @param taggedWords the tagged words to convert
 * @return a new list with one {@code IntTaggedWord} per input word, in order
 */
protected List<IntTaggedWord> listToEvents(List<TaggedWord> taggedWords) {
  List<IntTaggedWord> events = new ArrayList<IntTaggedWord>();
  for (TaggedWord taggedWord : taggedWords) {
    events.add(
        new IntTaggedWord(taggedWord.word(), taggedWord.tag(), wordIndex, tagIndex));
  }
  return events;
}
 
开发者ID:paulirwin,项目名称:Stanford.NER.Net,代码行数:9,代码来源:BaseLexicon.java

示例14: getNonStemmedWordTagsFromTree

import edu.stanford.nlp.ling.TaggedWord; //导入方法依赖的package包/类
/**
 * Returns the unstemmed word/tag pairs found in the tagged yield of a tree.
 *
 * @param t the tree to read
 * @return a list holding one {@code WordTag} per tagged word, in yield order
 */
private static List<WordTag> getNonStemmedWordTagsFromTree(Tree t) {
  List<WordTag> pairs = Generics.newArrayList();
  for (TaggedWord tagged : t.taggedYield()) {
    pairs.add(new WordTag(tagged.word(), tagged.tag()));
  }
  return pairs;
}
 
开发者ID:paulirwin,项目名称:Stanford.NER.Net,代码行数:10,代码来源:CollocationFinder.java

示例15: train

import edu.stanford.nlp.ling.TaggedWord; //导入方法依赖的package包/类
/**
 * Updates the seen/unseen counters with a single tagged word.
 * <p>
 * The word is always added to {@code seenCounter}. Once {@code treesRead}
 * exceeds {@code indexToStartUnkCounting}, a word whose seen count is still
 * below 1.5 is additionally recorded in {@code unSeenCounter} under its
 * signature; when {@code DOCUMENT_UNKNOWNS} is set, the word, tag and
 * signature are also written to the error stream.
 *
 * @param tw     the tagged word to train on
 * @param loc    position passed through to {@code model.getSignatureIndex}
 * @param weight amount added to each affected counter
 */
public void train(TaggedWord tw, int loc, double weight) {
  IntTaggedWord wordAndTag =
    new IntTaggedWord(tw.word(), tw.tag(), wordIndex, tagIndex);
  IntTaggedWord wordOnly = new IntTaggedWord(wordAndTag.word, nullTag);
  seenCounter.incrementCount(wordOnly, weight);

  // Unknown-word statistics start only some way through the trees;
  // treesRead is 1 based counting.
  if (!(treesRead > indexToStartUnkCounting)) {
    return;
  }
  // The word is already known. A new tag for a known word is deliberately
  // ignored for now.
  if (!(seenCounter.getCount(wordOnly) < 1.5)) {
    return;
  }

  // It's an entirely unknown word: record it under its signature.
  int signature = model.getSignatureIndex(wordAndTag.word, loc,
                                          wordIndex.get(wordAndTag.word));
  if (DOCUMENT_UNKNOWNS) {
    EncodingPrintWriter.err.println("Unknown word/tag/sig:\t" +
                                    wordIndex.get(wordAndTag.word) + '\t' +
                                    tagIndex.get(wordAndTag.tag) + '\t' +
                                    wordIndex.get(signature), "UTF-8");
  }

  IntTaggedWord signatureAndTag = new IntTaggedWord(signature, wordAndTag.tag);
  IntTaggedWord tagOnly = new IntTaggedWord(nullWord, wordAndTag.tag);
  IntTaggedWord signatureOnly = new IntTaggedWord(signature, nullTag);

  unSeenCounter.incrementCount(signatureAndTag, weight);
  unSeenCounter.incrementCount(tagOnly, weight);
  unSeenCounter.incrementCount(signatureOnly, weight);
  unSeenCounter.incrementCount(NULL_ITW, weight);
}
 
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:43,代码来源:EnglishUnknownWordModelTrainer.java


注:本文中的edu.stanford.nlp.ling.TaggedWord.tag方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。