当前位置: 首页>>代码示例>>Java>>正文


Java TaggedWord类代码示例

本文整理汇总了Java中edu.stanford.nlp.ling.TaggedWord的典型用法代码示例。如果您正苦于以下问题:Java TaggedWord类的具体用法?Java TaggedWord怎么用?Java TaggedWord使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


TaggedWord类属于edu.stanford.nlp.ling包,在下文中一共展示了TaggedWord类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: wordIsNONAdjective

import edu.stanford.nlp.ling.TaggedWord; //导入依赖的package包/类
public boolean wordIsNONAdjective(String word) {
	// Words carrying a typical adjective suffix are classified immediately.
	if (word.endsWith("ic") || word.endsWith("ical")) {
		return false;
	}

	// Tokenize the single word and POS-tag it, inspecting the first token.
	List<HasWord> tokens = MaxentTagger.tokenizeText(new StringReader(word)).get(0);
	TaggedWord tagged = tagger.tagSentence(tokens).get(0);

	// "JJ" is the adjective tag; anything else counts as non-adjective.
	return !tagged.tag().equals("JJ");
}
 
开发者ID:ag-sc,项目名称:JLink,代码行数:18,代码来源:PosTagger.java

示例2: getIDFMapForDocument

import edu.stanford.nlp.ling.TaggedWord; //导入依赖的package包/类
/**
 * Build a term-frequency counter over the noun tokens of one document.
 *
 * @param document raw document text
 * @return counter mapping each noun token to its occurrence count
 */
private static Counter<String> getIDFMapForDocument(String document) {
  // Strip Gigaword heading separators up front; they slow tagging down
  // without contributing useful tokens.
  document = headingSeparator.matcher(document).replaceAll("");

  DocumentPreprocessor splitter = new DocumentPreprocessor(new StringReader(document));
  splitter.setTokenizerFactory(tokenizerFactory);

  Counter<String> counts = new ClassicCounter<String>();
  for (List<HasWord> sentence : splitter) {
    // Overlong sentences are skipped entirely rather than truncated.
    if (sentence.size() <= MAX_SENTENCE_LENGTH) {
      for (TaggedWord tagged : tagger.tagSentence(sentence)) {
        // NOTE(review): the prefix is lowercase "n" while Penn Treebank
        // noun tags are uppercase ("NN", ...) — confirm the tagger's
        // tag casing actually matches this check.
        if (tagged.tag().startsWith("n")) {
          counts.incrementCount(tagged.word());
        }
      }
    }
  }

  return counts;
}
 
开发者ID:asmehra95,项目名称:wiseowl,代码行数:30,代码来源:DocumentFrequencyCounter.java

示例3: parse

import edu.stanford.nlp.ling.TaggedWord; //导入依赖的package包/类
private Vector<String> parse(ArrayList<TaggedWord> taggedWords) {
    // Split the input around appositive boundaries, then scan each piece.
    Vector<ArrayList<TaggedWord>> sentences = findAppositives(taggedWords);
    Vector<String> patterns = new Vector<String>();
    for (ArrayList<TaggedWord> sentence : sentences) {
        IntegerMangi cursor = new IntegerMangi(0);
        while (cursor.get() < sentence.size()) {
            // Try each grammar production in priority order: A, then B, then C.
            Vector<String> matched = A(sentence, cursor);
            if (matched == null) {
                matched = B(sentence, cursor);
            }
            if (matched == null) {
                matched = C(sentence, cursor);
            }
            if (matched != null) {
                patterns.addAll(matched);
            } else {
                // Nothing matched at this position; advance by one token.
                cursor.incr();
            }
        }
    }
    return patterns;
}
 
开发者ID:jatecs,项目名称:jatecs,代码行数:22,代码来源:Parser.java

示例4: C

import edu.stanford.nlp.ling.TaggedWord; //导入依赖的package包/类
private Vector<String> C(ArrayList<TaggedWord> sentence, IntegerMangi index) {
    // Reset the spans recorded for this attempt of production C.
    normalization_V = new int[2];
    normalization_NF = new int[2];
    normalization_ADJF = new Vector<int[]>();

    int begin = index.get();
    normalization_V[0] = begin;

    // Each matcher advances `index` on success; on any failure the cursor
    // is rewound to where this production started.
    if (!match(sentence, index, "V")) {
        index.set(begin);
        return null;
    }
    normalization_V[1] = index.get();

    if (!ATTF(sentence, index)) {
        index.set(begin);
        return null;
    }
    normalization_NF[0] = index.get();

    if (!NF(sentence, index)) {
        index.set(begin);
        return null;
    }
    normalization_NF[1] = index.get();

    return TaggedWordsArrayToString(sentence, "C");
}
 
开发者ID:jatecs,项目名称:jatecs,代码行数:20,代码来源:Parser.java

示例5: tag

import edu.stanford.nlp.ling.TaggedWord; //导入依赖的package包/类
public Sentence tag(Sentence sent) {
	// Wrap each token as a Stanford HasWord so the tagger can consume it.
	List<HasWord> words = new ArrayList<HasWord>();
	for (Token token : sent) {
		Word wrapped = new Word();
		wrapped.setWord(token.toString());
		words.add(wrapped);
	}

	List<TaggedWord> tagged = tagger.tagSentence(words);

	// Copy each predicted tag back onto the corresponding sentence token.
	for (tuple2<Integer, TaggedWord> pair : x.enumerate(tagged)) {
		Token token = sent.get(pair.key);
		token.annotate("pos", pair.value.tag());
		sent.setAt(pair.key).value(token);
	}

	return sent;
}
 
开发者ID:WantedTechnologies,项目名称:xpresso,代码行数:17,代码来源:MaxentPosTagger.java

示例6: haveEquivalentPOSTags

import edu.stanford.nlp.ling.TaggedWord; //导入依赖的package包/类
/**
 * Check whether two tokens carry POS tags from the same coarse category.
 *
 * Verbs (VB*), nouns (NN*), adverbs (RB*) and adjectives (JJ*) are matched
 * by prefix; modals (MD) and wh-adverbs (WRB) must match exactly.
 *
 * @param firstToken  first tagged token, may be null
 * @param secondToken second tagged token, may be null
 * @return true when both tags fall into the same category; false otherwise,
 *         including when either token or either tag is null
 */
private Boolean haveEquivalentPOSTags(TaggedWord firstToken, TaggedWord secondToken){
	if (firstToken == null || secondToken == null
			|| firstToken.tag() == null || secondToken.tag() == null) {
		return false;
	}
	// Hoist the normalized tags once instead of re-computing toUpperCase()
	// in every comparison of the original chain.
	String first = firstToken.tag().toUpperCase();
	String second = secondToken.tag().toUpperCase();

	// Categories compared by prefix (e.g. VB, VBD, VBZ are all verbs).
	for (String prefix : new String[] { "VB", "NN", "RB", "JJ" }) {
		if (first.startsWith(prefix) && second.startsWith(prefix)) {
			return true;
		}
	}
	// Categories that must match exactly.
	for (String exact : new String[] { "MD", "WRB" }) {
		if (first.equals(exact) && second.equals(exact)) {
			return true;
		}
	}
	return false;
}
 
开发者ID:MDEGroup,项目名称:EMFCompare-Semantic-Extension,代码行数:22,代码来源:SemanticDistanceEvaluator.java

示例7: haveEquivalentPOSTags

import edu.stanford.nlp.ling.TaggedWord; //导入依赖的package包/类
/**
 * Check whether two tokens carry POS tags from the same coarse category:
 * VB*, NN*, RB* and JJ* match by prefix; MD and WRB must match exactly.
 *
 * @param firstToken  first tagged token, may be null
 * @param secondToken second tagged token, may be null
 * @return true when both tags fall into the same category; false otherwise,
 *         including when either token or either tag is null
 */
private Boolean haveEquivalentPOSTags(TaggedWord firstToken, TaggedWord secondToken){
	if (firstToken == null || secondToken == null
			|| firstToken.tag() == null || secondToken.tag() == null) {
		return false;
	}
	// Normalize once instead of calling toUpperCase() per comparison.
	String first = firstToken.tag().toUpperCase();
	String second = secondToken.tag().toUpperCase();

	// Prefix-equivalent categories (e.g. VB, VBD, VBZ are all verbs).
	for (String prefix : new String[] { "VB", "NN", "RB", "JJ" }) {
		if (first.startsWith(prefix) && second.startsWith(prefix)) {
			return true;
		}
	}
	// Exact-match categories.
	for (String exact : new String[] { "MD", "WRB" }) {
		if (first.equals(exact) && second.equals(exact)) {
			return true;
		}
	}
	return false;
}
 
开发者ID:MDEGroup,项目名称:EMFCompare-Semantic-Extension,代码行数:19,代码来源:SemanticDistanceEvaluator.java

示例8: main

import edu.stanford.nlp.ling.TaggedWord; //导入依赖的package包/类
/**
 * Entry point: POS-tags a small sample text and prints word/tag pairs,
 * one sentence per line.
 *
 * @param args unused
 * @throws Exception on wrapper initialization or tagging failure
 */
public static void main(String[] args) throws Exception {
	// Sample input text.
	String text = "John loves Mary. She loves him too.";

	// Load the wrapper and its POS-tagging model.
	StanfordNlpWrapper nlp = new StanfordNlpWrapper(Env.STANFORDNLP_CFG);
	nlp.loadPosTagger();

	// Tag each detected sentence and print "word/tag" tokens joined by spaces.
	for (List<HasWord> sentence : StanfordNlpWrapper.detect(text)) {
		List<String> pairs = new ArrayList<String>();
		for (TaggedWord tw : nlp.tag(sentence)) {
			pairs.add(tw.word() + "/" + tw.tag());
		}
		System.out.println(String.join(" ", pairs));
	}
}
 
开发者ID:hakchul77,项目名称:irnlp_toolkit,代码行数:23,代码来源:POSTagging.java

示例9: processSentence

import edu.stanford.nlp.ling.TaggedWord; //导入依赖的package包/类
public List<WordLemmaTag> processSentence(String sentence, boolean isTokenized)
{
	final StanfordLemmatizer lemmatizer = StanfordLemmatizer.getInstance();
	final StanfordPOSTagger tagger = StanfordPOSTagger.getInstance();
	final List<WordLemmaTag> result = new ArrayList<WordLemmaTag>();

	// POS-tag the input, tokenizing it first unless the caller already did.
	List<TaggedWord> taggedSentence;
	if (isTokenized)
	{
		taggedSentence = tagger.tag(sentence);
	}
	else
	{
		List<Word> tokens = StanfordTokenizer.getInstance().tokenize(sentence);
		taggedSentence = tagger.tag(tokens);
	}

	// Attach a lemma to every tagged word.
	for (TaggedWord taggedWord : taggedSentence)
	{
		result.add(lemmatizer.lemmatize(taggedWord));
	}

	return result;
}
 
开发者ID:pschuette22,项目名称:Zeppa-AppEngine,代码行数:23,代码来源:SentenceProcessor.java

示例10: generateFeatureVectorFromTaggedWords

import edu.stanford.nlp.ling.TaggedWord; //导入依赖的package包/类
@Override
public Map<Integer, Double> generateFeatureVectorFromTaggedWords(
    List<TaggedWord> tweet) {
  // This combined generator is only usable in tagged-words mode.
  if (!m_useTaggedWords) {
    throw new RuntimeException(
        "Use TaggedWords was set to false! generateFeatureVectorFromTaggedWords is not applicable!");
  }

  // Merge the three per-aspect feature vectors into one map; later
  // putAll calls overwrite any colliding feature ids.
  Map<Integer, Double> combined =
      m_sentimentFeatureVectorGenerator.generateFeatureVectorFromTaggedWords(tweet);
  combined.putAll(
      m_POSFeatureVectorGenerator.generateFeatureVectorFromTaggedWords(tweet));
  combined.putAll(
      m_tfidfFeatureVectorGenerator.generateFeatureVectorFromTaggedWords(tweet));
  return combined;
}
 
开发者ID:millecker,项目名称:storm-apps,代码行数:20,代码来源:CombinedFeatureVectorGenerator.java

示例11: pretagToken

import edu.stanford.nlp.ling.TaggedWord; //导入依赖的package包/类
private TaggedWord pretagToken(String token, boolean tokenIsHashTag,
    boolean tokenIsUser, boolean tokenIsURL) {
  TaggedWord preTagged = new TaggedWord(token);

  // Decide on a pre-tag first; apply it only if some rule matched.
  String tag = null;
  if (tokenIsHashTag) {
    tag = "HT";
  } else if (tokenIsUser) {
    tag = "USR";
  } else if (tokenIsURL) {
    tag = "URL";
  } else if (StringUtils.isRetweet(token)) {
    tag = "RT";
  } else if (m_nameEntities.isNameEntity(token)) {
    if (LOGGING) {
      LOG.info("NameEntity labelled for " + token);
    }
    tag = "NNP";
  } else if (m_interjections.isInterjection(token)
      || StringUtils.isEmoticon(token)) {
    if (LOGGING) {
      LOG.info("Interjection or Emoticon labelled for " + token);
    }
    tag = "UH";
  }
  if (tag != null) {
    preTagged.setTag(tag);
  }
  return preTagged;
}
 
开发者ID:millecker,项目名称:storm-apps,代码行数:26,代码来源:Preprocessor.java

示例12: createFromTaggedWords

import edu.stanford.nlp.ling.TaggedWord; //导入依赖的package包/类
public static TweetTfIdf createFromTaggedWords(List<List<TaggedWord>> tweets,
    TfType type, TfIdfNormalization normalization, boolean usePOSTags) {

  TweetTfIdf instance = new TweetTfIdf(type, normalization, usePOSTags);

  // Term frequencies per tweet, then inverse document frequencies over all tweets.
  instance.m_termFreqs = tfTaggedWordTweets(tweets, type, usePOSTags);
  instance.m_inverseDocFreq = idf(instance.m_termFreqs);

  // Assign each distinct term a dense integer id in key-iteration order.
  instance.m_termIds = new HashMap<String, Integer>();
  int nextId = 0;
  for (String term : instance.m_inverseDocFreq.keySet()) {
    instance.m_termIds.put(term, nextId++);
  }

  LOG.info("Found " + instance.m_inverseDocFreq.size() + " terms");
  return instance;
}
 
开发者ID:millecker,项目名称:storm-apps,代码行数:22,代码来源:TweetTfIdf.java

示例13: rawParse

import edu.stanford.nlp.ling.TaggedWord; //导入依赖的package包/类
/**
 * Parse a POS-tagged sentence into a dependency graph.
 *
 * Each token is rendered as one tab-separated CoNLL line
 * (index, word, lemma, pos, pos, "_") before being handed to the parser.
 *
 * @param sentence tagged tokens of a single sentence
 * @return the parsed dependency graph
 * @throws IOException on parser I/O failure
 * @throws MaltChainedException when the underlying parser fails
 */
public ConcurrentDependencyGraph rawParse(List<TaggedWord> sentence)
        throws IOException,
        MaltChainedException {
    String[] conll = new String[sentence.size()];
    for (int i = 0; i < sentence.size(); i++) {
        TaggedWord taggedWord = sentence.get(i);
        String word = taggedWord.word();
        // Fall back to the CoNLL placeholder "_" when no lemmatizer is set.
        // (Renamed from "Lemma": locals use lowerCamelCase.)
        String lemma = (this.lemmatizer != null)
                ? this.lemmatizer.lemmatize(word)
                : "_";
        String pos = taggedWord.tag();

        conll[i] = String.format("%s\t%s\t%s\t%s\t%s\t%s",
                i + 1, word, lemma, pos, pos, "_");
    }
    return parse(conll);
}
 
开发者ID:mojtaba-khallash,项目名称:JHazm,代码行数:18,代码来源:DependencyParser.java

示例14: joinVerbParts

import edu.stanford.nlp.ling.TaggedWord; //导入依赖的package包/类
/**
 * Join multi-part verbs into a single token, following the Dadegan corpus
 * convention.
 *
 * Example input (two tokens):
 *     دیده/ADJ_INO
 *     شد/V_PA
 * Example output (one joined token):
 *     دیده شد/V_PA
 *
 * NOTE(review): the input list is reversed in place and never restored, so
 * the caller's {@code sentence} is left mutated — confirm callers expect this.
 *
 * @param sentence tagged tokens of one sentence, in reading order
 * @return new list with verb parts merged, in reading order
 */
public static List<TaggedWord> joinVerbParts(List<TaggedWord> sentence) {
    // Process tokens in reverse order so each token is compared against the
    // token that follows it in reading order (held in beforeTaggedWord).
    Collections.reverse(sentence);
    List<TaggedWord> result = new ArrayList<>();
    TaggedWord beforeTaggedWord = new TaggedWord("", "");
    for (TaggedWord taggedWord : sentence) {
        // Merge when this word is a pre-verbal particle, or when the
        // previously-seen word is a post-verbal part and this word is a verb.
        if (PeykareReader.tokenizer.getBeforeVerbs().contains(taggedWord.word()) ||
                (PeykareReader.tokenizer.getAfterVerbs().contains(beforeTaggedWord.word()) &&
                        PeykareReader.tokenizer.getVerbs().contains(taggedWord.word()))) {
            // Prepend (iteration is reversed) this part onto the word it joins.
            beforeTaggedWord.setWord(taggedWord.word() + " " + beforeTaggedWord.word());
            if (result.isEmpty())
                result.add(beforeTaggedWord);
        }
        else {
            result.add(taggedWord);
            beforeTaggedWord = taggedWord;
        }
    }

    // Restore reading order of the produced tokens.
    Collections.reverse(result);
    return result;
}
 
开发者ID:mojtaba-khallash,项目名称:JHazm,代码行数:30,代码来源:PeykareReader.java

示例15: posMapTest

import edu.stanford.nlp.ling.TaggedWord; //导入依赖的package包/类
@Test
public void posMapTest() throws IOException {
    BijankhanReader reader = new BijankhanReader(false);

    // Expected POS mapping for the first sentence of the corpus.
    List<TaggedWord> expected = new ArrayList<>();
    expected.add(new TaggedWord("اولین", "ADJ"));
    expected.add(new TaggedWord("سیاره", "N"));
    expected.add(new TaggedWord("خارج", "ADJ"));
    expected.add(new TaggedWord("از", "PREP"));
    expected.add(new TaggedWord("منظومه", "N"));
    expected.add(new TaggedWord("شمسی", "ADJ"));
    expected.add(new TaggedWord("دیده", "ADJ"));
    expected.add(new TaggedWord("شد", "V"));
    expected.add(new TaggedWord(".", "PUNC"));

    List<TaggedWord> actual = reader.getSentences().iterator().next();

    assertEquals("Failed to map pos of sentence", expected.size(), actual.size());
    for (int pos = 0; pos < expected.size(); pos++) {
        TaggedWord want = expected.get(pos);
        TaggedWord got = actual.get(pos);
        // Only raise the full object comparison when the tags differ, so the
        // failure message shows the mismatching word/tag pair.
        if (!got.tag().equals(want.tag()))
            assertEquals("Failed to map pos of sentence", want, got);
    }
}
 
开发者ID:mojtaba-khallash,项目名称:JHazm,代码行数:26,代码来源:BijankhanReaderTest.java


注:本文中的edu.stanford.nlp.ling.TaggedWord类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。