当前位置: 首页>>代码示例>>Java>>正文


Java NamedEntityTagAnnotation类代码示例

本文整理汇总了Java中edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation的典型用法代码示例。如果您正苦于以下问题:Java NamedEntityTagAnnotation类的具体用法?Java NamedEntityTagAnnotation怎么用?Java NamedEntityTagAnnotation使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。


NamedEntityTagAnnotation类属于edu.stanford.nlp.ling.CoreAnnotations包,在下文中一共展示了NamedEntityTagAnnotation类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: needsReannotation

import edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation; //导入依赖的package包/类
/**
 * Decides whether a document must be annotated again because a required annotation is absent.
 *
 * <p>Only the first sentence and the first token are inspected; their key sets are taken as
 * representative of the whole document.
 *
 * @param a
 *          annotation to inspect
 * @param r
 *          reader whose MWE, NE and syntax switches determine which annotations are required
 * @return {@code true} when the sentence or token list is missing or empty, or when an annotation
 *         required by {@code r} is not present; {@code false} otherwise
 */
private boolean needsReannotation(Annotation a, KpeReader r) {
  List<CoreMap> sentences = a.get(SentencesAnnotation.class);
  List<CoreLabel> tokens = a.get(TokensAnnotation.class);
  if (tokens == null || sentences == null || tokens.isEmpty() || sentences.isEmpty()) {
    // Nothing usable was annotated at all.
    return true;
  }

  Set<Class<?>> firstSentenceKeys = sentences.get(0).keySet();
  Set<Class<?>> firstTokenKeys = tokens.get(0).keySet();

  // Token-level requirements: multi-word expressions, then named entities.
  boolean mweMissing = r.getIsMweOn() && !firstTokenKeys.contains(MWEAnnotation.class);
  if (mweMissing) {
    return true;
  }
  boolean neMissing = r.getIsNeOn() && !firstTokenKeys.contains(NamedEntityTagAnnotation.class);
  if (neMissing) {
    return true;
  }

  // Sentence-level requirement: the parse tree.
  return r.getIsSyntaxOn() && !firstSentenceKeys.contains(TreeAnnotation.class);
}
 
开发者ID:begab,项目名称:kpe,代码行数:28,代码来源:DocumentData.java

示例2: en

import edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation; //导入依赖的package包/类
/**
 * Demo entry point: annotates a fixed two-sentence English text and prints, for every token, its
 * named-entity tag followed by its answer annotation (one value per line on standard output).
 *
 * @param a unused
 */
public static void en(final String[] a) {
    final Properties config = new Properties();
    config.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref, relation");
    final StanfordCoreNLP nlp = new StanfordCoreNLP(config);

    final Annotation document =
        new Annotation("Stanford University is located in California. It is a great university.");
    nlp.annotate(document);

    for (final CoreMap sentence : document.get(SentencesAnnotation.class)) {
      for (final CoreLabel label : sentence.get(TokensAnnotation.class)) {
        final String nerTag = label.get(NamedEntityTagAnnotation.class);
        System.out.println(nerTag);
        System.out.println(label.get(CoreAnnotations.AnswerAnnotation.class));
      }
    }
}
 
开发者ID:dice-group,项目名称:FOX,代码行数:26,代码来源:StanfordENOldVersion.java

示例3: PreNERCoreLabelWrapper

import edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation; //导入依赖的package包/类
/**
 *
 */
public PreNERCoreLabelWrapper(final CoreLabel cl) {
  this.orig = new TokenizedCoreLabelWrapper(cl);

  this.posTag = Optional.ofNullable(cl.get(PartOfSpeechAnnotation.class));
  this.nerTag = Optional.ofNullable(cl.get(NamedEntityTagAnnotation.class));
  this.lemmaTag = Optional.ofNullable(cl.get(LemmaAnnotation.class));
}
 
开发者ID:hltcoe,项目名称:concrete-stanford-deprecated2,代码行数:11,代码来源:PreNERCoreLabelWrapper.java

示例4: annotateText

import edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation; //导入依赖的package包/类
/**
 * Annotates {@code text} and groups the token texts by their named-entity tag.
 *
 * <p>{@code loadResource()} is called first; presumably it prepares the shared pipeline (verify
 * against its definition).
 *
 * @param text raw text to annotate
 * @return a multimap from NER tag to the text of every token carrying that tag, in token order;
 *         tokens are stored under whatever tag value the annotation lookup returns
 */
public Multimap<String, String> annotateText(String text) {
	loadResource();

	Annotation annotated = new Annotation(text);
	pipeline.annotate(annotated);

	Multimap<String, String> wordsByTag = ArrayListMultimap.create();
	for (CoreLabel label : annotated.get(TokensAnnotation.class)) {
		wordsByTag.put(
				label.get(NamedEntityTagAnnotation.class),
				label.get(TextAnnotation.class));
	}
	return wordsByTag;
}
 
开发者ID:TekstoSense,项目名称:word-root-finder,代码行数:14,代码来源:StanfordEntityTagger.java

示例5: tagTokens

import edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation; //导入依赖的package包/类
/**
 * Runs the pipeline on {@code text} and renders every token as a slash-separated record.
 *
 * @param text raw text to annotate
 * @return one entry per token, in document order, formatted as
 *         {@code word/pos/ner/lemma/sentenceIndex}
 */
public List<String> tagTokens(String text) {
	Annotation annotated = runPipeline(text);
	List<String> records = new ArrayList<String>();

	// A CoreMap is a map keyed by annotation classes; each sentence is one such map.
	for (CoreMap sentence : annotated.get(SentencesAnnotation.class)) {
		// A CoreLabel is a CoreMap with additional token-specific methods.
		for (CoreLabel label : sentence.get(TokensAnnotation.class)) {
			String surface = label.get(TextAnnotation.class);
			String posTag = label.get(PartOfSpeechAnnotation.class);
			String nerTag = label.get(NamedEntityTagAnnotation.class);
			String lemma = label.get(LemmaAnnotation.class);
			int sentenceIndex = label.get(SentenceIndexAnnotation.class);

			StringBuilder record = new StringBuilder();
			record.append(surface).append("/")
					.append(posTag).append("/")
					.append(nerTag).append("/")
					.append(lemma).append("/")
					.append(sentenceIndex);
			records.add(record.toString());
		}
	}
	return records;
}
 
开发者ID:sunil3590,项目名称:artificial-guy,代码行数:34,代码来源:NLP.java

示例6: getNamedEntityMentions

import edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation; //导入依赖的package包/类
/**
 * Finds the token spans of named-entity mentions in a sentence.
 *
 * <p>A mention is a maximal run of consecutive tokens sharing the same tag. Tags that are not in
 * {@code validTags} are treated as {@code "O"} (outside any mention).
 *
 * @param sentence sentence whose tokens carry NER tags
 * @return one pair per mention: index of its first token and index one past its last token
 */
public static List<Pair<Integer, Integer>> getNamedEntityMentions(
		CoreMap sentence) {
	List<Pair<Integer, Integer>> spans = new ArrayList<Pair<Integer, Integer>>();
	String previous = "O";
	int index = 0;
	int spanStart = -1;
	for (CoreLabel label : sentence.get(TokensAnnotation.class)) {
		String current = label.get(NamedEntityTagAnnotation.class);
		if (!validTags.contains(current)) {
			current = "O";
		}
		if (!current.equals(previous)) {
			// The tag changed: close the running mention, if there was one ...
			if (!previous.equals("O")) {
				spans.add(Pair.makePair(spanStart, index));
			}
			// ... and open a new one unless we just left entity territory.
			spanStart = current.equals("O") ? -1 : index;
		}
		previous = current;
		index++;
	}
	// A mention that runs to the end of the sentence is still open here.
	if (!previous.equals("O")) {
		spans.add(Pair.makePair(spanStart, index));
	}
	return spans;
}
 
开发者ID:zhangcongle,项目名称:NewsSpikeRe,代码行数:35,代码来源:FigerSystem.java

示例7: getAllNamedEntityMentions

import edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation; //导入依赖的package包/类
/**
 * Finds the token spans of all named-entity mentions in a sentence, whatever their tag.
 *
 * <p>A mention is a maximal run of consecutive tokens sharing the same non-{@code "O"} tag. A
 * token without an NER annotation ({@code null} tag) is treated as {@code "O"} instead of
 * causing a {@link NullPointerException}.
 *
 * @param sentence sentence whose tokens carry NER tags
 * @return one pair per mention: index of its first token and index one past its last token
 */
public static List<Pair<Integer, Integer>> getAllNamedEntityMentions(CoreMap sentence) {
	List<Pair<Integer, Integer>> offsets = new ArrayList<Pair<Integer, Integer>>();
	String prevTag = "O";
	int tid = 0;
	int start = -1;
	for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
		String tag = token.get(NamedEntityTagAnnotation.class);
		if (tag == null) {
			// Untagged token: it cannot be part of a mention.
			tag = "O";
		}
		if (!tag.equals(prevTag)) {
			// The tag changed: close the running mention, if any.
			if (!prevTag.equals("O")) {
				offsets.add(Pair.makePair(start, tid));
			}
			// Open a new mention only when the new tag is an entity tag.
			start = tag.equals("O") ? -1 : tid;
		}
		prevTag = tag;
		tid++;
	}
	// A mention that runs to the end of the sentence is still open here.
	if (!prevTag.equals("O")) {
		offsets.add(Pair.makePair(start, tid));
	}
	return offsets;
}
 
开发者ID:zhangcongle,项目名称:NewsSpikeRe,代码行数:34,代码来源:ParseStanfordFigerReverb.java

示例8: getNamedEntityMentions

import edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation; //导入依赖的package包/类
/**
 * Collects the token spans of named-entity mentions whose tag is listed in {@code validTags}.
 *
 * <p>Tokens with any other tag count as {@code "O"}. Consecutive tokens with the same tag form
 * one mention; a change of tag ends the current mention and, if the new tag is an entity tag,
 * starts the next one.
 *
 * @param sentence sentence whose tokens carry NER tags
 * @return one pair per mention: index of its first token and index one past its last token
 */
public static List<Pair<Integer, Integer>> getNamedEntityMentions(CoreMap sentence) {
	List<Pair<Integer, Integer>> mentions = new ArrayList<Pair<Integer, Integer>>();
	List<CoreLabel> labels = sentence.get(TokensAnnotation.class);
	String lastTag = "O";
	int openedAt = -1;
	for (int pos = 0; pos < labels.size(); pos++) {
		String rawTag = labels.get(pos).get(NamedEntityTagAnnotation.class);
		String tag = validTags.contains(rawTag) ? rawTag : "O";

		boolean tagChanged = !tag.equals(lastTag);
		if (tagChanged && !lastTag.equals("O")) {
			// Leaving a mention: record it as [openedAt, pos).
			mentions.add(Pair.makePair(openedAt, pos));
		}
		if (tagChanged) {
			openedAt = tag.equals("O") ? -1 : pos;
		}
		lastTag = tag;
	}
	// Flush a mention that extends to the last token.
	if (!lastTag.equals("O")) {
		mentions.add(Pair.makePair(openedAt, labels.size()));
	}
	return mentions;
}
 
开发者ID:zhangcongle,项目名称:NewsSpikeRe,代码行数:34,代码来源:ParseStanfordFigerReverb.java

示例9: recognizeNE

import edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation; //导入依赖的package包/类
/**
 * Extracts the tokens of a tweet that are tagged as a person, organization or location.
 *
 * <p>All {@code @} and {@code #} characters are stripped from the tweet before it is annotated.
 * Tokens without an NER annotation are skipped rather than causing a
 * {@link NullPointerException}.
 *
 * @param tweet raw tweet text
 * @return the text of every token tagged {@code PERSON}, {@code ORGANIZATION} or
 *         {@code LOCATION}, in document order (one entry per token, not per mention)
 * @throws IOException declared for callers; nothing in this method body throws it directly
 */
public static List<String> recognizeNE(String tweet) throws IOException {
	// Literal replacement: no regular expression is needed to drop two fixed characters.
	final String cleaned = tweet.replace("@", "").replace("#", "");

	final Annotation doc = new Annotation(cleaned);
	pipeline.annotate(doc);

	final List<String> neList = new ArrayList<String>();
	for (CoreMap sentence : doc.get(SentencesAnnotation.class)) {
		for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
			String ne = token.get(NamedEntityTagAnnotation.class);
			// Constant-first equals keeps the check safe when the tag is null.
			if ("PERSON".equals(ne) || "ORGANIZATION".equals(ne) || "LOCATION".equals(ne)) {
				neList.add(token.get(TextAnnotation.class));
			}
		}
	}
	return neList;
}
 
开发者ID:ganeshjawahar,项目名称:ire-seimp,代码行数:30,代码来源:StanfordCrf.java

示例10: DoAll

import edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation; //导入依赖的package包/类
/**
 * Annotates {@code data} and fills the caller-supplied arrays and edge maps from the result.
 *
 * <p>The freshly created annotation is stored in the {@code document} field. For the i-th token
 * of the document, {@code TokenizedData[i]}, {@code POSTags[i]} and {@code LEMMA[i]} receive the
 * token's string form, POS tag and lemma. If the document has at least one sentence, the basic
 * dependency graph of the first sentence is passed to {@code createEdgeMap} together with the two
 * edge maps.
 *
 * @param data          text to annotate
 * @param TokenizedData output array for token strings; must have at least as many slots as the
 *                      text has tokens, otherwise an {@link ArrayIndexOutOfBoundsException} is
 *                      thrown
 * @param POSTags       output array for POS tags; same size requirement
 * @param LEMMA         output array for lemmas; same size requirement
 * @param parentEdge    map handed to {@code createEdgeMap}
 * @param childrenEdge  map handed to {@code createEdgeMap}
 */
public void DoAll(String data, String[] TokenizedData, String[] POSTags, String[] LEMMA, Map<String,String> parentEdge,
    Map<String,ArrayList<String>> childrenEdge)
{
  document = new Annotation(data);
  pipelineTags.annotate(document);

  int i = 0;
  for (CoreLabel token : document.get(TokensAnnotation.class)) {
    TokenizedData[i] = token.toString();
    POSTags[i] = token.get(PartOfSpeechAnnotation.class);
    LEMMA[i] = token.get(LemmaAnnotation.class);
    i++;
  }

  // The sentence list is null when no sentence splitter ran; treat that like "no sentences".
  List<CoreMap> sentences = document.get(SentencesAnnotation.class);
  if (sentences != null && !sentences.isEmpty()) {
    SemanticGraph tree = sentences.get(0).get(BasicDependenciesAnnotation.class);
    createEdgeMap(tree, parentEdge, childrenEdge);
  }
}
 
开发者ID:weizh,项目名称:geolocator-3.0,代码行数:30,代码来源:StanfordNLP.java

示例11: NERTagger

import edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation; //导入依赖的package包/类
/**
 * Returns the named-entity tag of every token in {@code data}.
 *
 * <p>The result array is sized from the token list. Previously it was left {@code null} and then
 * indexed, so any non-empty input failed with a {@link NullPointerException}.
 *
 * @param data text to tokenize and tag
 * @return one NER tag per token, in token order; an empty array when there are no tokens. An
 *         element is {@code null} when the token has no NER annotation.
 */
public String[] NERTagger(String data)
{
  List<CoreLabel> tokens = Tokenizer(data);
  String[] nerTags = new String[tokens.size()];
  int i = 0;
  for (CoreLabel token : tokens) {
    nerTags[i] = token.get(NamedEntityTagAnnotation.class);
    i++;
  }
  return nerTags;
}
 
开发者ID:weizh,项目名称:geolocator-3.0,代码行数:15,代码来源:StanfordNLP.java

示例12: annotate

import edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation; //导入依赖的package包/类
/**
 * Looks for the pattern "entity , entity" in the document's tokens.
 *
 * <p>For each token that carries an entity tag, whose predecessor is not an entity and reads
 * {@code ","}, and whose pre-predecessor carries an entity tag, the tag pair is counted in
 * {@code nerPairs} under the key {@code "<firstTag>,<secondTag>"}. When both tags are
 * {@code LOCATION}, the comma token is rewritten in place as the conjunction "and" (text, value,
 * lemma) with POS tag {@code CC}.
 *
 * <p>Documents without a tokens annotation are left untouched.
 *
 * @param document the annotation to scan and possibly modify
 */
@Override
public void annotate(Annotation document) {
	if (!document.has(TokensAnnotation.class)) {
		return;
	}

	// Sliding window over the tags of the last three tokens.
	String tagTwoBack = null;
	String tagOneBack = null;
	CoreLabel previousToken = null;

	for (CoreLabel current : document.get(TokensAnnotation.class)) {
		String tagNow = current.get(NamedEntityTagAnnotation.class);

		boolean nowIsEntity = tagNow != null && !tagNow.equals("O");
		boolean oneBackIsEntity = tagOneBack != null && !tagOneBack.equals("O");
		boolean twoBackIsEntity = tagTwoBack != null && !tagTwoBack.equals("O");

		// Two entities in a row are deliberately ignored; only "entity , entity" is of interest.
		// previousToken is non-null here: twoBackIsEntity implies at least two earlier tokens.
		if (nowIsEntity && !oneBackIsEntity && twoBackIsEntity
				&& previousToken.get(TextAnnotation.class).equals(",")) {
			String textRep = tagTwoBack + "," + tagNow;
			int occurrences = nerPairs.containsKey(textRep) ? nerPairs.get(textRep) + 1 : 1;
			nerPairs.put(textRep, occurrences);

			if (tagTwoBack.equals("LOCATION") && tagNow.equals("LOCATION")) {
				// Turn the separating comma into a coordinating conjunction.
				previousToken.set(TextAnnotation.class, "and");
				previousToken.set(ValueAnnotation.class, "and");
				previousToken.set(PartOfSpeechAnnotation.class, "CC");
				previousToken.set(LemmaAnnotation.class, "and");
			}
		}

		tagTwoBack = tagOneBack;
		tagOneBack = tagNow;
		previousToken = current;
	}
}
 
开发者ID:U-Alberta,项目名称:exemplar,代码行数:36,代码来源:LocationJuxtapositionAnnotator.java

示例13: main

import edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation; //导入依赖的package包/类
/**
 * Demo entry point: annotates a fixed sample text (with a time annotator added to the pipeline)
 * and prints the document text followed by word, POS tag and NER tag for every token.
 *
 * <p>For each time expression a {@code TimeData} object is populated with the ISO string and the
 * character offsets of its first and last token; the object is not used afterwards.
 *
 * @param args unused
 */
public static void main(String args[])
{
	Properties config = new Properties();
	config.setProperty("annotators", "tokenize, cleanxml, ssplit,pos,lemma,ner");

	StanfordCoreNLP nlp = new StanfordCoreNLP(config);
	nlp.addAnnotator(new TimeAnnotator("sutime", config));

	String sample = "<mydata> refeer</mydata>today is 12 jan 2016. what is tommorow? Who is Avtar? Does he work at Apple or Google? Sumit was born on 13 feb,2011.";
	Annotation annotated = new Annotation(sample);
	nlp.annotate(annotated);
	System.out.println(annotated.get(CoreAnnotations.TextAnnotation.class));

	// Time expressions found in the text.
	List<CoreMap> timeExpressions = annotated.get(TimeAnnotations.TimexAnnotations.class);
	for (CoreMap expression : timeExpressions) {
		List<CoreLabel> span = expression.get(CoreAnnotations.TokensAnnotation.class);
		TimeData timeData = new TimeData();
		timeData.setTime(expression.get(TimeExpression.Annotation.class).getTemporal().toISOString());
		timeData.setStart(span.get(0).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
		timeData.setEnd(span.get(span.size() - 1).get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
	}

	// Per-token dump; a CoreLabel is a CoreMap with additional token-specific methods.
	for (CoreMap sentence : annotated.get(SentencesAnnotation.class)) {
		System.out.println("in sent");
		for (CoreLabel label : sentence.get(TokensAnnotation.class)) {
			System.out.println("in token");
			String word = label.get(TextAnnotation.class);
			String pos = label.get(PartOfSpeechAnnotation.class);
			String ne = label.get(NamedEntityTagAnnotation.class);
			System.out.println("word : "+word+" pos: "+pos+" ner: "+ne);
		}
	}
}
 
开发者ID:asmehra95,项目名称:wiseowl,代码行数:42,代码来源:WiseOwlStanfordFilter.java

示例14: findTokens

import edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation; //导入依赖的package包/类
/**
 * Pulls the next term from {@code input}, annotates it and queues the results.
 *
 * <p>Every time expression found is appended to {@code timeQueue} (string form of the temporal
 * plus the character offsets of its first and last token). Every token is appended to
 * {@code tokenQueue} with its text, POS tag and NER tag. Afterwards {@code itr_cpy} is set to a
 * second iterator over {@code tokenQueue}, and {@code tokenOffset}, {@code start} and {@code end}
 * are reset to 0.
 *
 * <p>NOTE(review): {@code input} looks like a Lucene token stream, judging by
 * {@code incrementToken()} and {@code CharTermAttribute} — confirm against the enclosing class.
 *
 * @return an iterator over {@code tokenQueue}, or {@code null} when {@code input} has no further
 *         token
 * @throws IOException if advancing {@code input} fails
 */
public Iterator findTokens() throws IOException
{
	if (!input.incrementToken()) {
		return null;
	}

	String termText = input.getAttribute(CharTermAttribute.class).toString();
	Annotation annotated = new Annotation(termText);
	pipeline.annotate(annotated);

	// Queue the time expressions with their character span.
	List<CoreMap> timeExpressions = annotated.get(TimeAnnotations.TimexAnnotations.class);
	for (CoreMap expression : timeExpressions) {
		List<CoreLabel> span = expression.get(CoreAnnotations.TokensAnnotation.class);
		TimeData timeData = new TimeData();
		timeData.setTime(expression.get(TimeExpression.Annotation.class).getTemporal().toString());
		timeData.setStart(span.get(0).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
		timeData.setEnd(span.get(span.size() - 1).get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
		timeQueue.add(timeData);
	}

	// Queue every token with its text, POS tag and NER tag.
	List<CoreMap> sentences = annotated.get(SentencesAnnotation.class);
	for (CoreMap sentence : sentences) {
		for (CoreLabel label : sentence.get(TokensAnnotation.class)) {
			System.out.println("in token");
			String word = label.get(TextAnnotation.class);
			String pos = label.get(PartOfSpeechAnnotation.class);
			String ne = label.get(NamedEntityTagAnnotation.class);

			TokenData tokenData = new TokenData();
			tokenData.setNER(ne);
			tokenData.setToken(word);
			tokenData.setPOS(pos);
			tokenQueue.add(tokenData);
		}
	}

	Iterator<TokenData> result = tokenQueue.iterator();
	itr_cpy = tokenQueue.iterator();
	tokenOffset = 0;
	start = 0;
	end = 0;
	return result;
}
 
开发者ID:asmehra95,项目名称:wiseowl,代码行数:59,代码来源:WiseOwlStanfordFilter.java

示例15: ExtractNamedEntityFile

import edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation; //导入依赖的package包/类
/**
 * Extracts named entities from a file and merges entity tokens that directly follow each other.
 *
 * <p>The lines returned by {@code ReadRawData} are annotated one by one with a pipeline that
 * includes {@code regexner} (mapping file {@code data.txt}). Tokens are numbered from 1 across
 * the whole file. Every token whose NER tag is present and not {@code "O"} is collected, and runs
 * of tokens at consecutive positions are then merged into a single entity.
 *
 * <p>Earlier the merge step relied on a swallowed {@link IndexOutOfBoundsException} to detect the
 * end of the list, which silently discarded the last entity (or the last merged run). All
 * entities are now returned.
 *
 * @param filePath file to read
 * @return merged entities in document order; each carries its words joined by (and ending with)
 *         a single space, the NER tag of its first token, and the position of its last token
 * @throws Exception if reading the file or running the pipeline fails
 */
@Override
public List<ExtractNamedEntity> ExtractNamedEntityFile(File filePath) throws Exception {
    List<String> lstData = ReadRawData(filePath);

    Properties props = new Properties();
    props.put("annotators", "tokenize, ssplit, pos, lemma, ner,regexner");
    props.put("regexner.mapping", "data.txt");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    List<ExtractNamedEntity> entityTokens = new ArrayList<>();
    int wordPosition = 0;
    for (String line : lstData) {
        Annotation document = new Annotation(line);
        pipeline.annotate(document);

        for (CoreMap sentence : document.get(SentencesAnnotation.class)) {
            for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
                // Every token advances the position, entity or not, so that adjacency of
                // positions means adjacency in the text.
                wordPosition++;
                String nerTag = token.get(NamedEntityTagAnnotation.class);
                if (nerTag == null || nerTag.equalsIgnoreCase("O")) {
                    continue;
                }
                String word = token.get(TextAnnotation.class);
                entityTokens.add(new ExtractNamedEntity(word, nerTag, wordPosition));
            }
        }
    }

    return mergeAdjacentEntities(entityTokens);
}

/**
 * Merges runs of entity tokens at consecutive positions into one entity per run.
 *
 * @param entityTokens single-token entities ordered by position
 * @return one entity per run: words joined by a trailing space each, tag of the run's first
 *         token, position of the run's last token
 */
private List<ExtractNamedEntity> mergeAdjacentEntities(List<ExtractNamedEntity> entityTokens) {
    List<ExtractNamedEntity> merged = new ArrayList<>();
    int i = 0;
    while (i < entityTokens.size()) {
        ExtractNamedEntity first = entityTokens.get(i);
        StringBuilder words = new StringBuilder().append(first.word).append(' ');
        int lastPosition = first.position;

        // Absorb following tokens for as long as they sit directly after the current one.
        while (i + 1 < entityTokens.size()
                && entityTokens.get(i + 1).position == lastPosition + 1) {
            i++;
            ExtractNamedEntity next = entityTokens.get(i);
            words.append(next.word).append(' ');
            lastPosition = next.position;
        }

        merged.add(new ExtractNamedEntity(words.toString(), first.ner, lastPosition));
        i++;
    }
    return merged;
}
 
开发者ID:unsw-cse-soc,项目名称:Data-curation-API,代码行数:75,代码来源:ExtractEntityFile.java


注:本文中的edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。