当前位置: 首页>>代码示例>>Java>>正文


Java StanfordNamedEntityRecognizer类代码示例

本文整理汇总了Java中de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordNamedEntityRecognizer的典型用法代码示例。如果您正苦于以下问题:Java StanfordNamedEntityRecognizer类的具体用法?Java StanfordNamedEntityRecognizer怎么用?Java StanfordNamedEntityRecognizer使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。


StanfordNamedEntityRecognizer类属于de.tudarmstadt.ukp.dkpro.core.stanfordnlp包,在下文中一共展示了StanfordNamedEntityRecognizer类的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: setupPipeline

import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordNamedEntityRecognizer; //导入依赖的package包/类
/**
 * Builds the aggregate analysis engine and stores it in {@code this.ae}.
 *
 * <p>The aggregate chains, in this order: {@code LanguageToolSegmenter},
 * {@code SpotlightAnnotator}, {@code StanfordNamedEntityRecognizer},
 * {@code OpenNlpPosTagger}, {@code OpenNlpChunker} and {@code KeyPhraseAnnotator}.
 *
 * @throws ResourceInitializationException propagated from the description/engine factory calls
 */
private void setupPipeline() throws ResourceInitializationException {
	AnalysisEngineDescription segmentation = createEngineDescription(LanguageToolSegmenter.class);

	// The Spotlight endpoint is read from this.dbpediaService; an earlier revision
	// hard-coded "http://localhost:2222/rest" here instead.
	AnalysisEngineDescription spotlight = createEngineDescription(
			SpotlightAnnotator.class,
			SpotlightAnnotator.PARAM_ENDPOINT, this.dbpediaService,
			SpotlightAnnotator.PARAM_CONFIDENCE, 0.35f,
			SpotlightAnnotator.PARAM_ALL_CANDIDATES, false);

	// No parameters are passed to the named entity recognizer.
	AnalysisEngineDescription namedEntities = createEngineDescription(StanfordNamedEntityRecognizer.class);

	// The three components below are all configured with language "en".
	AnalysisEngineDescription posTagger = createEngineDescription(
			OpenNlpPosTagger.class,
			OpenNlpPosTagger.PARAM_LANGUAGE, "en");

	AnalysisEngineDescription chunker = createEngineDescription(
			OpenNlpChunker.class,
			OpenNlpChunker.PARAM_LANGUAGE, "en");

	AnalysisEngineDescription keyPhrases = createEngineDescription(
			KeyPhraseAnnotator.class,
			KeyPhraseAnnotator.PARAM_LANGUAGE, "en");

	// Combine the individual descriptions into one aggregate, then instantiate it.
	AnalysisEngineDescription aggregate = createEngineDescription(
			segmentation, spotlight, namedEntities, posTagger, chunker, keyPhrases);
	this.ae = createEngine(aggregate);
}
 
开发者ID:EUMSSI,项目名称:EumssiUimaService,代码行数:23,代码来源:UimaManager.java

示例2: main

import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordNamedEntityRecognizer; //导入依赖的package包/类
/**
 * Command-line entry point: reads documents with {@code TextgridTEIUrlReader} from
 * {@code src/main/resources}, runs them through the annotation pipeline assembled below
 * and writes the result as XMI to {@code target/xmi/}.
 *
 * @param args command-line arguments; not read by this method
 * @throws ResourceInitializationException declared by the signature; presumably raised by the
 *             description factory calls
 * @throws UIMAException declared by the signature; presumably raised while running the pipeline
 * @throws IOException declared by the signature; presumably raised on read/write failures
 */
public static void main(String[] args) throws ResourceInitializationException, UIMAException, IOException {
	// Point java.util.logging at the project's logging configuration file.
	System.setProperty("java.util.logging.config.file", "src/main/resources/logging.properties");

	// Reader description; the input location is the relative path "src/main/resources".
	CollectionReaderDescription crd = CollectionReaderFactory.createReaderDescription(TextgridTEIUrlReader.class,
			TextgridTEIUrlReader.PARAM_INPUT, "src/main/resources");

	// The engine descriptions are passed in the order listed below.
	SimplePipeline.runPipeline(crd,
			/*
			 * Segmentation, via the project's wrapper around LanguageToolSegmenter.
			 */
			D.getWrappedSegmenterDescription(LanguageToolSegmenter.class),
			createEngineDescription(FigureReferenceAnnotator.class),
			createEngineDescription(SpeakerIdentifier.class, SpeakerIdentifier.PARAM_CREATE_SPEAKER_FIGURE, true),
			/*
			 * Standard NLP components. This works because dkpro only sees
			 * tokens and sentences. The segmenter creates those only for
			 * the figure speech (and not for stage directions).
			 */
			createEngineDescription(StanfordPosTagger.class),
			createEngineDescription(StanfordNamedEntityRecognizer.class),
			createEngineDescription(FigureMentionDetection.class),
			/*
			 * Extract the copresence network (NetworkExtractor without parameters).
			 */
			createEngineDescription(NetworkExtractor.class),
			/*
			 * Extract the mention network: a second NetworkExtractor whose view name
			 * and network type are both set to "MentionNetwork".
			 */
			createEngineDescription(NetworkExtractor.class, NetworkExtractor.PARAM_VIEW_NAME, "MentionNetwork",
					NetworkExtractor.PARAM_NETWORK_TYPE, "MentionNetwork"),
			/*
			 * Write the annotated documents as XMI below target/xmi/.
			 */
			createEngineDescription(XmiWriter.class, XmiWriter.PARAM_TARGET_LOCATION, "target/xmi/"));
}
 
开发者ID:quadrama,项目名称:DramaNLP,代码行数:36,代码来源:Process.java

示例3: main

import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordNamedEntityRecognizer; //导入依赖的package包/类
/**
 * Command-line entry point: parses the arguments, then reads text from {@code source},
 * annotates it and writes the result as XMI to {@code target}.
 *
 * <p>The components run in this order: segmenter, POS tagger, lemmatizer, named entity
 * recognizer, Spotlight annotator, key phrase annotator, XMI writer.
 *
 * @param args command-line arguments, handed to {@code parseArgs}
 * @throws Exception propagated from argument parsing, description creation or the pipeline run
 */
public static void main(String[] args) throws Exception {
	parseArgs(args);

	CollectionReaderDescription textReader = createReaderDescription(
			TextReader.class,
			TextReader.PARAM_SOURCE_LOCATION, source,
			TextReader.PARAM_LANGUAGE, lang);

	// The descriptions are created here in a different order than they run below.
	AnalysisEngineDescription segmenter = createEngineDescription(
			OpenNlpSegmenter.class,
			OpenNlpSegmenter.PARAM_LANGUAGE, lang);

	AnalysisEngineDescription lemmatizer = createEngineDescription(
			MateLemmatizer.class,
			MateLemmatizer.PARAM_LANGUAGE, lang);

	AnalysisEngineDescription posTagger = createEngineDescription(
			OpenNlpPosTagger.class,
			OpenNlpPosTagger.PARAM_LANGUAGE, lang);

	AnalysisEngineDescription keyPhrases = createEngineDescription(
			KeyPhraseAnnotator.class,
			KeyPhraseAnnotator.PARAM_LANGUAGE, lang,
			KeyPhraseAnnotator.PARAM_KEYPHRASE_RATIO, ratio);

	AnalysisEngineDescription spotlight = createEngineDescription(
			SpotlightAnnotator.class,
			SpotlightAnnotator.PARAM_ENDPOINT, endpoint,
			SpotlightAnnotator.PARAM_CONFIDENCE, conf);

	AnalysisEngineDescription namedEntities = createEngineDescription(
			StanfordNamedEntityRecognizer.class,
			StanfordNamedEntityRecognizer.PARAM_LANGUAGE, lang);

	AnalysisEngineDescription xmiWriter = createEngineDescription(
			XmiWriter.class,
			XmiWriter.PARAM_TARGET_LOCATION, target);

	SimplePipeline.runPipeline(
			textReader,
			segmenter,
			posTagger,
			lemmatizer,
			namedEntities,
			spotlight,
			keyPhrases,
			xmiWriter);
}
 
开发者ID:EUMSSI,项目名称:EUMSSI-tools,代码行数:34,代码来源:KEAPipeline.java

示例4: main

import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordNamedEntityRecognizer; //导入依赖的package包/类
/**
 * Command-line entry point: reads the text documents in {@code textFolder}, annotates them
 * with the Stanford components below and writes the annotated data back into
 * {@code textFolder} as {@code .bin6} files.
 *
 * @param args optional; if present, {@code args[0]} replaces {@code textFolder}
 * @throws UIMAException declared by the signature; presumably raised by the factory calls or
 *             the pipeline run
 * @throws IOException declared by the signature; presumably raised on read/write failures
 */
public static void main(String[] args) throws UIMAException, IOException {

	Logger.getRootLogger().setLevel(Level.INFO);

	// 0) optional override of the input folder
	if (args.length > 0) {
		textFolder = args[0];
	}

	// 1) reader for the text documents, language fixed to "en"
	CollectionReaderDescription textReader = CollectionReaderFactory.createReaderDescription(
			TextReader.class,
			TextReader.PARAM_SOURCE_LOCATION, textFolder,
			TextReader.PARAM_PATTERNS, textPattern,
			TextReader.PARAM_LANGUAGE, "en");

	// 2) processing components

	// Opening and closing quote characters, passed to every component that
	// has PARAM_QUOTE_BEGIN / PARAM_QUOTE_END.
	List<String> openingQuotes = Arrays.asList("“", "‘");
	List<String> closingQuotes = Arrays.asList("”", "’");

	// tokenization and sentence splitting
	AnalysisEngineDescription tokenizer = AnalysisEngineFactory.createEngineDescription(
			StanfordSegmenter.class,
			StanfordSegmenter.PARAM_NEWLINE_IS_SENTENCE_BREAK, "ALWAYS");

	// part-of-speech tagging
	AnalysisEngineDescription posTagger = AnalysisEngineFactory.createEngineDescription(
			StanfordPosTagger.class,
			StanfordPosTagger.PARAM_QUOTE_BEGIN, openingQuotes,
			StanfordPosTagger.PARAM_QUOTE_END, closingQuotes);

	// lemmatization
	AnalysisEngineDescription lemmas = AnalysisEngineFactory.createEngineDescription(
			StanfordLemmatizer.class,
			StanfordLemmatizer.PARAM_QUOTE_BEGIN, openingQuotes,
			StanfordLemmatizer.PARAM_QUOTE_END, closingQuotes);

	// named entity recognition
	AnalysisEngineDescription namedEntities = AnalysisEngineFactory.createEngineDescription(
			StanfordNamedEntityRecognizer.class,
			StanfordNamedEntityRecognizer.PARAM_QUOTE_BEGIN, openingQuotes,
			StanfordNamedEntityRecognizer.PARAM_QUOTE_END, closingQuotes);

	// constituency parsing and dependency conversion
	AnalysisEngineDescription constituencyParser = AnalysisEngineFactory.createEngineDescription(
			StanfordParser.class,
			StanfordParser.PARAM_QUOTE_BEGIN, openingQuotes,
			StanfordParser.PARAM_QUOTE_END, closingQuotes,
			StanfordParser.PARAM_MODE, DependenciesMode.CC_PROPAGATED);

	// coreference resolution
	// NOTE(review): no component class is passed here, so this does not name any
	// coreference resolver - confirm whether a resolver class was intended.
	AnalysisEngineDescription coreference = AnalysisEngineFactory.createEngineDescription();

	// 3) binary CAS writer; note that the target location is the input folder
	AnalysisEngineDescription casWriter = AnalysisEngineFactory.createEngineDescription(
			BinaryCasWriter.class,
			BinaryCasWriter.PARAM_TARGET_LOCATION, textFolder,
			BinaryCasWriter.PARAM_STRIP_EXTENSION, false,
			BinaryCasWriter.PARAM_FILENAME_EXTENSION, ".bin6",
			BinaryCasWriter.PARAM_OVERWRITE, true);

	// corpus statistics
	AnalysisEngineDescription statistics = AnalysisEngineFactory.createEngineDescription(CorpusStatWriter.class);

	// 4) run everything
	SimplePipeline.runPipeline(
			textReader,
			tokenizer,
			posTagger,
			lemmas,
			namedEntities,
			constituencyParser,
			coreference,
			casWriter,
			statistics);
}
 
开发者ID:UKPLab,项目名称:ijcnlp2017-cmaps,代码行数:57,代码来源:PipelinePreprocessing.java

示例5: main

import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordNamedEntityRecognizer; //导入依赖的package包/类
/**
 * Command-line entry point: reads {@code *.xmi.bz2} files from {@code sourceLocationBase},
 * adds named entities, parses, semantic fields and FrameNet roles, and writes
 * BZIP2-compressed XMI to {@code outputFileBase + "/xmi_rl_input/"}.
 *
 * <p>The input is expected to be preprocessed already: tokenized, sentence-split,
 * POS-tagged, lemmatized and sense-tagged with FrameNet senses.
 *
 * @param args command-line arguments; not read by this method
 * @throws UIMAException declared by the signature; presumably raised by the factory calls or
 *             the pipeline run
 * @throws IOException declared by the signature; presumably raised on read/write failures
 */
public static void main(String[] args)
		throws UIMAException, IOException
{
	// An ImsCwbReader over "[+]UKWAC*.xml" files used to be configured here; the
	// pipeline now starts from XMI files instead.
	CollectionReaderDescription xmiReader = createReaderDescription(
			XmiReader.class,
			ResourceCollectionReaderBase.PARAM_SOURCE_LOCATION, sourceLocationBase,
			ResourceCollectionReaderBase.PARAM_PATTERNS, new String[] { "*.xmi.bz2" });

	AnalysisEngineDescription namedEntities =
			createEngineDescription(StanfordNamedEntityRecognizer.class);

	// Semantic field annotator, backed by a UBY resource that is configured with a
	// MySQL driver and dialect.
	// NOTE(review): the database URL and credentials are hard-coded below.
	AnalysisEngineDescription semanticFields = createEngineDescription(
			UbySemanticFieldAnnotator.class,
			UbySemanticFieldAnnotator.PARAM_UBY_SEMANTIC_FIELD_RESOURCE,
			createExternalResourceDescription(
					UbySemanticFieldResource.class,
					UbySemanticFieldResource.PARAM_LANGUAGE, "en",
					UbySemanticFieldResource.RES_UBY,
					createExternalResourceDescription(
							UbyResource.class,
							UbyResource.PARAM_MODEL_LOCATION, ResourceObjectProviderBase.NOT_REQUIRED,
							UbyResource.PARAM_URL, "localhost/uby_medium_0_7_0",
							UbyResource.PARAM_DRIVER, "com.mysql.jdbc.Driver",
							UbyResource.PARAM_DIALECT, "mysql",
							UbyResource.PARAM_USERNAME, "root",
							UbyResource.PARAM_PASSWORD, "pass")));

	// The parser is set to read POS tags, not write them, and to write constituents.
	AnalysisEngineDescription constituencyParser = createEngineDescription(
			StanfordParser.class,
			StanfordParser.PARAM_VARIANT, "wsj-rnn",
			StanfordParser.PARAM_LANGUAGE, "en",
			StanfordParser.PARAM_READ_POS, true,
			StanfordParser.PARAM_WRITE_POS, false,
			StanfordParser.PARAM_WRITE_CONSTITUENT, true);

	AnalysisEngineDescription semanticRoles = createEngineDescription(
			FrameNetRoleAnnotator.class,
			FrameNetRoleAnnotator.PARAM_EVAL_MODE, false,
			FrameNetRoleAnnotator.PARAM_OUTPUT_FILE, outputFileBase + "/srlLogFile.log",
			FrameNetRoleAnnotator.PARAM_SEMLINK_LOCATION, semLinkLocation);

	AnalysisEngineDescription xmiWriter = createEngineDescription(
			XmiWriter.class,
			XmiWriter.PARAM_TARGET_LOCATION, outputFileBase + "/xmi_rl_input/",
			XmiWriter.PARAM_USE_DOCUMENT_ID, true,
			XmiWriter.PARAM_ESCAPE_DOCUMENT_ID, false,
			XmiWriter.PARAM_COMPRESSION, CompressionMethod.BZIP2);

	// A CasDumpWriter to "target/roleAnnotationOutput" used to be available as an
	// optional last step; it is not part of the pipeline.
	SimplePipeline.runPipeline(
			xmiReader,
			namedEntities,
			constituencyParser,
			semanticFields,
			semanticRoles,
			xmiWriter);
}
 
开发者ID:UKPLab,项目名称:tacl2016-trainingdata4srl,代码行数:73,代码来源:FrameNetSrlPipeline.java

示例6: main

import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordNamedEntityRecognizer; //导入依赖的package包/类
/**
 * Command-line entry point for the German pipeline: reads {@code *.xmi.bz2} files from
 * {@code sourceLocationBase}, adds separated particles, semantic fields, named entities,
 * parses and FrameNet roles, and writes BZIP2-compressed XMI to
 * {@code outputFileBase + "/xmi_rl_input/"}.
 *
 * <p>The input is expected to be preprocessed already: tokenized, sentence-split,
 * POS-tagged, lemmatized and sense-tagged with FrameNet senses.
 *
 * @param args command-line arguments; not read by this method
 * @throws UIMAException declared by the signature; presumably raised by the factory calls or
 *             the pipeline run
 * @throws IOException declared by the signature; presumably raised on read/write failures
 */
public static void main(String[] args)
		throws UIMAException, IOException
{
	// An ImsCwbReader (ISO-8859-1, "[+]*.xml", language "de") used to be configured
	// here; the pipeline now starts from XMI files instead.
	CollectionReaderDescription xmiReader = createReaderDescription(
			XmiReader.class,
			ResourceCollectionReaderBase.PARAM_SOURCE_LOCATION, sourceLocationBase,
			ResourceCollectionReaderBase.PARAM_PATTERNS, new String[] { "*.xmi.bz2" });

	AnalysisEngineDescription separatedParticles =
			createEngineDescription(GermanSeparatedParticleAnnotator.class);

	AnalysisEngineDescription namedEntities = createEngineDescription(
			StanfordNamedEntityRecognizer.class,
			StanfordNamedEntityRecognizer.PARAM_LANGUAGE, "de");

	// UBY database access; all connection settings come from the DB_* constants.
	ExternalResourceDescription ubyDatabase = createExternalResourceDescription(
			UbyResource.class,
			UbyResource.PARAM_MODEL_LOCATION, ResourceObjectProviderBase.NOT_REQUIRED,
			UbyResource.PARAM_URL, DB_URL,
			UbyResource.PARAM_DRIVER, DB_DRIVER,
			UbyResource.PARAM_DIALECT, DB_DRIVER_NAME,
			UbyResource.PARAM_USERNAME, DB_USERNAME,
			UbyResource.PARAM_PASSWORD, DB_PASSWORD);

	// Semantic field resource (language "de") on top of the UBY database.
	ExternalResourceDescription semanticFieldResource = createExternalResourceDescription(
			UbySemanticFieldResource.class,
			UbySemanticFieldResource.PARAM_LANGUAGE, "de",
			UbySemanticFieldResource.RES_UBY, ubyDatabase);

	AnalysisEngineDescription semanticFields = createEngineDescription(
			UbySemanticFieldAnnotator.class,
			UbySemanticFieldAnnotator.PARAM_UBY_SEMANTIC_FIELD_RESOURCE, semanticFieldResource);

	// The parser is set to read POS tags, not write them, and to write constituents.
	AnalysisEngineDescription constituencyParser = createEngineDescription(
			StanfordParser.class,
			StanfordParser.PARAM_LANGUAGE, "de",
			StanfordParser.PARAM_READ_POS, true,
			StanfordParser.PARAM_WRITE_POS, false,
			StanfordParser.PARAM_WRITE_CONSTITUENT, true);

	AnalysisEngineDescription semanticRoles = createEngineDescription(
			FrameNetRoleAnnotatorGerman.class,
			FrameNetRoleAnnotatorGerman.PARAM_OUTPUT_FILE, outputFileBase + "/srlLogFile.log",
			FrameNetRoleAnnotatorGerman.PARAM_SEMLINK_LOCATION, semLinkLocation);

	AnalysisEngineDescription xmiWriter = createEngineDescription(
			XmiWriter.class,
			XmiWriter.PARAM_TARGET_LOCATION, outputFileBase + "/xmi_rl_input/",
			XmiWriter.PARAM_USE_DOCUMENT_ID, true,
			XmiWriter.PARAM_ESCAPE_DOCUMENT_ID, false,
			XmiWriter.PARAM_COMPRESSION, CompressionMethod.BZIP2);

	SimplePipeline.runPipeline(
			xmiReader,
			separatedParticles,
			semanticFields,
			namedEntities,
			constituencyParser,
			semanticRoles,
			xmiWriter);
}
 
开发者ID:UKPLab,项目名称:tacl2016-trainingdata4srl,代码行数:74,代码来源:FrameNetSrlPipelineGerman.java


注:本文中的de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordNamedEntityRecognizer类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。