当前位置: 首页>>代码示例>>Java>>正文


Java NLPGetter类代码示例

本文整理汇总了Java中com.clearnlp.nlp.NLPGetter的典型用法代码示例。如果您正苦于以下问题：Java NLPGetter类的具体用法？Java NLPGetter怎么用？Java NLPGetter使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


NLPGetter类属于com.clearnlp.nlp包，在下文中一共展示了NLPGetter类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: setupComponents

import com.clearnlp.nlp.NLPGetter; //导入依赖的package包/类
/**
 * Loads the English "general-en" ClearNLP components.
 *
 * @return the loaded components, ordered by the modes POS, MORPH, DEP, PRED, ROLE, SRL
 * @throws RuntimeException wrapping any exception raised while a component is loaded
 */
private static AbstractComponent[] setupComponents() {
    try {
        final String lang = AbstractReader.LANG_EN;
        final String model = "general-en";

        // Array initializers are evaluated left to right, so components load in pipeline order.
        return new AbstractComponent[] {
            NLPGetter.getComponent(model, lang, NLPMode.MODE_POS),
            NLPGetter.getComponent(model, lang, NLPMode.MODE_MORPH),
            NLPGetter.getComponent(model, lang, NLPMode.MODE_DEP),
            NLPGetter.getComponent(model, lang, NLPMode.MODE_PRED),
            NLPGetter.getComponent(model, lang, NLPMode.MODE_ROLE),
            NLPGetter.getComponent(model, lang, NLPMode.MODE_SRL)
        };
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
 
开发者ID:marcusklang,项目名称:langforia,代码行数:21,代码来源:ClearNLP.java

示例2: call

import com.clearnlp.nlp.NLPGetter; //导入依赖的package包/类
/**
 * Segments {@code s_line} into sentences and renders them as text.
 * Sentences with fewer than three tokens are dropped; every kept sentence is
 * written one token per line and followed by an empty line.
 *
 * @return the rendered sentences, or an empty string when none are kept
 */
public String call()
{
	AbstractSegmenter segmenter = NLPGetter.getSegmenter(s_language, NLPGetter.getTokenizer(s_language));
	BufferedReader in = new BufferedReader(new StringReader(s_line));
	StringBuilder out = new StringBuilder();
	
	for (List<String> sentence : segmenter.getSentences(in))
	{
		if (sentence.size() >= 3)
		{
			out.append(UTArray.join(sentence, "\n")).append("\n\n");
		}
	}
	
	// A failure to close the reader is only reported; the collected text is still returned.
	try
	{
		in.close();
	}
	catch (IOException e)
	{
		e.printStackTrace();
	}
	
	return out.toString();
}
 
开发者ID:clearnlp,项目名称:clearnlp,代码行数:22,代码来源:DemoMultiThread.java

示例3: decode

import com.clearnlp.nlp.NLPGetter; //导入依赖的package包/类
/**
 * Reads sentences from {@code reader} until it returns {@code null}, runs every
 * component over each one and prints the result followed by an empty line.
 *
 * @param reader     source of raw sentences, one per call to {@code next()}
 * @param fout       stream that receives the formatted trees
 * @param tokenizer  tokenizer used to split each sentence
 * @param components components applied to each tree, in array order
 */
public void decode(LineReader reader, PrintStream fout, AbstractTokenizer tokenizer, AbstractComponent[] components)
{
	final String mode = getMode();
	String sentence;

	while ((sentence = reader.next()) != null)
	{
		// Lines whose trimmed form equals UNConstant.EMPTY produce no output.
		if (!sentence.trim().equals(UNConstant.EMPTY))
		{
			DEPTree tree = NLPGetter.toDEPTree(tokenizer.getTokens(sentence));
			
			for (int i = 0; i < components.length; i++)
			{
				components[i].process(tree);
			}
			
			fout.println(toString(tree, mode)+"\n");
		}
	}
}
 
开发者ID:clearnlp,项目名称:clearnlp,代码行数:17,代码来源:NLPDecoder.java

示例4: Tokenizer

import com.clearnlp.nlp.NLPGetter; //导入依赖的package包/类
/**
 * Parses the command-line arguments and tokenizes every input file found.
 * A segmenter is created only when the input format is {@code AbstractReader.TYPE_RAW};
 * otherwise {@code null} is passed on to {@code tokenize}.
 *
 * @param args command-line arguments forwarded to {@code initArgs}
 */
public Tokenizer(String[] args)
{
	initArgs(args);
	
	try
	{
		AbstractTokenizer tokenizer = NLPGetter.getTokenizer(s_language);
		AbstractSegmenter segmenter = null;
		
		if (i_format.equals(AbstractReader.TYPE_RAW))
		{
			segmenter = NLPGetter.getSegmenter(s_language, tokenizer);
		}
		
		List<String[]> ioPairs = getFilenames(s_inputPath, s_inputExt, s_outputExt);
		boolean lineOutput = o_format.equals(AbstractReader.TYPE_LINE);
		tokenizer.setTwit(b_twit);
		
		// Each pair holds the input file at index 0 and the output file at index 1.
		for (String[] pair : ioPairs)
		{
			String inputFile = pair[0];
			System.out.println(inputFile);
			tokenize(tokenizer, segmenter, inputFile, pair[1], lineOutput);
		}
	}
	catch (IOException e)
	{
		e.printStackTrace();
	}
}
 
开发者ID:clearnlp,项目名称:clearnlp,代码行数:21,代码来源:Tokenizer.java

示例5: process

import com.clearnlp.nlp.NLPGetter; //导入依赖的package包/类
/**
 * Segments the text from {@code reader} into sentences, runs every component over
 * each sentence and prints the tree's SRL string followed by an empty line.
 * {@code fout} is always closed before this method returns, including when
 * segmentation or a component throws.
 *
 * @param tokenizer  tokenizer handed to the segmenter
 * @param components components applied to each tree, in array order
 * @param reader     source text to segment
 * @param fout       output stream; closed by this method
 */
public void process(AbstractTokenizer tokenizer, AbstractComponent[] components, BufferedReader reader, PrintStream fout)
{
	try
	{
		AbstractSegmenter segmenter = NLPGetter.getSegmenter(language, tokenizer);
		
		for (List<String> tokens : segmenter.getSentences(reader))
		{
			DEPTree tree = NLPGetter.toDEPTree(tokens);
			
			for (AbstractComponent component : components)
			{
				component.process(tree);
			}
			
			fout.println(tree.toStringSRL()+"\n");
		}
	}
	finally
	{
		// Close on every path so buffered output is flushed and the stream is not leaked on failure.
		fout.close();
	}
}
 
开发者ID:clearnlp,项目名称:clearnlp-demo,代码行数:18,代码来源:DemoNLPDecode.java

示例6: DemoCSenTypeClassifierEN

import com.clearnlp.nlp.NLPGetter; //导入依赖的package包/类
/**
 * Loads the English "general-en" POS tagger and dependency parser, then runs
 * {@code process} over {@code inputFile}, writing its output to {@code outputFile}.
 * Both files are closed even when opening the output or processing fails.
 *
 * @param inputFile  path of the text file to read
 * @param outputFile path of the file to write
 * @throws Exception if a component cannot be loaded or a file cannot be opened, read or closed
 */
public DemoCSenTypeClassifierEN(String inputFile, String outputFile) throws Exception
{
	final String language = AbstractReader.LANG_EN;
	// Alternative model type kept by the original author: "medical-en".
	final String modelType = "general-en";
	
	AbstractSegmenter segmenter = NLPGetter.getSegmenter(language, NLPGetter.getTokenizer(language));
	AbstractComponent tagger    = NLPGetter.getComponent(modelType, language, NLPMode.MODE_POS);
	AbstractComponent parser    = NLPGetter.getComponent(modelType, language, NLPMode.MODE_DEP);
	CSenTypeClassifierEN typer  = new CSenTypeClassifierEN();
	
	// NOTE(review): InputStreamReader without a charset decodes with the platform default.
	BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(inputFile)));
	
	try
	{
		PrintStream fout = new PrintStream(new BufferedOutputStream(new FileOutputStream(outputFile)));
		
		try
		{
			process(reader, fout, segmenter, tagger, parser, typer);
		}
		finally
		{
			fout.close();
		}
	}
	finally
	{
		reader.close();
	}
}
 
开发者ID:clearnlp,项目名称:clearnlp-demo,代码行数:20,代码来源:DemoCSenTypeClassifierEN.java

示例7: process

import com.clearnlp.nlp.NLPGetter; //导入依赖的package包/类
/**
 * Tags and parses every sentence read from {@code reader} and prints those for
 * which {@code typer} reports an interrogative root: first the space-joined
 * tokens, then the dependency tree, each followed by an empty line.
 *
 * @param reader    source text to segment
 * @param fout      stream that receives the matching sentences
 * @param segmenter splits the text into token lists
 * @param tagger    component run first on each tree
 * @param parser    component run second on each tree
 * @param typer     classifier queried for each root until one is interrogative
 */
public void process(BufferedReader reader, PrintStream fout, AbstractSegmenter segmenter, AbstractComponent tagger, AbstractComponent parser, CSenTypeClassifierEN typer)
{
	for (List<String> sentence : segmenter.getSentences(reader))
	{
		DEPTree tree = NLPGetter.toDEPTree(sentence);
		tagger.process(tree);
		parser.process(tree);
		tree.setDependents();
		
		// Stop at the first interrogative root; the remaining roots are not examined.
		boolean interrogative = false;
		
		for (DEPNode root : tree.getRoots())
		{
			interrogative = typer.isInterrogative(root);
			if (interrogative) break;
		}
		
		if (interrogative)
		{
			fout.println(UTArray.join(sentence, " ")+"\n");
			fout.println(tree.toStringDEP()+"\n");
		}
	}
}
 
开发者ID:clearnlp,项目名称:clearnlp-demo,代码行数:23,代码来源:DemoCSenTypeClassifierEN.java

示例8: DemoMultiParse

import com.clearnlp.nlp.NLPGetter; //导入依赖的package包/类
/**
 * Loads the tokenizer and the POS, DEP, PRED, ROLE and SRL components for
 * {@code modelType}, then runs {@code process} once on a fixed sample sentence
 * and once on the contents of {@code inputFile}.
 *
 * @param modelType  model type passed to every {@code NLPGetter.getComponent} call
 * @param inputFile  path of the file to read
 * @param outputFile path of the file to write
 * @throws Exception if loading or processing fails
 */
public DemoMultiParse(String modelType, String inputFile, String outputFile) throws Exception
{
	AbstractTokenizer tokenizer = NLPGetter.getTokenizer(language);
	
	// Components used before parsing.
	AbstractComponent[] preComponents = {
		NLPGetter.getComponent(modelType, language, NLPMode.MODE_POS)
	};
	
	AbstractComponent parser = NLPGetter.getComponent(modelType, language, NLPMode.MODE_DEP);
	
	// Components used after parsing.
	AbstractComponent[] postComponents = {
		NLPGetter.getComponent(modelType, language, NLPMode.MODE_PRED),
		NLPGetter.getComponent(modelType, language, NLPMode.MODE_ROLE),
		NLPGetter.getComponent(modelType, language, NLPMode.MODE_SRL)
	};
	
	String sentence = "I know you know who I know.";
	AbstractDEPParser depParser = (AbstractDEPParser)parser;
	
	process(tokenizer, depParser, preComponents, postComponents, sentence);
	process(tokenizer, depParser, preComponents, postComponents,
			UTInput.createBufferedFileReader(inputFile),
			UTOutput.createPrintBufferedFileStream(outputFile));
}
 
开发者ID:clearnlp,项目名称:clearnlp-demo,代码行数:17,代码来源:DemoMultiParse.java

示例9: loadPosTagger

import com.clearnlp.nlp.NLPGetter; //导入依赖的package包/类
/**
 * Loads the POS tagger from the model named by the "pos.model" property.
 * A path ending in ".zip" (case-insensitive) is opened as a {@link ZipFile};
 * any other value is passed to {@code NLPGetter.getComponent} as a string.
 *
 * @throws IOException if the zip file cannot be opened or the model cannot be loaded
 */
public void loadPosTagger() throws IOException {
	String model_path = prop.getProperty("pos.model");
	// Locale.ROOT keeps the extension check independent of the default locale
	// (under a Turkish locale, "ZIP".toLowerCase() does not yield "zip").
	boolean zipped = model_path.toLowerCase(java.util.Locale.ROOT).endsWith(".zip");
	if (zipped) {
		// NOTE(review): the ZipFile is never closed here — confirm whether getComponent closes it.
		tagger = (AbstractPOSTagger) NLPGetter.getComponent(new ZipFile(model_path), AbstractReader.LANG_EN, NLPMode.MODE_POS);
	} else {
		tagger = (AbstractPOSTagger) NLPGetter.getComponent(model_path, AbstractReader.LANG_EN, NLPMode.MODE_POS);
	}
}
 
开发者ID:hakchul77,项目名称:irnlp_toolkit,代码行数:8,代码来源:ClearNlpWrapper.java

示例10: loadPredIdentifier

import com.clearnlp.nlp.NLPGetter; //导入依赖的package包/类
/**
 * Loads the predicate identifier from the model named by the "pred.model" property.
 * A path ending in ".zip" (case-insensitive) is opened as a {@link ZipFile};
 * any other value is passed to {@code NLPGetter.getComponent} as a string.
 *
 * @throws IOException if the zip file cannot be opened or the model cannot be loaded
 */
private void loadPredIdentifier() throws IOException {
	String model_path = prop.getProperty("pred.model");
	// Locale.ROOT keeps the extension check independent of the default locale
	// (under a Turkish locale, "ZIP".toLowerCase() does not yield "zip").
	boolean zipped = model_path.toLowerCase(java.util.Locale.ROOT).endsWith(".zip");
	if (zipped) {
		// NOTE(review): the ZipFile is never closed here — confirm whether getComponent closes it.
		identifier = (AbstractPredicateIdentifier) NLPGetter.getComponent(new ZipFile(model_path), AbstractReader.LANG_EN, NLPMode.MODE_PRED);
	} else {
		identifier = (AbstractPredicateIdentifier) NLPGetter.getComponent(model_path, AbstractReader.LANG_EN, NLPMode.MODE_PRED);
	}
}
 
开发者ID:hakchul77,项目名称:irnlp_toolkit,代码行数:8,代码来源:ClearNlpWrapper.java

示例11: loadRoleClassifier

import com.clearnlp.nlp.NLPGetter; //导入依赖的package包/类
/**
 * Loads the roleset classifier from the model named by the "role.model" property.
 * A path ending in ".zip" (case-insensitive) is opened as a {@link ZipFile};
 * any other value is passed to {@code NLPGetter.getComponent} as a string.
 *
 * @throws IOException if the zip file cannot be opened or the model cannot be loaded
 */
private void loadRoleClassifier() throws IOException {
	String model_path = prop.getProperty("role.model");
	// Locale.ROOT keeps the extension check independent of the default locale
	// (under a Turkish locale, "ZIP".toLowerCase() does not yield "zip").
	boolean zipped = model_path.toLowerCase(java.util.Locale.ROOT).endsWith(".zip");
	if (zipped) {
		// NOTE(review): the ZipFile is never closed here — confirm whether getComponent closes it.
		classifier = (AbstractRolesetClassifier) NLPGetter.getComponent(new ZipFile(model_path), AbstractReader.LANG_EN, NLPMode.MODE_ROLE);
	} else {
		classifier = (AbstractRolesetClassifier) NLPGetter.getComponent(model_path, AbstractReader.LANG_EN, NLPMode.MODE_ROLE);
	}
}
 
开发者ID:hakchul77,项目名称:irnlp_toolkit,代码行数:8,代码来源:ClearNlpWrapper.java

示例12: loadSrlLabeler

import com.clearnlp.nlp.NLPGetter; //导入依赖的package包/类
/**
 * Loads the predicate identifier and roleset classifier, then loads the SRL
 * labeler from the model named by the "srl.model" property.
 * A path ending in ".zip" (case-insensitive) is opened as a {@link ZipFile};
 * any other value is passed to {@code NLPGetter.getComponent} as a string.
 *
 * @throws IOException if a zip file cannot be opened or a model cannot be loaded
 */
public void loadSrlLabeler() throws IOException {
	loadPredIdentifier();
	loadRoleClassifier();
	String model_path = prop.getProperty("srl.model");
	// Locale.ROOT keeps the extension check independent of the default locale
	// (under a Turkish locale, "ZIP".toLowerCase() does not yield "zip").
	boolean zipped = model_path.toLowerCase(java.util.Locale.ROOT).endsWith(".zip");
	if (zipped) {
		// NOTE(review): the ZipFile is never closed here — confirm whether getComponent closes it.
		labeler = (AbstractSRLabeler) NLPGetter.getComponent(new ZipFile(model_path), AbstractReader.LANG_EN, NLPMode.MODE_SRL);
	} else {
		labeler = (AbstractSRLabeler) NLPGetter.getComponent(model_path, AbstractReader.LANG_EN, NLPMode.MODE_SRL);
	}
}
 
开发者ID:hakchul77,项目名称:irnlp_toolkit,代码行数:10,代码来源:ClearNlpWrapper.java

示例13: main

import com.clearnlp.nlp.NLPGetter; //导入依赖的package包/类
/**
 * Demo entry point: runs tokenization, sentence splitting, tagging, parsing and
 * semantic role labeling over a fixed sample sentence and prints, for every
 * detected sentence, its SRL string and the dependents of each verb.
 * 
 * @param args command-line arguments (not used)
 * @throws Exception if loading the models or processing the text fails
 */
public static void main(String[] args) throws Exception {
	// data input
	String text = "Samsung Electronics is a South Korean multinational electronics company in Suwon, South Korea.";

	// model loading
	ClearNlpWrapper nlp = new ClearNlpWrapper(Env.CLEARNLP_CFG);
	nlp.loadAll("tokenize, ssplit, pos, parse, srl");

	// task run
	for (List<String> toks : nlp.detect(text)) {
		DEPTree units = NLPGetter.toDEPTree(toks);
		units = nlp.tag(units);
		units = nlp.parse(units);
		units = nlp.label(units);

		System.out.println("-toStringSRL--------------------------------------------------------------------");
		System.out.println(units.toStringSRL());

		System.out.println("-dependent Nodes----------------------------------------------------------------");
		// The buffer never leaves this method, so the unsynchronized StringBuilder is sufficient.
		StringBuilder sb = new StringBuilder();
		for (DEPNode verb : ClearNlpWrapper.getAllVerbs(units)) {
			sb.append("->(Verb) " + ClearNlpWrapper.toTaggedWord(verb) + "\n");
			for (SRLNode dep : ClearNlpWrapper.getDependents(verb)) {
				Map<String, Object> m = dep.toMap();
				sb.append(String.format("  ->(Node) id=%d, form=%s, pos=%s, drel=%s",
					m.get("id"), m.get("form"), m.get("pos"), m.get("drel")));
				// The semantic-role fields are printed only when the map carries an "srel" entry.
				if (m.get("srel") != null) {
					sb.append(String.format(", srel=%s, sfunc=%s", m.get("srel"), m.get("sfunc")));
				}
				sb.append("\n");
			}
		}
		System.out.println(JString.trimAndIndent(sb.toString(), 2));
	}
}
 
开发者ID:hakchul77,项目名称:irnlp_toolkit,代码行数:41,代码来源:SemanticRoleLabeling.java

示例14: testClearNlpWrapperForBasic

import com.clearnlp.nlp.NLPGetter; //导入依赖的package包/类
/**
 * Checks the basic ClearNlpWrapper pipeline (tokenize, ssplit, pos, parse) on a
 * two-sentence sample: the loaded components are non-null, two sentences are
 * detected, and the tagged words, typed dependencies and semantic-graph lines
 * each match the token count of their sentence.
 * Only the banner is printed when {@code TEST_BASIC} is false.
 * 
 * @throws IOException declared for the wrapper calls made by this test
 */
public void testClearNlpWrapperForBasic() throws IOException {
	System.out.println("\n----- testClearNlpWrapperForBasic() ------------------------------");
	if (!TEST_BASIC) {
		return;
	}

	String text = "Samsung Electronics is a South Korean multinational electronics company in Suwon, South Korea."
			+ " It is the flagship subsidiary of the Samsung Group.";

	ClearNlpWrapper wrapper = new ClearNlpWrapper(Env.CLEARNLP_CFG);
	wrapper.loadAll("tokenize, ssplit, pos, parse");
	assertTrue(wrapper.tokenizer != null);
	assertTrue(wrapper.detector != null);
	assertTrue(wrapper.tagger != null);
	assertTrue(wrapper.parser != null);

	assertEquals(2, wrapper.detect(text).size());
	for (List<String> tokens : wrapper.detect(text)) {
		DEPTree tree = NLPGetter.toDEPTree(tokens);
		System.out.println("\n[Sentence] " + JString.join(" ", tokens));

		// Tagging: one tagged word per token.
		assertEquals(tokens.size(), ClearNlpWrapper.toTaggedWords(wrapper.tag(tree)).size());
		System.out.println("  <Tagged> " + JString.join(" ", ClearNlpWrapper.toTaggedWords(wrapper.tag(tree))));

		// Parsing: one typed dependency per token.
		assertEquals(tokens.size(), ClearNlpWrapper.toTypedDependencies(wrapper.parse(tree)).size());
		System.out.println("  <Parsed> " + JString.join("; ", ClearNlpWrapper.toTypedDependencies(wrapper.parse(tree))));

		System.out.println("-parsed SemanticGraph-----------------------------------------------------------");
		// Graph conversion: the indented rendering has one line per token.
		SemanticGraph graph = StanfordNlpWrapper.toSemanticGraph(ClearNlpWrapper.toTypedDependencies(wrapper.parse(tree)), false);
		assertEquals(tokens.size(), JString.trimAndIndent(graph.toString(), 2).split("\n").length);
		System.out.println(JString.trimAndIndent(graph.toString(), 2));
	}
}
 
开发者ID:hakchul77,项目名称:irnlp_toolkit,代码行数:36,代码来源:TestClearNLP.java

示例15: getSegmenter

import com.clearnlp.nlp.NLPGetter; //导入依赖的package包/类
/**
 * Builds a segmenter from the configuration element.
 *
 * @param eConfig configuration element the tokenizer and language are read from
 * @param twit    flag forwarded to {@code getTokenizer}
 * @return the segmenter for the configured language, backed by the configured tokenizer
 * @throws IOException declared by this method; presumably raised while the tokenizer is created
 */
protected AbstractSegmenter getSegmenter(Element eConfig, boolean twit) throws IOException
{
	// The tokenizer is created before the language is looked up.
	final AbstractTokenizer configuredTokenizer = getTokenizer(eConfig, twit);
	
	return NLPGetter.getSegmenter(getLanguage(eConfig), configuredTokenizer);
}
 
开发者ID:clearnlp,项目名称:clearnlp,代码行数:8,代码来源:NLPDecoder.java


注:本文中的com.clearnlp.nlp.NLPGetter类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。