当前位置: 首页>>代码示例>>Java>>正文


Java EngLemmatiser类代码示例

本文整理汇总了Java中dragon.nlp.tool.lemmatiser.EngLemmatiser的典型用法代码示例。如果您正苦于以下问题:Java EngLemmatiser类的具体用法?Java EngLemmatiser怎么用?Java EngLemmatiser使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


EngLemmatiser类属于dragon.nlp.tool.lemmatiser包,在下文中一共展示了EngLemmatiser类的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: inform

import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
/**
 * Builds the {@link EngLemmatiser} once a resource loader is available.
 * <p>
 * Does nothing when {@code lemmatiserResourceDir} is {@code null}. Otherwise the
 * lemmatiser data path is the loader's config directory (a trailing
 * {@link File#separator} is appended when missing) followed by
 * {@code lemmatiserResourceDir}.
 *
 * @param loader resource loader; it is cast to {@code SolrResourceLoader} to obtain
 *               the config directory
 * @throws IOException declared in the signature; every failure raised inside this body
 *                     is caught and rethrown as {@link IllegalArgumentException}
 * @throws IllegalArgumentException if initialisation fails for any reason; the message
 *                     contains the full stack trace text and the original exception is
 *                     attached as the cause
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    if (lemmatiserResourceDir != null ) {
        try {
            String path=((SolrResourceLoader) loader).getConfigDir();
            if(!path.endsWith(File.separator))
                    path=path+File.separator;
            lemmatiser = new EngLemmatiser(path+lemmatiserResourceDir,
                    false, false);
        } catch (Exception e) {
            StringBuilder sb = new StringBuilder("Initiating ");
            sb.append(this.getClass().getName()).append(" failed due to:\n");
            sb.append(ExceptionUtils.getFullStackTrace(e));
            // Keep the textual stack trace in the message for existing log consumers, but
            // also chain the original exception so callers can inspect getCause().
            throw new IllegalArgumentException(sb.toString(), e);
        }
    }
}
 
开发者ID:ziqizhang,项目名称:jate,代码行数:18,代码来源:EnglishLemmatisationFilterFactory.java

示例2: Lemmatiser

import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
public Lemmatiser(EngLemmatiser lemmatiser) {

        this.lemmatiser=lemmatiser;
        tagLookUp = new HashMap<>();
        tagLookUp.put("NN", 1);
        tagLookUp.put("NNS", 1);
        tagLookUp.put("NNP", 1);
        tagLookUp.put("NNPS", 1);
        tagLookUp.put("VB", 2);
        tagLookUp.put("VBG", 2);
        tagLookUp.put("VBD", 2);
        tagLookUp.put("VBN", 2);
        tagLookUp.put("VBP", 2);
        tagLookUp.put("VBZ", 2);
        tagLookUp.put("JJ", 3);
        tagLookUp.put("JJR", 3);
        tagLookUp.put("JJS", 3);
        tagLookUp.put("RB", 4);
        tagLookUp.put("RBR", 4);
        tagLookUp.put("RBS", 4);
    }
 
开发者ID:ziqizhang,项目名称:jate,代码行数:22,代码来源:Lemmatiser.java

示例3: main

import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
/**
 * Command-line entry point: runs {@code transformToKEAOutput} for every entry of an
 * input directory and then exits the JVM with status 0.
 * <p>
 * Arguments as read by this method:
 * <ul>
 *   <li>{@code args[0]} - path handed to the {@link EngLemmatiser} constructor</li>
 *   <li>{@code args[1]} - input directory whose entries are processed one by one</li>
 *   <li>{@code args[2]} - parent output directory; one sub-directory is created per input entry</li>
 *   <li>{@code args[3]} - passed through to {@code transformToKEAOutput}</li>
 *   <li>{@code args[4]} - Solr home path</li>
 *   <li>{@code args[5]} - Solr core name</li>
 *   <li>{@code args[6]} - passed to {@code App.getJateProperties}</li>
 * </ul>
 *
 * @param args command-line arguments, see above
 * @throws IOException if {@code args[1]} cannot be listed as a directory, or on I/O failure
 * @throws ParseException propagated from the processing calls
 * @throws JATEException propagated from the processing calls
 */
public static void main(String[] args) throws IOException, ParseException, JATEException {
    /*mergeGS("/home/zqz/Work/data/semeval2017-scienceie/scienceie2017_test/scienceie2017_test_gs",
            "/home/zqz/Work/data/semeval2017-scienceie/scienceie2017_test/all_key_phrases.txt");*/
    Lemmatiser lem = new Lemmatiser(new EngLemmatiser(args[0],
            false, false));
    String solrHomePath = args[4];
    String solrCoreName = args[5];
    final EmbeddedSolrServer solrServer = new EmbeddedSolrServer(Paths.get(solrHomePath), solrCoreName);
    try {
        JATEProperties jateProp = App.getJateProperties(args[6]);

        // File.listFiles() returns null when the path is not a directory or cannot be read;
        // fail with a clear message instead of a NullPointerException in the loop header.
        File[] inputs = new File(args[1]).listFiles();
        if (inputs == null) {
            throw new IOException("Cannot list input directory: " + args[1]);
        }

        for (File f : inputs) {
            File outFolder = new File(args[2] + "/" + f.getName() + "/");
            outFolder.mkdirs();
            System.out.println(outFolder);

            transformToKEAOutput(f.toString(), 0, args[3], outFolder.toString(), lem,
                    solrServer.getCoreContainer().getCore(solrCoreName), jateProp);
        }
    } finally {
        // Release the embedded server even when processing fails part-way through.
        solrServer.close();
    }
    System.exit(0);
}
 
开发者ID:ziqizhang,项目名称:jate,代码行数:21,代码来源:ScienceIECorpusParser.java

示例4: test

import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
/**
 * Smoke test for {@link EngLemmatiser}: asserts that the lemmatiser data directory
 * exists, builds a lemmatiser from it, and prints the lemma of a few sample words
 * (including the empty string). The lemmas themselves are only printed, not asserted.
 */
@Test
public void test() throws Exception {
    final String dataPath = BANNER_ROOT
            + "src/main/resources/pear_resources/nlpdata/lemmatiser";
    final File dataDir = new File(dataPath);
    assertTrue(dataDir.exists());

    final EngLemmatiser engLemmatiser =
            new EngLemmatiser(dataDir.getAbsolutePath(), false, true);

    for (String word : new String[] { "gone", "went", "" }) {
        System.out.println(word + "\t--> " + engLemmatiser.lemmatize(word));
    }
}
 
开发者ID:BlueBrain,项目名称:bluima,代码行数:19,代码来源:LemmatizerTest.java

示例5: initialize

import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
/**
 * Initialises the annotator: after the superclass initialisation, resolves the
 * {@code lemmatiserData} location to a file, checks it exists and builds the
 * {@link EngLemmatiser} from its absolute path.
 *
 * @param context the UIMA context forwarded to {@code super.initialize}
 * @throws ResourceInitializationException with key {@code RESOURCE_DATA_NOT_VALID}
 *         when a {@link FileNotFoundException} is raised while locating the data
 */
@Override
public void initialize(UimaContext context)
        throws ResourceInitializationException {
    super.initialize(context);
    try {
        final File dataDir = getFile(lemmatiserData);
        checkFileExists(dataDir);
        final String dataPath = dataDir.getAbsolutePath();
        lemmatiser = new EngLemmatiser(dataPath, false, true);
    } catch (FileNotFoundException notFound) {
        final Object[] messageArgs = { lemmatiserData, "lemmatiserDataDirectory" };
        throw new ResourceInitializationException(RESOURCE_DATA_NOT_VALID,
                messageArgs, notFound);
    }
}
 
开发者ID:BlueBrain,项目名称:bluima,代码行数:16,代码来源:DragonLemmatiserAnnotator.java

示例6: init

import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
/**
 * Prepares the lemmatiser and the tag lookup table.
 * <p>
 * When the configured NLP path starts with {@code "jar:"}, the lemmatiser is pointed at
 * a {@code lemmatizer} directory under the work path instead. If that directory does not
 * exist yet it is created and each data file is copied into it from the stream returned
 * by {@code getNLPInputStream}. An already existing directory is used as-is.
 * <p>
 * Finally the {@link EngLemmatiser} is built and {@code tagLookUp} is filled: the NN* tags
 * map to 1, the VB* tags to 2, the JJ* tags to 3 and the RB* tags to 4.
 *
 * @throws UnsupportedOperationException if the directory cannot be created or a data
 *         file cannot be copied (the {@link IOException} is attached as the cause)
 */
private void init() {
	final JATEProperties props = JATEProperties.getInstance();
	String path = props.getNLPPath();

	if ( path.startsWith( "jar:" ) ) {
		final String targetDir = props.getWorkPath() + File.separator + "lemmatizer";
		path = targetDir;
		final File directory = new File( targetDir );

		// Copy only on first use: an existing directory is not re-populated.
		if ( !directory.exists() ) {
			final boolean created = directory.mkdirs();
			if ( !created ) {
				throw new UnsupportedOperationException("Could not initialize " + directory);
			}

			final String[] dataFiles = {
				"adj.exc", "adj.index", "adv.exc", "adv.index", "noun.exc",
				"stopwordexc.list", "umlserror.list", "verb.exc", "verb.index"
			};
			for ( int i = 0; i < dataFiles.length; i++ ) {
				final String name = dataFiles[ i ];
				InputStream in = null;
				OutputStream out = null;
				try {
					try {
						in = props.getNLPInputStream( "lemmatizer" + File.separator + name );
						if ( in == null ) {
							throw new IOException( "Unable to read: " + name );
						}
						out = new FileOutputStream( targetDir + File.separator + name );
						final byte[] buffer = new byte[ 4096 ];
						int count;
						while ( ( count = in.read( buffer ) ) != -1 ) {
							out.write( buffer, 0, count );
						}
					} finally {
						// Close the input first; the nested finally ensures the output
						// is closed even when closing the input throws.
						try {
							if ( in != null ) {
								in.close();
							}
						} finally {
							if ( out != null ) {
								out.close();
							}
						}
					}
				} catch ( IOException copyFailure ) {
					throw new UnsupportedOperationException( "Unable to copy data", copyFailure );
				}
			}
		}
	}

	lemmatizer = new EngLemmatiser( path, false, true );

	// Row index + 1 is the code stored for every tag in that row.
	final String[][] tagsByCode = {
		{ "NN", "NNS", "NNP", "NNPS" },
		{ "VB", "VBG", "VBD", "VBN", "VBP", "VBZ" },
		{ "JJ", "JJR", "JJS" },
		{ "RB", "RBR", "RBS" }
	};
	for ( int row = 0; row < tagsByCode.length; row++ ) {
		for ( String tag : tagsByCode[ row ] ) {
			tagLookUp.put( tag, row + 1 );
		}
	}
}
 
开发者ID:etheriau,项目名称:jatetoolkit,代码行数:67,代码来源:Lemmatizer.java

示例7: load

import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
/**
 * Builds a {@link BannerProperties} from already-loaded {@link Properties}.
 * <p>
 * Keys read and their defaults:
 * <ul>
 *   <li>{@code lemmatiserDataDirectory} - optional; when present an {@link EngLemmatiser} is created</li>
 *   <li>{@code posTaggerDataDirectory} - optional; when present a tagger is created according to
 *       {@code posTagger} (default: {@code HeppleTagger}; {@code MedPostTagger} also accepted)</li>
 *   <li>{@code tokenizer} - class name instantiated reflectively through its no-argument
 *       constructor (default: {@code SimpleTokenizer})</li>
 *   <li>{@code tagFormat} (default {@code "IOB"}), {@code useParenthesisPostProcessing} (default {@code "true"}),
 *       {@code useNumericNormalization} (default {@code "true"}), {@code order} (default {@code "2"}),
 *       {@code useFeatureInduction} (default {@code "false"}), {@code textDirection} (default {@code "Forward"})</li>
 *   <li>{@code dictionary} - optional file name; when present its content is added to a
 *       {@code GeneDictionaryTagger} under mention type {@code "GENE"} and used as pre-tagger</li>
 *   <li>{@code regexFilename} - optional, stored as-is</li>
 * </ul>
 *
 * @param properties the configuration values
 * @return the populated {@link BannerProperties}
 * @throws IOException if the dictionary file cannot be opened, read or closed
 * @throws ClassNotFoundException if the tokenizer class cannot be found
 * @throws InstantiationException if the tokenizer class cannot be instantiated
 * @throws IllegalAccessException if the tokenizer constructor is not accessible
 * @throws IllegalArgumentException if {@code posTagger} names an unknown tagger type
 */
public static BannerProperties load(Properties properties) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {
   BannerProperties bannerProperties = new BannerProperties();
   String lemmatiserDataDirectory = properties.getProperty("lemmatiserDataDirectory");
   if (lemmatiserDataDirectory != null)
     bannerProperties.lemmatiser = new EngLemmatiser(lemmatiserDataDirectory, false, true);
   String posTaggerDataDirectory = properties.getProperty("posTaggerDataDirectory");
   if (posTaggerDataDirectory != null)
   {
     String posTagger = properties.getProperty("posTagger", HeppleTagger.class.getName());
     if (posTagger.equals(HeppleTagger.class.getName()))
       bannerProperties.posTagger = new HeppleTagger(posTaggerDataDirectory);
     else if (posTagger.equals(MedPostTagger.class.getName()))
       bannerProperties.posTagger = new MedPostTagger(posTaggerDataDirectory);
     else
       throw new IllegalArgumentException("Unknown POS tagger type: " + posTagger);
   }
   String tokenizer = properties.getProperty("tokenizer", SimpleTokenizer.class.getName());
   bannerProperties.tokenizer = (Tokenizer) Class.forName(tokenizer).newInstance();
   // Note assumption that the tokenizer constructor takes no
   // parameters
   bannerProperties.tagFormat = TagFormat.valueOf(properties.getProperty("tagFormat", "IOB"));
   if (Boolean.parseBoolean(properties.getProperty("useParenthesisPostProcessing", "true")))
     bannerProperties.postProcessor = new ParenthesisPostProcessor();
   bannerProperties.useNumericNormalization = Boolean.parseBoolean(properties.getProperty("useNumericNormalization", "true"));
   bannerProperties.order = Integer.parseInt(properties.getProperty("order", "2"));
   bannerProperties.useFeatureInduction = Boolean.parseBoolean(properties.getProperty("useFeatureInduction", "false"));
   bannerProperties.textDirection = TextDirection.valueOf(properties.getProperty("textDirection", "Forward"));
   String dictionaryFileName = properties.getProperty("dictionary");
   if (dictionaryFileName != null)
   {
     // FIXME This is a temporary hack
     DictionaryTagger dictTagger = new GeneDictionaryTagger(bannerProperties.tokenizer, true);
     FileReader reader = new FileReader(dictionaryFileName);
     try
     {
       dictTagger.add(reader, MentionType.getType("GENE"));
     }
     finally
     {
       // Close the dictionary file even when parsing it fails.
       reader.close();
     }
     System.out.println("Dict size - " + dictTagger.size());
     bannerProperties.preTagger = dictTagger;
   }
   bannerProperties.regexFilename = properties.getProperty("regexFilename");

   return bannerProperties;
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:43,代码来源:BannerProperties.java

示例8: getLemmatiser

import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
/**
 * Returns the lemmatiser configured for training and tagging.
 *
 * @return the {@link EngLemmatiser} stored in this instance, exactly as held in the field
 */
public EngLemmatiser getLemmatiser() {
	return this.lemmatiser;
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:9,代码来源:BannerProperties.java

示例9: EnglishLemmatisationFilter

import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
public EnglishLemmatisationFilter(EngLemmatiser dragontoolLemmatiser, TokenStream input) {
    super(input);
    lemmatiser = new Lemmatiser(dragontoolLemmatiser);
}
 
开发者ID:ziqizhang,项目名称:jate,代码行数:5,代码来源:EnglishLemmatisationFilter.java

示例10: main

import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
/**
 * Command-line entry point of the scorer.
 * <p>
 * Requires five arguments: dataset name, ATE output folder, ATE output file type,
 * evaluation output file and gold-standard file. With fewer arguments the usage text is
 * printed and the JVM exits with status -1. For dataset name {@code "genia"} the GENIA
 * report is created; for any other name the ACL RD-TEC report is created. The lemmatiser
 * data is read from {@code src/test/resource/lemmatiser} under the current working
 * directory ({@code user.dir}).
 *
 * @param args command-line arguments, see above
 * @throws IOException propagated from report creation
 * @throws JATEException propagated from report creation
 * @throws ParseException propagated from report creation
 */
public static void main(String[] args) throws IOException, JATEException, ParseException {
        // args[4] (gold-standard file) is read below, so five arguments are mandatory.
        if (args == null || args.length < 5) {
            StringBuilder sb = new StringBuilder("Usage:\n");
            sb.append("java -cp 'jate.jar' ").append(Scorer.class.getName()).append(" ")
                    .append("[CORPUS_NAME] [ATE_OUTPUT_DIR] [ATE_OUTPUT_FILE_TYPE] [EVAL_OUTPUT_FILE] [GOLD_STANDARD_FILE] ").append("\n\n");
            sb.append("Example: java -cp 'jate.jar' ").append(Scorer.class.getName())
                    .append(" genia /c/jate/outputDir/ csv genia_eval.csv /c/jate/eval/GENIA/concept.txt \n\n");
            sb.append("[OPTIONS]:\n")
                    .append("\t\targs[0]:\t\t 'genia', 'aclrdtec1' or any other dataset name.\n")
                    .append("\t\targs[1]:\t\t ATE algorithms output folder that contains one or more ranked term candidates output.\n")
                    .append("\t\targs[2]:\t\t ATE algorithms output file type. Two options are 'csv' and 'json'. If file type is 'csv', it should contain a header row. \n")
                    .append("\t\targs[3]:\t\t A file name & path to save evaluation output (should not be the same folder of ATE algorithm output.\n")
                    .append("\t\targs[4]:\t\t Gold standard file used for the evaluation.\n");

            System.out.println(sb);
            System.exit(-1);
        }
        String workingDir = System.getProperty("user.dir");
        Lemmatiser lemmatiser = new Lemmatiser(new EngLemmatiser(
                Paths.get(workingDir, "src", "test", "resource", "lemmatiser").toString(), false, false
        ));

        String datasetName = args[0];
        String ateOutputFolder = args[1];
        String ateOutputType = args[2];
        String outFile = args[3];
        // The gold standard always comes from the command line; no bundled default is used.
        String gsFile = args[4];

        if (datasetName.equals("genia")) {
            createReportGenia(lemmatiser, ateOutputFolder, ateOutputType,
                    gsFile, outFile,
                    EVAL_CONDITION_IGNORE_SYMBOL, EVAL_CONDITION_IGNORE_DIGITS, EVAL_CONDITION_CASE_INSENSITIVE,
                    EVAL_CONDITION_CHAR_RANGE_MIN, EVAL_CONDITION_CHAR_RANGE_MAX,
                    EVAL_CONDITION_TOKEN_RANGE_MIN, EVAL_CONDITION_TOKEN_RANGE_MAX,
                    EVAL_CONDITION_TOP_N, EVAL_CONDITION_TOP_K, IS_COMPUTE_ATR4S_AvP);
        } else {
            //DOES NOT REALLY NEED ANY PRUNE/LEMMATISATION FOR ACL RD-TEC 1.0
            // RECOMMENDED EVALUATION OF ACL RD-TEC 1.0 AS FOLLOWS
//            createReportACLRD(null, ateOutputFolder, ateOutputType,
//                    gsFile, outFile,
//                    false, false, false,
//                    -1, -1,
//                    -1, -1,
//                    EVAL_CONDITION_TOP_N, EVAL_CONDITION_TOP_K, IS_COMPUTE_ATR4S_AvP);

            createReportACLRD(lemmatiser, ateOutputFolder, ateOutputType,
                    gsFile, outFile,
                    EVAL_CONDITION_IGNORE_SYMBOL, EVAL_CONDITION_IGNORE_DIGITS, EVAL_CONDITION_CASE_INSENSITIVE,
                    EVAL_CONDITION_CHAR_RANGE_MIN, EVAL_CONDITION_CHAR_RANGE_MAX,
                    EVAL_CONDITION_TOKEN_RANGE_MIN, EVAL_CONDITION_TOKEN_RANGE_MAX,
                    EVAL_CONDITION_TOP_N, EVAL_CONDITION_TOP_K, IS_COMPUTE_ATR4S_AvP);
        }
    }
 
开发者ID:ziqizhang,项目名称:jate,代码行数:58,代码来源:Scorer.java

示例11: load

import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
/**
    * Loads the properties file from the specified filename, and instantiates any objects to be used, such as the lemmatiser and part-of-speech (pos)
    * tagger.
    * <p>
    * Accepted {@code tokenizer} values are the class names of {@code NaiveTokenizer}, {@code SimpleTokenizer} (the default) and
    * {@code BaseTokenizer}; accepted {@code posTagger} values are the class names of {@code HeppleTagger} (the default) and
    * {@code MedPostTagger}. The properties file is closed once it has been read.
    * 
    * @param filename
    *           path of the properties file to read
    * @return An instance of {@link BannerProperties} which can be queried for configuration parameters
    * @throws RuntimeException
    *            wrapping any exception raised while reading the file or building the configured objects
    */
public static BannerProperties load(String filename)
{
	
	Properties properties = new Properties();
	BannerProperties bannerProperties = new BannerProperties();
	try {
		// Properties.load does not close the stream it is given, so close it here.
		FileInputStream propertiesStream = new FileInputStream(filename);
		try {
			properties.load(propertiesStream);
		} finally {
			propertiesStream.close();
		}
		String lemmatiserDataDirectory = properties.getProperty("lemmatiserDataDirectory");
		if (lemmatiserDataDirectory != null)
			bannerProperties.lemmatiser = new EngLemmatiser(lemmatiserDataDirectory, false, true);
		String posTaggerDataDirectory = properties.getProperty("posTaggerDataDirectory");
		if (posTaggerDataDirectory != null)
		{
			String posTagger = properties.getProperty("posTagger", HeppleTagger.class.getName());
			if (posTagger.equals(HeppleTagger.class.getName()))
				bannerProperties.posTagger = new HeppleTagger(posTaggerDataDirectory);
			else if (posTagger.equals(MedPostTagger.class.getName()))
				bannerProperties.posTagger = new MedPostTagger(posTaggerDataDirectory);
			else
				throw new IllegalArgumentException("Unknown POS tagger type: " + posTagger);
		}
		String tokenizer = properties.getProperty("tokenizer", SimpleTokenizer.class.getName());
		if (tokenizer.equals(NaiveTokenizer.class.getName()))
			bannerProperties.tokenizer = new NaiveTokenizer();
		else if (tokenizer.equals(SimpleTokenizer.class.getName()))
			bannerProperties.tokenizer = new SimpleTokenizer();
		else if (tokenizer.equals(BaseTokenizer.class.getName()))
			bannerProperties.tokenizer = new BaseTokenizer();
		else
			throw new IllegalArgumentException("Unknown tokenizer type: " + tokenizer);
		bannerProperties.tagFormat = TagFormat.valueOf(properties.getProperty("tagFormat", "IOB"));
		if (Boolean.parseBoolean(properties.getProperty("useParenthesisPostProcessing", "true")))
			bannerProperties.postProcessor = new ParenthesisPostProcessor();
		bannerProperties.useNumericNormalization = Boolean.parseBoolean(properties.getProperty("useNumericNormalization", "true"));
		bannerProperties.order = Integer.parseInt(properties.getProperty("order", "2"));
		bannerProperties.useFeatureInduction = Boolean.parseBoolean(properties.getProperty("useFeatureInduction", "false"));
		bannerProperties.textDirection = TextDirection.valueOf(properties.getProperty("textDirection", "Forward"));
	} catch (Exception e) {
		throw new RuntimeException(e);
	}
	return bannerProperties;
}
 
开发者ID:leebird,项目名称:legonlp,代码行数:50,代码来源:BannerProperties.java

示例12: getLemmatiser

import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
/**
    * Returns the lemmatiser configured for training and tagging.
    *
    * @return the {@link EngLemmatiser} stored in this instance, exactly as held in the field
    */
public EngLemmatiser getLemmatiser()
{
	return this.lemmatiser;
}
 
开发者ID:leebird,项目名称:legonlp,代码行数:7,代码来源:BannerProperties.java

示例13: load

import dragon.nlp.tool.lemmatiser.EngLemmatiser; //导入依赖的package包/类
/**
 * Loads the properties file at {@code filename} and instantiates the configured objects,
 * resolving the data directories relative to {@code dataroot}.
 * <p>
 * {@code dataroot} is prepended by plain string concatenation to the
 * {@code lemmatiserDataDirectory} and {@code posTaggerDataDirectory} values; no path
 * separator is inserted. Accepted {@code tokenizer} values are the class names of
 * {@code NaiveTokenizer}, {@code SimpleTokenizer} (the default) and {@code BaseTokenizer};
 * accepted {@code posTagger} values are the class names of {@code HeppleTagger} (the
 * default) and {@code MedPostTagger}. The properties file is closed once it has been read.
 *
 * @param filename path of the properties file to read
 * @param dataroot prefix prepended to the lemmatiser and POS tagger data directories
 * @return the populated {@link BannerProperties}
 * @throws RuntimeException wrapping any exception raised while reading the file or
 *         building the configured objects
 */
public static BannerProperties load(String filename, String dataroot)
{
	
	Properties properties = new Properties();
	BannerProperties bannerProperties = new BannerProperties();
	try {
		// Properties.load does not close the stream it is given, so close it here.
		FileInputStream propertiesStream = new FileInputStream(filename);
		try {
			properties.load(propertiesStream);
		} finally {
			propertiesStream.close();
		}
		String lemmatiserDataDirectory = properties.getProperty("lemmatiserDataDirectory");
		if (lemmatiserDataDirectory != null)
			bannerProperties.lemmatiser = new EngLemmatiser(dataroot+lemmatiserDataDirectory, false, true);
		String posTaggerDataDirectory = properties.getProperty("posTaggerDataDirectory");
		if (posTaggerDataDirectory != null)
		{
			String posTagger = properties.getProperty("posTagger", HeppleTagger.class.getName());
			if (posTagger.equals(HeppleTagger.class.getName()))
				bannerProperties.posTagger = new HeppleTagger(dataroot+posTaggerDataDirectory);
			else if (posTagger.equals(MedPostTagger.class.getName()))
				bannerProperties.posTagger = new MedPostTagger(dataroot+posTaggerDataDirectory);
			else
				throw new IllegalArgumentException("Unknown POS tagger type: " + posTagger);
		}
		String tokenizer = properties.getProperty("tokenizer", SimpleTokenizer.class.getName());
		if (tokenizer.equals(NaiveTokenizer.class.getName()))
			bannerProperties.tokenizer = new NaiveTokenizer();
		else if (tokenizer.equals(SimpleTokenizer.class.getName()))
			bannerProperties.tokenizer = new SimpleTokenizer();
		else if (tokenizer.equals(BaseTokenizer.class.getName()))
			bannerProperties.tokenizer = new BaseTokenizer();
		else
			throw new IllegalArgumentException("Unknown tokenizer type: " + tokenizer);
		bannerProperties.tagFormat = TagFormat.valueOf(properties.getProperty("tagFormat", "IOB"));
		if (Boolean.parseBoolean(properties.getProperty("useParenthesisPostProcessing", "true")))
			bannerProperties.postProcessor = new ParenthesisPostProcessor();
		bannerProperties.useNumericNormalization = Boolean.parseBoolean(properties.getProperty("useNumericNormalization", "true"));
		bannerProperties.order = Integer.parseInt(properties.getProperty("order", "2"));
		bannerProperties.useFeatureInduction = Boolean.parseBoolean(properties.getProperty("useFeatureInduction", "false"));
		bannerProperties.textDirection = TextDirection.valueOf(properties.getProperty("textDirection", "Forward"));
	} catch (Exception e) {
		throw new RuntimeException(e);
	}
	return bannerProperties;
}
 
开发者ID:BlueBrain,项目名称:bluima,代码行数:43,代码来源:BannerProperties.java


注:本文中的dragon.nlp.tool.lemmatiser.EngLemmatiser类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。