当前位置: 首页>>代码示例>>Java>>正文


Java PlainTextByLineStream类代码示例

本文整理汇总了Java中opennlp.tools.util.PlainTextByLineStream的典型用法代码示例。如果您正苦于以下问题：Java PlainTextByLineStream类的具体用法？Java PlainTextByLineStream怎么用？Java PlainTextByLineStream使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。


PlainTextByLineStream类属于opennlp.tools.util包，在下文中一共展示了PlainTextByLineStream类的12个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: buildModel

import opennlp.tools.util.PlainTextByLineStream; //导入依赖的package包/类
/**
 * Trains a name finder model for the given entity type from the annotated
 * training data file and serializes it to the configured model file.
 *
 * <p>Progress is printed to standard output. Any {@link Exception} raised while
 * reading, training or writing is reported on standard error rather than
 * propagated, so callers are not interrupted by a failed training run.
 *
 * @param entityType the entity type handed to {@code NameFinderME.train}
 */
@Override
public void buildModel(String entityType) {
  try {
    System.out.println("\tBuilding Model using " + annotatedSentences.size() + " annotations");
    System.out.println("\t\treading training data...");
    Charset charset = Charset.forName("UTF-8");

    TokenNameFinderModel model;
    // Closing the sample stream also closes the wrapped line stream, even when training fails.
    try (ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
            new PlainTextByLineStream(new MarkableFileInputStreamFactory(params.getAnnotatedTrainingDataFile()), charset))) {
      model = NameFinderME.train("en", entityType, sampleStream, null);
    }

    // try-with-resources guarantees the model file is flushed and closed on every path.
    try (OutputStream modelOut = new BufferedOutputStream(new FileOutputStream(params.getModelFile()))) {
      model.serialize(modelOut);
    }
    System.out.println("\tmodel generated");
  } catch (Exception e) {
    // Keep the original best-effort contract (no exception escapes), but make the failure visible.
    System.err.println("\tfailed to build model for entity type '" + entityType + "': " + e);
  }
}
 
开发者ID:apache,项目名称:opennlp-addons,代码行数:23,代码来源:GenericModelableImpl.java

示例2: getNLPModel

import opennlp.tools.util.PlainTextByLineStream; //导入依赖的package包/类
/**
 * Trains a document categorizer model from the given training file and then
 * evaluates it against that same file.
 *
 * <p>Training uses a bag-of-words feature generator, the whitespace tokenizer,
 * a cutoff of 0 and 4000 iterations. The value of {@code params.algorithm()} is
 * printed to standard output before training.
 *
 * @param openNLPTraining file containing the training samples, read as UTF-8
 * @return the trained model
 * @throws IOException if the training file cannot be read
 */
public static DoccatModel getNLPModel(File openNLPTraining) throws IOException {
	FeatureGenerator[] def = { new BagOfWordsFeatureGenerator() };
	WhitespaceTokenizer tokenizer = WhitespaceTokenizer.INSTANCE;

	DoccatFactory factory = new DoccatFactory(tokenizer, def);
	InputStreamFactory isf = new MarkableFileInputStreamFactory(openNLPTraining);

	DoccatModel model;
	// The sample stream wraps the line stream; closing it releases the underlying file.
	try (ObjectStream<DocumentSample> sampleStream =
			new DocumentSampleStream(new PlainTextByLineStream(isf, "UTF-8"))) {
		TrainingParameters params = TrainingParameters.defaultParams();
		System.out.println(params.algorithm());
		params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(0));
		params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(4000));

		model = DocumentCategorizerME.train("en", sampleStream, params, factory);
	}

	evaluateDoccatModel(model, openNLPTraining);

	return model;
}
 
开发者ID:SOBotics,项目名称:SOCVFinder,代码行数:24,代码来源:ModelCreator.java

示例3: trainSentences

import opennlp.tools.util.PlainTextByLineStream; //导入依赖的package包/类
/**
 * Trains a sentence detector model (language code "en", MAXENT algorithm,
 * 100 iterations, cutoff 0, end-of-sentence characters ".?!") and writes the
 * serialized model to a file.
 *
 * @param inResource name of the training resource, resolved with
 *                   {@code Trainer.class.getResourceAsStream}
 * @param outFile    path of the file that receives the serialized model
 * @throws IOException if the resource is missing, or reading or writing fails
 */
public static void trainSentences(final String inResource, String outFile) throws IOException {
    InputStreamFactory inputStreamFactory = new InputStreamFactory() {
        @Override
        public InputStream createInputStream() throws IOException {
            InputStream resource = Trainer.class.getResourceAsStream(inResource);
            if (resource == null) {
                // getResourceAsStream reports a missing resource with null, not an exception.
                throw new IOException("Training resource not found: " + inResource);
            }
            return resource;
        }
    };
    TrainingParameters trainingParameters = new TrainingParameters();
    trainingParameters.put(TrainingParameters.ALGORITHM_PARAM, ModelType.MAXENT.name());
    trainingParameters.put(TrainingParameters.ITERATIONS_PARAM, "100");
    trainingParameters.put(TrainingParameters.CUTOFF_PARAM, "0");
    SentenceDetectorFactory sentenceDetectorFactory = SentenceDetectorFactory.create(null, "en", true, null, ".?!".toCharArray());

    SentenceModel sentdetectModel;
    // Close the training data even when training throws.
    try (SentenceSampleStream samples = new SentenceSampleStream(new PlainTextByLineStream(inputStreamFactory, StandardCharsets.UTF_8))) {
        sentdetectModel = SentenceDetectorME.train("en", samples, sentenceDetectorFactory, trainingParameters);
    }
    // Close the model file even when serialization throws.
    try (FileOutputStream out = new FileOutputStream(outFile)) {
        sentdetectModel.serialize(out);
    }
}
 
开发者ID:jprante,项目名称:elasticsearch-analysis-opennlp,代码行数:21,代码来源:Trainer.java

示例4: trainChunker

import opennlp.tools.util.PlainTextByLineStream; //导入依赖的package包/类
/**
 * Trains a chunker model (language code "en", 70 iterations, cutoff 1) and
 * writes the serialized model to a file.
 *
 * @param inResource name of the training resource, resolved with
 *                   {@code Trainer.class.getResourceAsStream}
 * @param outFile    path of the file that receives the serialized model
 * @throws IOException if the resource is missing, or reading or writing fails
 */
public static void trainChunker(final String inResource, String outFile) throws IOException {
    InputStreamFactory inputStreamFactory = new InputStreamFactory() {
        @Override
        public InputStream createInputStream() throws IOException {
            InputStream resource = Trainer.class.getResourceAsStream(inResource);
            if (resource == null) {
                // getResourceAsStream reports a missing resource with null, not an exception.
                throw new IOException("Training resource not found: " + inResource);
            }
            return resource;
        }
    };
    TrainingParameters trainingParameters = new TrainingParameters();
    trainingParameters.put(TrainingParameters.ITERATIONS_PARAM, "70");
    trainingParameters.put(TrainingParameters.CUTOFF_PARAM, "1");

    ChunkerFactory chunkerFactory = ChunkerFactory.create(null);

    ChunkerModel model;
    // Close the training data even when training throws.
    try (ChunkSampleStream samples = new ChunkSampleStream(new PlainTextByLineStream(inputStreamFactory, StandardCharsets.UTF_8))) {
        model = ChunkerME.train("en", samples, trainingParameters, chunkerFactory);
    }
    // Close the model file even when serialization throws.
    try (FileOutputStream out = new FileOutputStream(outFile)) {
        model.serialize(out);
    }
}
 
开发者ID:jprante,项目名称:elasticsearch-analysis-opennlp,代码行数:21,代码来源:Trainer.java

示例5: trainNameFinder

import opennlp.tools.util.PlainTextByLineStream; //导入依赖的package包/类
/**
 * Trains a name finder model for the "person" type (language code "en",
 * 5 iterations, cutoff 200, BIO sequence codec, no extra resources) and writes
 * the serialized model to a file.
 *
 * @param inResource name of the training resource, resolved with
 *                   {@code Trainer.class.getResourceAsStream}
 * @param outFile    path of the file that receives the serialized model
 * @throws IOException if the resource is missing, or reading or writing fails
 */
public static void trainNameFinder(final String inResource, String outFile) throws IOException {
    InputStreamFactory inputStreamFactory = new InputStreamFactory() {
        @Override
        public InputStream createInputStream() throws IOException {
            InputStream resource = Trainer.class.getResourceAsStream(inResource);
            if (resource == null) {
                // getResourceAsStream reports a missing resource with null, not an exception.
                throw new IOException("Training resource not found: " + inResource);
            }
            return resource;
        }
    };
    TrainingParameters trainingParameters = new TrainingParameters();
    trainingParameters.put(TrainingParameters.ITERATIONS_PARAM, "5");
    trainingParameters.put(TrainingParameters.CUTOFF_PARAM, "200");
    byte[] featureGeneratorBytes = null;
    Map<String, Object> resources = Collections.<String, Object>emptyMap();
    SequenceCodec<String> seqCodec = new BioCodec();
    TokenNameFinderFactory tokenNameFinderFactory = TokenNameFinderFactory.create(null, featureGeneratorBytes, resources, seqCodec);

    TokenNameFinderModel model;
    // Close the training data even when training throws.
    try (NameSampleDataStream samples = new NameSampleDataStream(new PlainTextByLineStream(inputStreamFactory, StandardCharsets.UTF_8))) {
        model = NameFinderME.train("en", "person", samples, trainingParameters, tokenNameFinderFactory);
    }
    // Close the model file even when serialization throws.
    try (FileOutputStream out = new FileOutputStream(outFile)) {
        model.serialize(out);
    }
}
 
开发者ID:jprante,项目名称:elasticsearch-analysis-opennlp,代码行数:24,代码来源:Trainer.java

示例6: openSampleData

import opennlp.tools.util.PlainTextByLineStream; //导入依赖的package包/类
/**
 * Opens a sample data file as a stream of POS samples.
 *
 * <p>The file is first checked with {@code CmdLineUtil.checkInputFile} under the
 * label {@code sampleDataName + " Data"}, then opened and read line by line
 * through its file channel using the given encoding.
 *
 * @param sampleDataName name used to build the label passed to the input-file check
 * @param sampleDataFile file to read the samples from
 * @param encoding       character encoding of the file
 * @return a word/tag sample stream over the lines of the file
 */
static ObjectStream<POSSample> openSampleData(String sampleDataName, File sampleDataFile, Charset encoding) {
    final String label = sampleDataName + " Data";
    CmdLineUtil.checkInputFile(label, sampleDataFile);
    return new WordTagSampleStream(
            new PlainTextByLineStream(CmdLineUtil.openInFile(sampleDataFile).getChannel(), encoding));
}
 
开发者ID:radsimu,项目名称:UaicNlpToolkit,代码行数:7,代码来源:POStrainer.java

示例7: evaluateDoccatModel

import opennlp.tools.util.PlainTextByLineStream; //导入依赖的package包/类
/**
 * Evaluates a document categorizer model against the samples in the given file
 * and prints the evaluator to standard output.
 *
 * <p>An error listener and a fine-grained report listener are attached to the
 * evaluator.
 *
 * @param model           the model to evaluate
 * @param openNLPTraining file containing the evaluation samples, read as UTF-8
 * @throws IOException if the file cannot be read
 */
public static void evaluateDoccatModel(DoccatModel model,File openNLPTraining) throws IOException{
	InputStreamFactory isf = new MarkableFileInputStreamFactory(openNLPTraining);

	// Build the monitor array directly instead of going through an intermediate list.
	DoccatEvaluationMonitor[] listeners = {
			new DoccatEvaluationErrorListener(),
			new DoccatFineGrainedReportListener()
	};
	DocumentCategorizerEvaluator eval = new DocumentCategorizerEvaluator(new DocumentCategorizerME(model), listeners);

	// Closing the sample stream releases the underlying file, even when evaluation throws.
	try (ObjectStream<DocumentSample> sampleStream =
			new DocumentSampleStream(new PlainTextByLineStream(isf, "UTF-8"))) {
		eval.evaluate(sampleStream);
	}
	System.out.println(eval);
}
 
开发者ID:SOBotics,项目名称:SOCVFinder,代码行数:15,代码来源:ModelCreator.java

示例8: run

import opennlp.tools.util.PlainTextByLineStream; //导入依赖的package包/类
/**
 * Trains a document categorizer model (language code "pt") from the file at
 * {@code train} and serializes it to the file at {@code bin}.
 *
 * <p>An {@link IOException} is logged through {@code LOGGER} rather than
 * propagated. The model output stream is handed to {@code closeOutputStream}
 * in all cases where it was opened.
 */
@PostConstruct
@SuppressWarnings("deprecation")
public void run() {

	OutputStream modelOut = null;

	try {

		// Teach the machine: train on the samples, closing the training data afterwards.
		InputStreamFactory dataIn = new MarkableFileInputStreamFactory(Paths.get(train).toFile());
		DoccatModel model;
		try (ObjectStream<DocumentSample> sampleStream =
				new DocumentSampleStream(new PlainTextByLineStream(dataIn, "UTF-8"))) {
			model = DocumentCategorizerME.train("pt", sampleStream);
		}

		// Write out the file with what it learned.
		modelOut = new BufferedOutputStream(new FileOutputStream(Paths.get(bin).toFile()));
		model.serialize(modelOut);

	} catch (IOException e) {
		LOGGER.error(ExceptionUtils.getStackTrace(e));
	} finally {
		if (Objects.nonNull(modelOut)) {
			closeOutputStream(modelOut);
		}
	}

}
 
开发者ID:sjcdigital,项目名称:temis-api,代码行数:29,代码来源:Train.java

示例9: train

import opennlp.tools.util.PlainTextByLineStream; //导入依赖的package包/类
/**
 * Trains a document categorizer model (language code "en", default training
 * parameters, default factory) and writes the serialized model to a file.
 *
 * @param file_train path of the training data file, read as UTF-8
 * @param file_model path of the file that receives the serialized model
 * @throws IOException if reading the training data or writing the model fails
 */
public static void  train(String file_train, String file_model) throws IOException {
	DoccatModel model;
	// Closing the sample stream also releases the training file, even when training throws.
	try (ObjectStream<DocumentSample> sampleStream = new DocumentSampleStream(
			new PlainTextByLineStream(new MarkableFileInputStreamFactory(
					new File(file_train)), "UTF-8"))) {
		TrainingParameters param = TrainingParameters.defaultParams();
		DoccatFactory factory = new DoccatFactory();
		model = DocumentCategorizerME.train("en", sampleStream,param,factory);
	}

	// The original never closed this stream; try-with-resources closes it on every path.
	try (FileOutputStream modelOut = new FileOutputStream(file_model)) {
		model.serialize(modelOut);
	}
}
 
开发者ID:jackeylu,项目名称:NLP_with_Java_zh,代码行数:15,代码来源:SentenceTest.java

示例10: main

import opennlp.tools.util.PlainTextByLineStream; //导入依赖的package包/类
/**
 * Trains a sentence detector (language code "en", default training parameters)
 * from {@code model/openNPLTraining.txt}, using the dictionary loaded from
 * {@code ini/stop_words.txt}, then prints "done".
 *
 * @param args command-line arguments (not used)
 * @throws IOException if either input file cannot be read
 */
public static void main(String[] args) throws IOException {
	Charset charset = Charset.forName("UTF-8");
	InputStreamFactory isf = new MarkableFileInputStreamFactory(new File("model/openNPLTraining.txt"));

	// Both the sample stream and the dictionary input are closed on every path.
	try (ObjectStream<SentenceSample> sampleStream = new SentenceSampleStream(new PlainTextByLineStream(isf, charset));
			FileInputStream dictIn = new FileInputStream(new File("ini/stop_words.txt"))) {
		Dictionary dict = new Dictionary(dictIn);
		SentenceDetectorFactory sdf = new SentenceDetectorFactory("en",true,dict,null);
		TrainingParameters params = TrainingParameters.defaultParams();
		// NOTE(review): the trained model is neither saved nor used afterwards - confirm whether it should be serialized.
		SentenceDetectorME.train("en", sampleStream, sdf,params);
	}

	System.out.println("done");

}
 
开发者ID:SOBotics,项目名称:SOCVFinder,代码行数:26,代码来源:SentanceDetector.java

示例11: trainTokenizer

import opennlp.tools.util.PlainTextByLineStream; //导入依赖的package包/类
/**
 * Trains a tokenizer model (language code "en", 100 iterations, cutoff 5, no
 * dictionary, no custom alphanumeric pattern) and writes the serialized model
 * to a file.
 *
 * @param inResource name of the training resource, resolved with
 *                   {@code Trainer.class.getResourceAsStream}
 * @param outFile    path of the file that receives the serialized model
 * @throws IOException if the resource is missing, or reading or writing fails
 */
public static void trainTokenizer(final String inResource, String outFile) throws IOException {
    InputStreamFactory inputStreamFactory = new InputStreamFactory() {
        @Override
        public InputStream createInputStream() throws IOException {
            InputStream resource = Trainer.class.getResourceAsStream(inResource);
            if (resource == null) {
                // getResourceAsStream reports a missing resource with null, not an exception.
                throw new IOException("Training resource not found: " + inResource);
            }
            return resource;
        }
    };
    TrainingParameters trainingParameters = new TrainingParameters();
    trainingParameters.put(TrainingParameters.ITERATIONS_PARAM, "100");
    trainingParameters.put(TrainingParameters.CUTOFF_PARAM, "5");
    String subclassname = null;
    String langcode = "en";
    Dictionary dict = null;
    Pattern alphanumericpattern = null;

    opennlp.tools.tokenize.TokenizerFactory tokenizerFactory = TokenizerFactory.create(subclassname, langcode, dict, true, alphanumericpattern);

    TokenizerModel model;
    // Close the training data even when training throws.
    try (ObjectStream<TokenSample> samples = new TokenSampleStream(new PlainTextByLineStream(inputStreamFactory, StandardCharsets.UTF_8))) {
        model = TokenizerME.train(samples, tokenizerFactory, trainingParameters);
    }
    // Close the model file even when serialization throws.
    try (FileOutputStream out = new FileOutputStream(outFile)) {
        model.serialize(out);
    }
}
 
开发者ID:jprante,项目名称:elasticsearch-analysis-opennlp,代码行数:25,代码来源:Trainer.java

示例12: trainPOS

import opennlp.tools.util.PlainTextByLineStream; //导入依赖的package包/类
/**
 * Trains a part-of-speech tagger model (language code "en", MAXENT algorithm,
 * 100 iterations, cutoff 5, no n-gram or tag dictionary) and writes the
 * serialized model to a file.
 *
 * @param inResource name of the training resource, resolved with
 *                   {@code Trainer.class.getResourceAsStream}
 * @param outFile    path of the file that receives the serialized model
 * @throws IOException if the resource is missing, or reading or writing fails
 */
public static void trainPOS(final String inResource, String outFile) throws IOException {
    InputStreamFactory inputStreamFactory = new InputStreamFactory() {
        @Override
        public InputStream createInputStream() throws IOException {
            InputStream resource = Trainer.class.getResourceAsStream(inResource);
            if (resource == null) {
                // getResourceAsStream reports a missing resource with null, not an exception.
                throw new IOException("Training resource not found: " + inResource);
            }
            return resource;
        }
    };
    TrainingParameters trainingParameters = new TrainingParameters();
    trainingParameters.put(TrainingParameters.ALGORITHM_PARAM, ModelType.MAXENT.name());
    trainingParameters.put(TrainingParameters.ITERATIONS_PARAM, "100");
    trainingParameters.put(TrainingParameters.CUTOFF_PARAM, "5");
    Dictionary ngramDictionary = null;
    POSDictionary posDictionary = null;
    POSTaggerFactory posTaggerFactory = POSTaggerFactory.create(null, ngramDictionary, posDictionary);

    POSModel model;
    // Close the training data even when training throws.
    try (WordTagSampleStream samples = new WordTagSampleStream(new PlainTextByLineStream(inputStreamFactory, StandardCharsets.UTF_8))) {
        model = POSTaggerME.train("en", samples, trainingParameters, posTaggerFactory);
    }
    // Close the model file even when serialization throws.
    try (FileOutputStream out = new FileOutputStream(outFile)) {
        model.serialize(out);
    }
}
 
开发者ID:jprante,项目名称:elasticsearch-analysis-opennlp,代码行数:23,代码来源:Trainer.java


注:本文中的opennlp.tools.util.PlainTextByLineStream类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。