本文整理汇总了Java中de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordPosTagger类的典型用法代码示例。如果您正苦于以下问题:Java StanfordPosTagger类的具体用法?Java StanfordPosTagger怎么用?Java StanfordPosTagger使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
StanfordPosTagger类属于de.tudarmstadt.ukp.dkpro.core.stanfordnlp包,在下文中一共展示了StanfordPosTagger类的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: main
import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordPosTagger; //导入依赖的package包/类
/**
 * Preprocessing pipeline: reads plain-text documents and runs Stanford
 * segmentation, POS tagging, lemmatization, Snowball stemming and
 * constituency/dependency parsing, then writes the annotated CASes as
 * binary (.bin6) files and prints corpus statistics.
 *
 * @param args unused; input location comes from the {@code textFolder}/{@code textPattern} fields
 * @throws UIMAException if a pipeline component cannot be created or fails
 * @throws IOException if documents cannot be read or written
 */
public static void main(String[] args) throws UIMAException, IOException {
    // read text documents
    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(TextReader.class,
            TextReader.PARAM_SOURCE_LOCATION, textFolder, TextReader.PARAM_PATTERNS, textPattern,
            TextReader.PARAM_LANGUAGE, "en");
    // preprocess documents; typographic ("curly") quote characters are declared
    // so the Stanford components recognize them as quotes
    List<String> quoteBeginList = Arrays.asList("“", "‘");
    List<String> quoteEndList = Arrays.asList("”", "’");
    AnalysisEngineDescription segmenter = AnalysisEngineFactory.createEngineDescription(StanfordSegmenter.class);
    AnalysisEngineDescription pos = AnalysisEngineFactory.createEngineDescription(StanfordPosTagger.class,
            StanfordPosTagger.PARAM_QUOTE_BEGIN, quoteBeginList, StanfordPosTagger.PARAM_QUOTE_END, quoteEndList);
    AnalysisEngineDescription lemmatizer = AnalysisEngineFactory.createEngineDescription(StanfordLemmatizer.class);
    AnalysisEngineDescription stemmer = AnalysisEngineFactory.createEngineDescription(SnowballStemmer.class,
            SnowballStemmer.PARAM_LOWER_CASE, true);
    // FIX: reference the parser's own PARAM_QUOTE_BEGIN/PARAM_QUOTE_END constants
    // instead of the tagger's; this matches how the parser is configured elsewhere
    // in this code base and avoids a misleading cross-component reference
    AnalysisEngineDescription parser = AnalysisEngineFactory.createEngineDescription(StanfordParser.class,
            StanfordParser.PARAM_MODEL_LOCATION, "lib/englishRNN.ser", StanfordParser.PARAM_MODE,
            DependenciesMode.CC_PROPAGATED, StanfordParser.PARAM_QUOTE_BEGIN, quoteBeginList,
            StanfordParser.PARAM_QUOTE_END, quoteEndList);
    // write annotated data to file
    AnalysisEngineDescription writer = AnalysisEngineFactory.createEngineDescription(BinaryCasWriter.class,
            BinaryCasWriter.PARAM_TARGET_LOCATION, textFolder, BinaryCasWriter.PARAM_STRIP_EXTENSION, false,
            BinaryCasWriter.PARAM_FILENAME_EXTENSION, ".bin6", BinaryCasWriter.PARAM_OVERWRITE, true);
    // print statistics
    AnalysisEngineDescription stat = AnalysisEngineFactory.createEngineDescription(CorpusStatWriter.class);
    // run pipeline
    SimplePipeline.runPipeline(reader, segmenter, pos, lemmatizer, stemmer, parser, writer, stat);
}
示例2: run
import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordPosTagger; //导入依赖的package包/类
/**
 * Trains a FlexTag POS-tagging model from line-based token/tag corpora and
 * saves the resulting model to a folder on the user's desktop.
 *
 * @throws Exception if the corpus cannot be read or the experiment fails
 */
public void run()
    throws Exception
{
    // location and format of the training corpus
    String corpusLanguage = "en";
    String corpusLocation = "src/main/resources/raw/";
    String corpusPattern = "*.txt";
    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
            LineTokenTagReader.class, LineTokenTagReader.PARAM_LANGUAGE, corpusLanguage,
            LineTokenTagReader.PARAM_SOURCE_LOCATION, corpusLocation,
            LineTokenTagReader.PARAM_PATTERNS, corpusPattern);
    File modelTarget = new File(System.getProperty("user.home") + "/Desktop/flexOut");
    FlexTagTrainSaveModel experiment = new FlexTagTrainSaveModel(reader, modelTarget);
    // fall back to a desktop DKPro home folder unless one is configured
    if (System.getProperty("DKPRO_HOME") == null) {
        experiment.setDKProHomeFolder(System.getProperty("user.home") + "/Desktop/");
    }
    experiment.setExperimentName("FlexTest");
    // character n-grams (length 2..4, top 50) are the only feature set
    experiment.setFeatures(TcFeatureFactory.create(LuceneCharacterNGram.class,
            LuceneCharacterNGram.PARAM_NGRAM_MIN_N, 2, LuceneCharacterNGram.PARAM_NGRAM_MAX_N,
            4, LuceneCharacterNGram.PARAM_NGRAM_USE_TOP_K, 50));
    // segmentation plus Stanford POS tags as preprocessing for feature extraction
    experiment.setPreprocessing(
            AnalysisEngineFactory.createEngineDescription(BreakIteratorSegmenter.class),
            AnalysisEngineFactory.createEngineDescription(StanfordPosTagger.class,
                    StanfordPosTagger.PARAM_LANGUAGE, "en"));
    experiment.execute();
}
示例3: testSegmentation
import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordPosTagger; //导入依赖的package包/类
/**
 * Runs the shared pipeline plus a Stanford POS tagger over the test JCas and
 * checks that POS annotations exist and that the first NN token is "Liebe".
 */
@Test
public void testSegmentation() throws ResourceInitializationException, AnalysisEngineProcessException {
    AnalysisEngineDescription tagger = AnalysisEngineFactory.createEngineDescription(StanfordPosTagger.class);
    SimplePipeline.runPipeline(jcas, desc, tagger);
    assertTrue(JCasUtil.exists(jcas, POS.class));
    assertEquals("Liebe", JCasUtil.selectByIndex(jcas, NN.class, 0).getCoveredText());
}
示例4: main
import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordPosTagger; //导入依赖的package包/类
/**
 * Drama-analysis pipeline: reads TEI files from TextGrid, segments figure
 * speech, annotates POS tags, named entities and figure mentions, extracts
 * copresence and mention networks, and writes the results as XMI.
 */
public static void main(String[] args) throws ResourceInitializationException, UIMAException, IOException {
    System.setProperty("java.util.logging.config.file", "src/main/resources/logging.properties");
    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(TextgridTEIUrlReader.class,
            TextgridTEIUrlReader.PARAM_INPUT, "src/main/resources");
    // segmentation — the segmenter creates tokens/sentences only for figure
    // speech (and not for stage directions)
    AnalysisEngineDescription segmenter = D.getWrappedSegmenterDescription(LanguageToolSegmenter.class);
    AnalysisEngineDescription figureReferences = createEngineDescription(FigureReferenceAnnotator.class);
    AnalysisEngineDescription speakers = createEngineDescription(SpeakerIdentifier.class,
            SpeakerIdentifier.PARAM_CREATE_SPEAKER_FIGURE, true);
    // standard NLP components; they work here because dkpro only sees the
    // tokens and sentences created above
    AnalysisEngineDescription posTagger = createEngineDescription(StanfordPosTagger.class);
    AnalysisEngineDescription ner = createEngineDescription(StanfordNamedEntityRecognizer.class);
    AnalysisEngineDescription mentions = createEngineDescription(FigureMentionDetection.class);
    // copresence network (default view/type)
    AnalysisEngineDescription copresenceNetwork = createEngineDescription(NetworkExtractor.class);
    // mention network (dedicated view and network type)
    AnalysisEngineDescription mentionNetwork = createEngineDescription(NetworkExtractor.class,
            NetworkExtractor.PARAM_VIEW_NAME, "MentionNetwork",
            NetworkExtractor.PARAM_NETWORK_TYPE, "MentionNetwork");
    // XMI output
    AnalysisEngineDescription xmiWriter = createEngineDescription(XmiWriter.class,
            XmiWriter.PARAM_TARGET_LOCATION, "target/xmi/");
    SimplePipeline.runPipeline(reader, segmenter, figureReferences, speakers, posTagger, ner,
            mentions, copresenceNetwork, mentionNetwork, xmiWriter);
}
示例5: main
import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordPosTagger; //导入依赖的package包/类
/**
 * Builds a tf-idf document-frequency model over the lemmas of German .docx
 * documents and prints a few values from the stored model as a sanity check.
 */
public static void main(String[] args) throws Exception {
    CollectionReaderDescription reader = createReaderDescription(PythagorasCorpusReader.class,
            PythagorasCorpusReader.PARAM_SOURCE_LOCATION, RESOURCES_PATH,
            PythagorasCorpusReader.PARAM_LANGUAGE, "de",
            PythagorasCorpusReader.PARAM_PATTERNS, PythagorasCorpusReader.INCLUDE_PREFIX + "**/*.docx");
    // aggregate engine: segmenter -> POS tagger -> lemmatizer -> tf-idf consumer
    AnalysisEngineDescription segmenter = createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription posTagger = createEngineDescription(StanfordPosTagger.class,
            StanfordPosTagger.PARAM_LANGUAGE, "de");
    AnalysisEngineDescription lemmatizer = createEngineDescription(StanfordLemmatizer.class);
    AnalysisEngineDescription consumer = createEngineDescription(TfidfConsumer.class,
            TfidfConsumer.PARAM_FEATURE_PATH, Lemma.class,
            TfidfConsumer.PARAM_TARGET_LOCATION, OUTPUT_PATH,
            TfidfConsumer.PARAM_LOWERCASE, true);
    AnalysisEngineDescription pipeline = createEngineDescription(segmenter, posTagger, lemmatizer, consumer);
    SimplePipeline.runPipeline(reader, pipeline);
    // The following output is a test to check whether the consumer has built a correct model.
    DfModel model = TfidfUtils.getDfModel(OUTPUT_PATH);
    System.out.println("Testing newly created model..." + "\n" +
            "Very probable token: " + model.getDf("ist") + "\n" +
            "Impossible token: " + model.getDf("impossibleToken") + "\n" +
            "Num of Documents: " + model.getDocumentCount() + "\n" +
            "Feature Path: " + model.getFeaturePath() + "\n" +
            "Lowercase?: " + model.getLowercase()
    );
}
示例6: main
import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordPosTagger; //导入依赖的package包/类
/**
 * Full preprocessing pipeline: reads plain-text documents and runs Stanford
 * segmentation, POS tagging, lemmatization, named-entity recognition and
 * parsing, then writes the annotated CASes as binary (.bin6) files and
 * prints corpus statistics.
 *
 * @param args optional; args[0] overrides the input text folder
 * @throws UIMAException if a pipeline component cannot be created or fails
 * @throws IOException if documents cannot be read or written
 */
public static void main(String[] args) throws UIMAException, IOException {
Logger.getRootLogger().setLevel(Level.INFO);
// 0) parameter
if (args.length > 0)
textFolder = args[0];
// 1) read text documents
CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(TextReader.class,
TextReader.PARAM_SOURCE_LOCATION, textFolder, TextReader.PARAM_PATTERNS, textPattern,
TextReader.PARAM_LANGUAGE, "en");
// 2) process documents
// typographic ("curly") quote characters, declared on each Stanford
// component so they are recognized as quotes
String[] quoteBegin = { "“", "‘" };
List<String> quoteBeginList = Arrays.asList(quoteBegin);
String[] quoteEnd = { "”", "’" };
List<String> quoteEndList = Arrays.asList(quoteEnd);
// tokenization and sentence splitting; every newline starts a new sentence
AnalysisEngineDescription segmenter = AnalysisEngineFactory.createEngineDescription(StanfordSegmenter.class,
StanfordSegmenter.PARAM_NEWLINE_IS_SENTENCE_BREAK, "ALWAYS");
// part-of-speech tagging
AnalysisEngineDescription pos = AnalysisEngineFactory.createEngineDescription(StanfordPosTagger.class,
StanfordPosTagger.PARAM_QUOTE_BEGIN, quoteBeginList, StanfordPosTagger.PARAM_QUOTE_END, quoteEndList);
// lemmatizing
AnalysisEngineDescription lemmatizer = AnalysisEngineFactory.createEngineDescription(StanfordLemmatizer.class,
StanfordLemmatizer.PARAM_QUOTE_BEGIN, quoteBeginList, StanfordLemmatizer.PARAM_QUOTE_END, quoteEndList);
// named entity recognition
AnalysisEngineDescription ner = AnalysisEngineFactory.createEngineDescription(
StanfordNamedEntityRecognizer.class, StanfordNamedEntityRecognizer.PARAM_QUOTE_BEGIN, quoteBeginList,
StanfordNamedEntityRecognizer.PARAM_QUOTE_END, quoteEndList);
// constituency parsing and dependency conversion
AnalysisEngineDescription parser = AnalysisEngineFactory.createEngineDescription(StanfordParser.class,
StanfordParser.PARAM_QUOTE_BEGIN, quoteBeginList, StanfordParser.PARAM_QUOTE_END, quoteEndList,
StanfordParser.PARAM_MODE, DependenciesMode.CC_PROPAGATED);
// coreference resolution
// NOTE(review): this creates an EMPTY aggregate engine — no coreference
// component is configured, so the "coref" pipeline step is effectively a
// no-op. This looks like a placeholder; confirm whether a coreference
// resolver was meant to be plugged in here.
AnalysisEngineDescription coref = AnalysisEngineFactory.createEngineDescription();
// 3) write annotated data to file
AnalysisEngineDescription writer = AnalysisEngineFactory.createEngineDescription(BinaryCasWriter.class,
BinaryCasWriter.PARAM_TARGET_LOCATION, textFolder, BinaryCasWriter.PARAM_STRIP_EXTENSION, false,
BinaryCasWriter.PARAM_FILENAME_EXTENSION, ".bin6", BinaryCasWriter.PARAM_OVERWRITE, true);
// print statistics
AnalysisEngineDescription stat = AnalysisEngineFactory.createEngineDescription(CorpusStatWriter.class);
// 4) run pipeline
SimplePipeline.runPipeline(reader, segmenter, pos, lemmatizer, ner, parser, coref, writer, stat);
}