This article collects typical usage examples of the Java class edu.stanford.nlp.tagger.maxent.MaxentTagger. If you are wondering what the MaxentTagger class is for or how to use it, the curated examples below should help.
The MaxentTagger class belongs to the edu.stanford.nlp.tagger.maxent package. 15 code examples of the class are shown below, sorted by popularity by default.
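Before the individual examples, here is a minimal, self-contained sketch of the typical tokenize-then-tag flow. It assumes the Stanford POS tagger jar and the English models jar are on the classpath; the model path is the same one used in several examples below.

import java.io.StringReader;
import java.util.List;

import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;

public class MaxentTaggerQuickStart {
    public static void main(String[] args) {
        // Load the English left3words model from the classpath.
        MaxentTagger tagger = new MaxentTagger(
                "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");
        // Split raw text into sentences of tokens, then tag each sentence.
        List<List<HasWord>> sentences =
                MaxentTagger.tokenizeText(new StringReader("The quick brown fox jumps over the lazy dog."));
        for (List<HasWord> sentence : sentences) {
            List<TaggedWord> tagged = tagger.tagSentence(sentence);
            System.out.println(Sentence.listToString(tagged, false));
        }
    }
}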
Example 1: doRun
import edu.stanford.nlp.tagger.maxent.MaxentTagger; // import the required package/class
@Override
protected List<Word> doRun(Language language, String sentence) {
    MaxentTagger tagger = taggers.computeIfAbsent(language, lang -> {
        if (lang == EN) {
            return new MaxentTagger("edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");
        }
        throw new UnsupportedLanguageException(lang);
    });
    PartOfSpeechSet partOfSpeechSet = PartOfSpeechSet.getPOSSet(language);
    List<Word> words = new ArrayList<>();
    List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new StringReader(sentence));
    sentences.forEach(s -> {
        tagger.tagSentence(s).forEach(taggedWord ->
                words.add(new Word(partOfSpeechSet.valueOf(taggedWord.tag()), taggedWord.value())));
    });
    return words;
}
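Loading a MaxentTagger model is expensive, so the computeIfAbsent caching above, one tagger per language, is the usual pattern. A standalone sketch of the same idea (the TaggerCache class name is illustrative; a loaded MaxentTagger is generally safe to share across threads for tagging):

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import edu.stanford.nlp.tagger.maxent.MaxentTagger;

public class TaggerCache {
    // One tagger per model path, created on first use and then reused.
    private static final Map<String, MaxentTagger> CACHE = new ConcurrentHashMap<>();

    public static MaxentTagger forModel(String modelPath) {
        return CACHE.computeIfAbsent(modelPath, MaxentTagger::new);
    }
}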
Example 2: wordIsNONAdjective
import edu.stanford.nlp.tagger.maxent.MaxentTagger; // import the required package/class
public boolean wordIsNONAdjective(String word) {
    // Words ending in "-ic"/"-ical" are treated as adjectives outright.
    if (word.endsWith("ic") || word.endsWith("ical"))
        return false;
    List<HasWord> sentence = MaxentTagger.tokenizeText(new StringReader(word)).get(0);
    TaggedWord taggedWord = tagger.tagSentence(sentence).get(0);
    // if (taggedWord.tag().equals("NN") || taggedWord.tag().equals("NNS")
    //         || taggedWord.tag().equals("NNP")
    //         || taggedWord.tag().equals("NNPS"))
    if (taggedWord.tag().equals("JJ"))
        return false;
    return true;
}
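Tagging an isolated word, as above, gives the tagger no sentence context, so ambiguous words may not get the tag you expect. A quick way to see the effect, using tagString's default word_TAG output (the printed tags are indicative, not guaranteed):

MaxentTagger tagger = new MaxentTagger(MaxentTagger.DEFAULT_JAR_PATH);
System.out.println(tagger.tagString("light"));          // e.g. light_NN
System.out.println(tagger.tagString("a light breeze")); // e.g. a_DT light_JJ breeze_NN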
Example 3: preprocess
import edu.stanford.nlp.tagger.maxent.MaxentTagger; // import the required package/class
public Concept preprocess(Concept c) {
    if (this.tagger == null)
        this.tagger = new MaxentTagger("ext_models/pos_tagger/english-left3words-distsim.tagger");
    if (this.ner == null)
        this.ner = CRFClassifier.getClassifierNoExceptions("ext_models/ner/english.all.3class.distsim.crf.ser.gz");
    List<CoreLabel> words = tokFactory.getTokenizer(new StringReader(c.name)).tokenize();
    tagger.tagCoreLabels(words);
    words = ner.classifySentence(words);
    words = this.addLemmas(words);
    List<PToken> tokens = new ArrayList<PToken>();
    for (CoreLabel word : words) {
        PToken t = new PToken(word.originalText());
        t.pos = word.tag();
        t.neTag = word.get(CoreAnnotations.AnswerAnnotation.class);
        t.lemma = word.get(LemmaAnnotation.class);
        tokens.add(t);
    }
    c.tokenList = tokens;
    return c;
}
Example 4: ExportExamplesToSentences
import edu.stanford.nlp.tagger.maxent.MaxentTagger; // import the required package/class
public ExportExamplesToSentences(String targetFile, String sourceDir, int ngramSize,
                                 SourceType type, String fileExtension,
                                 boolean replaceNumbers, boolean toLowerCase,
                                 boolean stripWords, String tagDelimiter) {
    this.target = targetFile;
    this.source = sourceDir;
    this.ngramSize = ngramSize;
    this.tokenizer = PTBTokenizer.factory();
    this.tagger = new MaxentTagger(MaxentTagger.DEFAULT_JAR_PATH);
    this.type = type;
    this.fileExtension = fileExtension;
    this.replaceNumbers = replaceNumbers;
    this.toLowerCase = toLowerCase;
    this.stripWords = stripWords;
    this.tagDelimiter = tagDelimiter;
}
Example 5: AMRServices
import edu.stanford.nlp.tagger.maxent.MaxentTagger; // import the required package/class
private AMRServices(String skolemPredicateBaseName, Type textType,
        String refPredicateBaseName, SpecificationMapping mapping,
        File stanfordModelFile, String opPredicatePrefix,
        LogicalConstant dummyEntity, LogicalConstant nameInstancePredicate,
        Type typingPredicateType, IllinoisNERWrapper namedEntityRecognizer,
        File propBankDir) throws IOException {
    this.opPredicatePrefix = opPredicatePrefix;
    this.dummyEntity = dummyEntity;
    this.nameInstancePredicate = nameInstancePredicate;
    this.typingPredicateType = typingPredicateType;
    this.namedEntityRecognizer = namedEntityRecognizer;
    // Add a lemmatizer that simply returns the lower-cased word.
    this.lemmatizer = new UnionLemmatizer(new WordNetLemmatizer(),
            word -> SetUtils.createSingleton(word.toLowerCase()));
    this.skolemPredicateBaseName = skolemPredicateBaseName;
    this.textType = textType;
    this.refPredicateBaseName = refPredicateBaseName;
    this.mapping = mapping;
    this.tagger = stanfordModelFile == null ? null
            : new MaxentTagger(stanfordModelFile.getAbsolutePath());
    this.propBank = propBankDir == null ? null : new PropBank(propBankDir);
}
Example 6: tagList
import edu.stanford.nlp.tagger.maxent.MaxentTagger; // import the required package/class
private Map<String, String> tagList(LinkedList<String> text)
{
    /*
     * requires: a linked list of strings (the selected text), not null
     * modifies: text
     * effects:  tags each word with its part of speech
     * returns:  a map of (word, tag)
     */
    MaxentTagger tagger = new MaxentTagger("Files/english-left3words-distsim.tagger");
    Map<String, String> mapWordTag = new HashMap<String, String>();
    for (String word : text)
    {
        try {
            // tagString returns "word_TAG"; keep the tag after the underscore.
            mapWordTag.put(word, tagger.tagString(word).split("_")[1]);
        }
        catch (ArrayIndexOutOfBoundsException e) {
            // no "_" in the output (nothing taggable); skip this word
        }
    }
    return mapWordTag;
}
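For multi-token input, tagString returns all word_TAG pairs joined by spaces, so you would split on whitespace first. A hedged sketch (lastIndexOf guards against words that themselves contain an underscore):

String tagged = tagger.tagString("The cat sat"); // e.g. "The_DT cat_NN sat_VBD"
for (String pair : tagged.split("\\s+")) {
    int idx = pair.lastIndexOf('_');
    String word = pair.substring(0, idx);
    String tag = pair.substring(idx + 1);
    System.out.println(word + " -> " + tag);
}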
Example 7: tokenize
import edu.stanford.nlp.tagger.maxent.MaxentTagger; // import the required package/class
public ListMatrix<ListMatrix<String>> tokenize(String input) throws Exception {
    ListMatrix<ListMatrix<String>> result = new DefaultListMatrix<ListMatrix<String>>();
    StringReader sr = new StringReader(input);
    List<List<HasWord>> sentences = MaxentTagger.tokenizeText(sr);
    for (List<HasWord> tokSentence : sentences) {
        ListMatrix<String> m = new DefaultListMatrix<String>();
        for (HasWord t : tokSentence) {
            m.add(t.word());
        }
        result.add(m);
    }
    return result;
}
Example 8: tokenize
import edu.stanford.nlp.tagger.maxent.MaxentTagger; // import the required package/class
/**
 * Splits the sentence into individual tokens.
 *
 * @param sentence Input sentence
 * @return Array of tokens
 */
public static String[] tokenize(String sentence) {
    // Note: this uses an older Stanford API in which tokenizeText returned
    // a raw List of Sentence objects.
    List t = MaxentTagger.tokenizeText(new StringReader(sentence));
    List<String> tokens = new ArrayList<String>();
    for (int j = 0; j < t.size(); j++) {
        Sentence s1 = (Sentence) t.get(j);
        for (int i = 0; i < s1.length(); i++) {
            HasWord w = s1.getHasWord(i);
            tokens.add(w.word());
        }
    }
    return (String[]) tokens.toArray(new String[tokens.size()]);
}
Example 9: tagPos
import edu.stanford.nlp.tagger.maxent.MaxentTagger; // import the required package/class
/**
 * Tags the tokens with part of speech.
 *
 * @param tokens Array of token strings
 * @return Part of speech tags
 */
public static String[] tagPos(String[] tokens) {
    Sentence untagged = createSentence(tokens);
    // Note: the static tagSentence method is from an older Stanford API.
    Sentence tagged = MaxentTagger.tagSentence(untagged);
    String[] pos = new String[tagged.size()];
    for (int i = 0; i < tagged.size(); i++) {
        HasWord w = (HasWord) tagged.get(i);
        // Tagged tokens print as "word/TAG"; keep the part after the last slash.
        String[] s = w.toString().split("/");
        if (s.length > 1)
            pos[i] = s[s.length - 1];
        else
            pos[i] = "";
    }
    return pos;
}
Example 10: posFromFile
import edu.stanford.nlp.tagger.maxent.MaxentTagger; // import the required package/class
/**
 * Runs the Stanford CoreNLP MaxentTagger with the left3words model and prints
 * the resulting word/tag pairs in TSV format.
 *
 * @param filePath
 *            The file to run the tagger on.
 */
private static void posFromFile(String filePath) {
    URL l3wTagger = StanfordCoreNLPUtility.class.getResource("/models/english-left3words-distsim.tagger");
    MaxentTagger tagger = new MaxentTagger(l3wTagger.toString());
    StringWriter writer = new StringWriter();
    File dataFile = new File(filePath);
    BufferedWriter bw = new BufferedWriter(writer);
    try {
        BufferedReader br = new BufferedReader(new FileReader(dataFile));
        tagger.runTagger(br, bw, "", OutputStyle.TSV);
    } catch (IOException e) {
        e.printStackTrace();
    }
    System.out.println(writer.toString());
}
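The file reader in this example is never closed; on a current JDK the same call fits naturally in try-with-resources (a sketch, keeping the runTagger signature used above):

try (BufferedReader br = new BufferedReader(new FileReader(dataFile));
     BufferedWriter bw = new BufferedWriter(writer)) {
    tagger.runTagger(br, bw, "", OutputStyle.TSV);
} catch (IOException e) {
    e.printStackTrace();
}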
Example 11: main
import edu.stanford.nlp.tagger.maxent.MaxentTagger; // import the required package/class
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("usage: java TaggerDemo modelFile fileToTag");
        return;
    }
    MaxentTagger tagger = new MaxentTagger(args[0]);
    TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(),
            "untokenizable=noneKeep");
    BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
    PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"));
    DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
    documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);
    for (List<HasWord> sentence : documentPreprocessor) {
        List<TaggedWord> tSentence = tagger.tagSentence(sentence);
        pw.println(Sentence.listToString(tSentence, false));
    }
    pw.close();
}
Example 12: main
import edu.stanford.nlp.tagger.maxent.MaxentTagger; // import the required package/class
/**
 * @param args modelFile fileToTag
 */
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("usage: java TaggerDemo modelFile fileToTag");
        return;
    }
    MaxentTagger tagger = new MaxentTagger(args[0]);
    List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new BufferedReader(new FileReader(args[1])));
    for (List<HasWord> sentence : sentences) {
        ArrayList<TaggedWord> tSentence = tagger.tagSentence(sentence);
        System.out.println(Sentence.listToString(tSentence, false));
    }
}
Example 13: TypeClassifier
import edu.stanford.nlp.tagger.maxent.MaxentTagger; // import the required package/class
public TypeClassifier() {
    pipeline = new AnnotationPipeline();
    classifier = new Classifier();
    featureSet = new FeatureSet(new MaxentTagger(
            "de.uni_mannheim.informatik.dws.winter.webtables.detectors.tabletypeclassifier\\english-left3words-distsim.tagger"));
    initialize();
}
Example 14: tag
import edu.stanford.nlp.tagger.maxent.MaxentTagger; // import the required package/class
public Vector<ArrayList<TaggedWord>> tag(String input) {
    Vector<ArrayList<TaggedWord>> returnVector = new Vector<ArrayList<TaggedWord>>();
    List<List<HasWord>> sentences = MaxentTagger
            .tokenizeText(new BufferedReader(new StringReader(input)));
    for (List<? extends HasWord> sentence : sentences) {
        returnVector.add(tagger.tagSentence(sentence));
    }
    return returnVector;
}
Example 15: main
import edu.stanford.nlp.tagger.maxent.MaxentTagger; // import the required package/class
public static void main(String[] args) {
    String modelPath = DependencyParser.DEFAULT_MODEL;
    String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
    for (int argIndex = 0; argIndex < args.length;) {
        switch (args[argIndex]) {
        case "-tagger":
            taggerPath = args[argIndex + 1];
            argIndex += 2;
            break;
        case "-com.dukenlidb.nlidb.model":
            modelPath = args[argIndex + 1];
            argIndex += 2;
            break;
        default:
            throw new RuntimeException("Unknown argument " + args[argIndex]);
        }
    }
    String text = "Return authors who have more papers than Bob in VLDB after 2000";
    MaxentTagger tagger = new MaxentTagger(taggerPath);
    DependencyParser parser = DependencyParser.loadFromModelFile(modelPath);
    DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
    for (List<HasWord> sentence : tokenizer) {
        List<TaggedWord> tagged = tagger.tagSentence(sentence);
        GrammaticalStructure gs = parser.predict(tagged);
        // Print typed dependencies
        log.info(gs);
    }
}
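Note the ordering in this last example: the neural DependencyParser consumes an already-tagged sentence (parser.predict takes the tagger's output as input), which is why the MaxentTagger pass has to run before parsing.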