当前位置: 首页>>代码示例>>Java>>正文


Java MaxentTagger类代码示例

本文整理汇总了Java中edu.stanford.nlp.tagger.maxent.MaxentTagger的典型用法代码示例。如果您正苦于以下问题:Java MaxentTagger类的具体用法?Java MaxentTagger怎么用?Java MaxentTagger使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


MaxentTagger类属于edu.stanford.nlp.tagger.maxent包,在下文中一共展示了MaxentTagger类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: doRun

import edu.stanford.nlp.tagger.maxent.MaxentTagger; //导入依赖的package包/类
@Override
protected List<Word> doRun(Language language, String sentence) {
    // Lazily build and cache one tagger per language; only English is supported.
    MaxentTagger tagger = taggers.computeIfAbsent(language, lang -> {
        if (lang != EN) {
            throw new UnsupportedLanguageException(lang);
        }
        return new MaxentTagger("edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");
    });

    PartOfSpeechSet posSet = PartOfSpeechSet.getPOSSet(language);
    List<Word> result = new ArrayList<>();

    // Tokenize into sentences, tag each one, and flatten the tagged tokens into Words.
    MaxentTagger.tokenizeText(new StringReader(sentence)).forEach(tokenized ->
            tagger.tagSentence(tokenized).forEach(tw ->
                    result.add(new Word(posSet.valueOf(tw.tag()), tw.value()))));

    return result;
}
 
开发者ID:Lambda-3,项目名称:Stargraph,代码行数:21,代码来源:CoreNLPAnnotator.java

示例2: wordIsNONAdjective

import edu.stanford.nlp.tagger.maxent.MaxentTagger; //导入依赖的package包/类
public boolean wordIsNONAdjective(String word) {

		// Words with typical adjective suffixes are classified as adjectives outright.
		if (word.endsWith("ic") || word.endsWith("ical")) {
			return false;
		}

		// Otherwise POS-tag the single word and report whether it is NOT tagged "JJ".
		List<HasWord> tokens = MaxentTagger.tokenizeText(new StringReader(word)).get(0);
		TaggedWord tagged = tagger.tagSentence(tokens).get(0);

		return !tagged.tag().equals("JJ");
	}
 
开发者ID:ag-sc,项目名称:JLink,代码行数:18,代码来源:PosTagger.java

示例3: preprocess

import edu.stanford.nlp.tagger.maxent.MaxentTagger; //导入依赖的package包/类
public Concept preprocess(Concept c) {

		// Lazily load the heavyweight Stanford models on first use.
		if (this.tagger == null) {
			this.tagger = new MaxentTagger("ext_models/pos_tagger/english-left3words-distsim.tagger");
		}
		if (this.ner == null) {
			this.ner = CRFClassifier.getClassifierNoExceptions("ext_models/ner/english.all.3class.distsim.crf.ser.gz");
		}

		// Tokenize the concept name, then enrich each token with POS, NER and lemma annotations.
		List<CoreLabel> labels = tokFactory.getTokenizer(new StringReader(c.name)).tokenize();
		tagger.tagCoreLabels(labels);
		labels = ner.classifySentence(labels);
		labels = this.addLemmas(labels);

		// Convert the annotated CoreLabels into the project's PToken representation.
		List<PToken> tokens = new ArrayList<PToken>();
		for (CoreLabel label : labels) {
			PToken token = new PToken(label.originalText());
			token.pos = label.tag();
			token.neTag = label.get(CoreAnnotations.AnswerAnnotation.class);
			token.lemma = label.get(LemmaAnnotation.class);
			tokens.add(token);
		}
		c.tokenList = tokens;

		return c;
	}
 
开发者ID:UKPLab,项目名称:ijcnlp2017-cmaps,代码行数:25,代码来源:NonUIMAPreprocessor.java

示例4: ExportExamplesToSentences

import edu.stanford.nlp.tagger.maxent.MaxentTagger; //导入依赖的package包/类
public ExportExamplesToSentences(String targetFile, String sourceDir, int ngramSize,
        SourceType type, String fileExtension,
        boolean replaceNumbers, boolean toLowerCase,
        boolean stripWords, String tagDelimiter) {
    // Input/output locations for the export.
    this.target = targetFile;
    this.source = sourceDir;
    // Tokenization and POS-tagging machinery (bundled default tagger model).
    this.tokenizer = PTBTokenizer.factory();
    this.tagger = new MaxentTagger(MaxentTagger.DEFAULT_JAR_PATH);
    // Export configuration flags.
    this.ngramSize = ngramSize;
    this.type = type;
    this.fileExtension = fileExtension;
    this.replaceNumbers = replaceNumbers;
    this.toLowerCase = toLowerCase;
    this.stripWords = stripWords;
    this.tagDelimiter = tagDelimiter;
}
 
开发者ID:sinantie,项目名称:Generator,代码行数:17,代码来源:ExportExamplesToSentences.java

示例5: AMRServices

import edu.stanford.nlp.tagger.maxent.MaxentTagger; //导入依赖的package包/类
/**
 * Builds the AMR services state from its configuration.
 * <p>
 * The Stanford POS tagger and PropBank are optional: when their file
 * arguments are {@code null}, the corresponding field is left {@code null}.
 *
 * @throws IOException presumably propagated from model/PropBank loading — confirm
 *                     against {@code WordNetLemmatizer}/{@code PropBank} constructors
 */
private AMRServices(String skolemPredicateBaseName, Type textType,
		String refPredicateBaseName, SpecificationMapping mapping,
		File stanfordModelFile, String opPredicatePrefix,
		LogicalConstant dummyEntity, LogicalConstant nameInstancePredicate,
		Type typingPredicateType, IllinoisNERWrapper namedEntityRecognizer,
		File propBankDir) throws IOException {
	this.opPredicatePrefix = opPredicatePrefix;
	this.dummyEntity = dummyEntity;
	this.nameInstancePredicate = nameInstancePredicate;
	this.typingPredicateType = typingPredicateType;
	this.namedEntityRecognizer = namedEntityRecognizer;
	// Add a lemmatizer that simply returns the lower-cased word.
	this.lemmatizer = new UnionLemmatizer(new WordNetLemmatizer(),
			word -> SetUtils.createSingleton(word.toLowerCase()));
	this.skolemPredicateBaseName = skolemPredicateBaseName;
	this.textType = textType;
	this.refPredicateBaseName = refPredicateBaseName;
	this.mapping = mapping;
	// Optional components: only instantiated when a source file/dir was given.
	this.tagger = stanfordModelFile == null ? null
			: new MaxentTagger(stanfordModelFile.getAbsolutePath());
	this.propBank = propBankDir == null ? null : new PropBank(propBankDir);
}
 
开发者ID:clic-lab,项目名称:amr,代码行数:23,代码来源:AMRServices.java

示例6: tagList

import edu.stanford.nlp.tagger.maxent.MaxentTagger; //导入依赖的package包/类
private Map<String, String> tagList(LinkedList<String> text)
{
	/*
	 * requires: A linked list of strings which are of the selected text not equal to null
	 * modifies: text
	 * effects: It tags the text by its parts of speech and stores the words 
	 * returns: a map with (word, tag) 
	 */
	MaxentTagger tagger = new MaxentTagger("Files/english-left3words-distsim.tagger");
	Map<String, String> mapWordTag = new HashMap<String, String>();
	for (String word : text)
	{
		// tagString returns "word_TAG". Check the split length explicitly instead
		// of the original pattern of swallowing an ArrayIndexOutOfBoundsException
		// (exceptions-as-control-flow plus an empty catch block).
		String[] parts = tagger.tagString(word).split("_");
		if (parts.length > 1) {
			mapWordTag.put(word, parts[1]);
		}
		// Untaggable tokens are deliberately skipped, matching the old behavior.
	}
	return mapWordTag;
}
 
开发者ID:rkhatib,项目名称:topotext,代码行数:22,代码来源:PartsOfSpeechImp.java

示例7: tokenize

import edu.stanford.nlp.tagger.maxent.MaxentTagger; //导入依赖的package包/类
public ListMatrix<ListMatrix<String>> tokenize(String input) throws Exception {
	// One row per sentence; each row holds that sentence's word tokens.
	ListMatrix<ListMatrix<String>> sentences = new DefaultListMatrix<ListMatrix<String>>();

	for (List<HasWord> sentence : MaxentTagger.tokenizeText(new StringReader(input))) {
		ListMatrix<String> row = new DefaultListMatrix<String>();
		for (HasWord token : sentence) {
			row.add(token.word());
		}
		sentences.add(row);
	}

	return sentences;
}
 
开发者ID:jdmp,项目名称:java-data-mining-package,代码行数:17,代码来源:StanfordTokenizer.java

示例8: tokenize

import edu.stanford.nlp.tagger.maxent.MaxentTagger; //导入依赖的package包/类
/**
 * Splits the sentence into individual tokens.
 * 
 * @param sentence Input sentence
 * @return Array of tokens
 */
public static String[] tokenize(String sentence) {
	List sentences = MaxentTagger.tokenizeText(new StringReader(sentence));

	List<String> tokens = new ArrayList<String>();

	// Collect the surface form of every token across all detected sentences.
	for (int si = 0; si < sentences.size(); si++) {
		Sentence s = (Sentence) sentences.get(si);
		for (int wi = 0; wi < s.length(); wi++) {
			tokens.add(s.getHasWord(wi).word());
		}
	}

	return tokens.toArray(new String[tokens.size()]);
}
 
开发者ID:claritylab,项目名称:lucida,代码行数:23,代码来源:StanfordPosTagger.java

示例9: tagPos

import edu.stanford.nlp.tagger.maxent.MaxentTagger; //导入依赖的package包/类
/**
 * Tags the tokens with part of speech
 * 
 * @param tokens Array of token strings
 * @return Part of speech tags
 */
public static String[] tagPos(String[] tokens) {
	Sentence untagged = createSentence(tokens);
	Sentence tagged = MaxentTagger.tagSentence(untagged);

	String[] tags = new String[tagged.size()];
	for (int i = 0; i < tags.length; i++) {
		// Each tagged token renders as "word/TAG"; keep the part after the final slash,
		// or an empty string when no tag separator is present.
		String[] pieces = ((HasWord) tagged.get(i)).toString().split("/");
		tags[i] = pieces.length > 1 ? pieces[pieces.length - 1] : "";
	}

	return tags;
}
 
开发者ID:claritylab,项目名称:lucida,代码行数:23,代码来源:StanfordPosTagger.java

示例10: posFromFile

import edu.stanford.nlp.tagger.maxent.MaxentTagger; //导入依赖的package包/类
/**
 * Runs the Stanford CoreNLP MaxentTagger with the left3words model. Prints
 * the resulting word/tag pair in TSV format.
 * 
 * @param filePath
 *            The file to run the tagger on.
 */
private static void posFromFile(String filePath) {
	URL l3wTagger = StanfordCoreNLPUtility.class.getResource("/models/english-left3words-distsim.tagger");
	MaxentTagger tagger = new MaxentTagger(l3wTagger.toString());
	StringWriter writer = new StringWriter();
	// try-with-resources closes both streams: the original leaked the reader and
	// never closed the BufferedWriter, so buffered output could be missing from
	// the StringWriter when it was printed below.
	try (BufferedReader br = new BufferedReader(new FileReader(new File(filePath)));
			BufferedWriter bw = new BufferedWriter(writer)) {
		tagger.runTagger(br, bw, "", OutputStyle.TSV);
	} catch (IOException e) {
		// Report the failing file instead of a bare stack trace with no context.
		System.err.println("POS tagging failed for " + filePath);
		e.printStackTrace();
	}
	System.out.println(writer.toString());
}
 
开发者ID:Pro-Nouns,项目名称:StanfordCoreNLPUtility,代码行数:24,代码来源:StanfordCoreNLPUtility.java

示例11: main

import edu.stanford.nlp.tagger.maxent.MaxentTagger; //导入依赖的package包/类
/**
 * Command-line entry point: POS-tags every sentence of a file and prints the result.
 *
 * @param args args[0] = tagger model file, args[1] = text file to tag
 */
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("usage: java TaggerDemo modelFile fileToTag");
    return;
  }
  MaxentTagger tagger = new MaxentTagger(args[0]);
  TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(),
							   "untokenizable=noneKeep");
  // try-with-resources: the original never closed the reader (file handle leak)
  // nor the writer (buffered output could be lost on exit).
  try (BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
       PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"))) {
    DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
    documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);
    for (List<HasWord> sentence : documentPreprocessor) {
      List<TaggedWord> tSentence = tagger.tagSentence(sentence);
      pw.println(Sentence.listToString(tSentence, false));
    }
  }
}
 
开发者ID:jaimeguzman,项目名称:data_mining,代码行数:19,代码来源:TaggerDemo2.java

示例12: main

import edu.stanford.nlp.tagger.maxent.MaxentTagger; //导入依赖的package包/类
/**
 * Command-line entry point: POS-tags every sentence of a file and prints the result.
 *
 * @param args args[0] = tagger model file, args[1] = text file to tag
 */
public static void main(String[] args) throws Exception {
	if (args.length != 2) {
		System.err.println("usage: java TaggerDemo modelFile fileToTag");
		return;
	}
	MaxentTagger tagger = new MaxentTagger(args[0]);
	// try-with-resources: the original reader was never closed (file handle leak).
	try (BufferedReader reader = new BufferedReader(new FileReader(args[1]))) {
		List<List<HasWord>> sentences = MaxentTagger.tokenizeText(reader);
		for (List<HasWord> sentence : sentences) {
			ArrayList<TaggedWord> tSentence = tagger.tagSentence(sentence);
			System.out.println(Sentence.listToString(tSentence, false));
		}
	}
}
 
开发者ID:jaimeguzman,项目名称:data_mining,代码行数:23,代码来源:testPostagger.java

示例13: TypeClassifier

import edu.stanford.nlp.tagger.maxent.MaxentTagger; //导入依赖的package包/类
public TypeClassifier() {
	pipeline = new AnnotationPipeline();
	classifier = new Classifier();
	featureSet = new FeatureSet(new MaxentTagger(
			"de.uni_mannheim.informatik.dws.winter.webtables.detectors.tabletypeclassifier\\english-left3words-distsim.tagger"));
	initialize();
}
 
开发者ID:olehmberg,项目名称:winter,代码行数:8,代码来源:TypeClassifier.java

示例14: tag

import edu.stanford.nlp.tagger.maxent.MaxentTagger; //导入依赖的package包/类
public Vector<ArrayList<TaggedWord>> tag(String input) {
    // Tokenize into sentences and POS-tag each one; the result preserves
    // sentence boundaries (one ArrayList of tagged words per sentence).
    Vector<ArrayList<TaggedWord>> taggedSentences = new Vector<ArrayList<TaggedWord>>();
    for (List<? extends HasWord> sentence
            : MaxentTagger.tokenizeText(new BufferedReader(new StringReader(input)))) {
        taggedSentences.add(tagger.tagSentence(sentence));
    }
    return taggedSentences;
}
 
开发者ID:jatecs,项目名称:jatecs,代码行数:10,代码来源:StanfordPOSTagger.java

示例15: main

import edu.stanford.nlp.tagger.maxent.MaxentTagger; //导入依赖的package包/类
public static void main(String[] args) {
	String modelPath = DependencyParser.DEFAULT_MODEL;
	String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";

	// Parse command-line flags; each recognized flag consumes its value argument.
	int i = 0;
	while (i < args.length) {
		if ("-tagger".equals(args[i])) {
			taggerPath = args[i + 1];
			i += 2;
		} else if ("-com.dukenlidb.nlidb.model".equals(args[i])) {
			modelPath = args[i + 1];
			i += 2;
		} else {
			throw new RuntimeException("Unknown argument " + args[i]);
		}
	}

	String text = "Return authors who have more papers than Bob in VLDB after 2000";

	MaxentTagger tagger = new MaxentTagger(taggerPath);
	DependencyParser parser = DependencyParser.loadFromModelFile(modelPath);

	DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
	for (List<HasWord> sentence : tokenizer) {
		List<TaggedWord> tagged = tagger.tagSentence(sentence);
		// Print the typed dependencies for this sentence.
		GrammaticalStructure gs = parser.predict(tagged);
		log.info(gs);
	}
}
 
开发者ID:DukeNLIDB,项目名称:NLIDB,代码行数:35,代码来源:ParserDemo.java


注:本文中的edu.stanford.nlp.tagger.maxent.MaxentTagger类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。