当前位置: 首页>>代码示例>>Java>>正文


Java CoreLabelTokenFactory类代码示例

本文整理汇总了Java中edu.stanford.nlp.process.CoreLabelTokenFactory的典型用法代码示例。如果您正苦于以下问题:Java CoreLabelTokenFactory类的具体用法?Java CoreLabelTokenFactory怎么用?Java CoreLabelTokenFactory使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


CoreLabelTokenFactory类属于edu.stanford.nlp.process包,在下文中一共展示了CoreLabelTokenFactory类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: initialize

import edu.stanford.nlp.process.CoreLabelTokenFactory; //导入依赖的package包/类
/**
 * Initializes the tokenizer to detect date columns.
 * Builds an annotation pipeline of: PTB tokenization, sentence splitting,
 * POS tagging, and SUTime temporal annotation (registered as "sutime").
 */
public void initialize() {
	Properties props = new Properties();
	pipeline.addAnnotator(new TokenizerAnnotator(false) {

		@Override
		public Tokenizer<CoreLabel> getTokenizer(Reader r) {
			// Always tokenize with a PTB tokenizer; "" = default tokenizer options.
			return new PTBTokenizer<CoreLabel>(r, new CoreLabelTokenFactory(), "");

		}

	});
	pipeline.addAnnotator(new WordsToSentencesAnnotator(false));
	pipeline.addAnnotator(new POSTaggerAnnotator(false));
	// SUTime reads its (default) configuration from the empty props object.
	pipeline.addAnnotator(new TimeAnnotator("sutime", props));
}
 
开发者ID:olehmberg,项目名称:winter,代码行数:20,代码来源:TypeClassifier.java

示例2: AbstractSequenceClassifier

import edu.stanford.nlp.process.CoreLabelTokenFactory; //导入依赖的package包/类
/**
 * Initialize the featureFactory and other variables based on the passed in
 * flags.
 *
 * @param flags A specification of the AbstractSequenceClassifier to construct.
 */
public AbstractSequenceClassifier(SeqClassifierFlags flags) {
  this.flags = flags;

  // Reflectively instantiate the feature factory class named in the flags,
  // forwarding any constructor arguments configured there.
  // try {
  this.featureFactory = new MetaClass(flags.featureFactory).createInstance(flags.featureFactoryArgs);
  //   this.featureFactory = (FeatureFactory<IN>) Class.forName(flags.featureFactory).newInstance();
  if (flags.tokenFactory == null) {
    // No token factory configured: default to CoreLabel tokens.
    // (Unchecked cast: callers are expected to use IN = CoreLabel here.)
    tokenFactory = (CoreTokenFactory<IN>) new CoreLabelTokenFactory();
  } else {
    this.tokenFactory = new MetaClass(flags.tokenFactory).createInstance(flags.tokenFactoryArgs);
  //   this.tokenFactory = (CoreTokenFactory<IN>) Class.forName(flags.tokenFactory).newInstance();
  }
  // } catch (Exception e) {
  //   throw new RuntimeException(e);
  // }
  // Padding token used beyond the sequence boundaries.
  pad = tokenFactory.makeToken();
  // Window spans the current position plus maxLeft tokens of left context.
  windowSize = flags.maxLeft + 1;
  reinit();
}
 
开发者ID:paulirwin,项目名称:Stanford.NER.Net,代码行数:26,代码来源:AbstractSequenceClassifier.java

示例3: writeImage

import edu.stanford.nlp.process.CoreLabelTokenFactory; //导入依赖的package包/类
/**
 * Parses the given sentence with the English PCFG parser and writes its
 * parse-tree image to {@code outFile}.  Best-effort: if the parser model
 * cannot be loaded, an error is reported and the method returns without
 * writing anything.
 *
 * @param sentence raw sentence text to tokenize and parse
 * @param outFile  path of the image file to write
 * @param scale    scale factor for the rendered image
 * @throws Exception if tokenization, parsing, or image writing fails
 */
public static void writeImage(String sentence, String outFile, int scale) throws Exception {

    LexicalizedParser lp;
    try {
        lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
    } catch (Exception e) {
        // Keep the original best-effort behavior (return, don't rethrow),
        // but do not silently drop the cause as the old code did.
        System.err.println("Could not load file englishPCFG.ser.gz. Try placing this file in the same directory as Dependencee.jar");
        e.printStackTrace();
        return;
    }

    lp.setOptionFlags(new String[]{"-maxLength", "500", "-retainTmpSubcategories"});
    TokenizerFactory<CoreLabel> tokenizerFactory =
            PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    List<CoreLabel> wordList = tokenizerFactory.getTokenizer(new StringReader(sentence)).tokenize();
    Tree tree = lp.apply(wordList);
    writeImage(tree, outFile, scale);
}
 
开发者ID:awaisathar,项目名称:dependensee,代码行数:19,代码来源:Main.java

示例4: testWriteImage

import edu.stanford.nlp.process.CoreLabelTokenFactory; //导入依赖的package包/类
/**
 * Test of writeImage method, of class Main.
 * Parses a sample sentence, renders its dependencies to image.png, and
 * verifies the file was created.
 */

@Test
public void testWriteImage() throws Exception {
    String text = "A quick brown fox jumped over the lazy dog.";
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    LexicalizedParser lp = LexicalizedParser.loadModel();
    lp.setOptionFlags(new String[]{"-maxLength", "500", "-retainTmpSubcategories"});
    TokenizerFactory<CoreLabel> tokenizerFactory =
            PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    List<CoreLabel> wordList = tokenizerFactory.getTokenizer(new StringReader(text)).tokenize();
    Tree tree = lp.apply(wordList);
    GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
    Collection<TypedDependency> tdl = gs.typedDependenciesCollapsed();
    Main.writeImage(tdl, "image.png", 3);
    // The Java `assert` keyword is a no-op unless the JVM runs with -ea, so
    // the original check could silently pass; use a JUnit assertion instead.
    org.junit.Assert.assertTrue(new File("image.png").exists());
}
 
开发者ID:awaisathar,项目名称:dependensee,代码行数:21,代码来源:MainTest.java

示例5: main

import edu.stanford.nlp.process.CoreLabelTokenFactory; //导入依赖的package包/类
/**
 * Tags every sentence of the file named in {@code args[1]} with the tagger
 * model from {@code args[0]} and prints the tagged sentences to stdout.
 *
 * @param args {@code args[0]} = tagger model file, {@code args[1]} = file to tag
 * @throws Exception on any I/O or tagging failure
 */
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("usage: java TaggerDemo modelFile fileToTag");
    return;
  }
  MaxentTagger tagger = new MaxentTagger(args[0]);
  TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(),
							   "untokenizable=noneKeep");
  // try-with-resources closes both streams even if tagging throws; the
  // original leaked them on failure.  Closing pw also closes System.out,
  // matching the original's explicit pw.close().
  try (BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
       PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"))) {
    DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
    documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);
    for (List<HasWord> sentence : documentPreprocessor) {
      List<TaggedWord> tSentence = tagger.tagSentence(sentence);
      pw.println(Sentence.listToString(tSentence, false));
    }
  }
}
 
开发者ID:jaimeguzman,项目名称:data_mining,代码行数:19,代码来源:TaggerDemo2.java

示例6: getTokenizer

import edu.stanford.nlp.process.CoreLabelTokenFactory; //导入依赖的package包/类
/**
 * Builds a tokenizer for {@code text} in the requested language.
 * "Characterwise" selects the custom per-character tokenizer; any other
 * language is delegated to Stanford's TokenizerAnnotator.
 */
private static Tokenizer<CoreLabel> getTokenizer(String text, String language) {
    if (!language.equals("Characterwise")) {
        TokenizerAnnotator annotator = new TokenizerAnnotator(VERBOSE, language, OPTIONS);
        return annotator.getTokenizer(new StringReader(text));
    }
    return new CharacterwiseTokenizer(text, new CoreLabelTokenFactory());
}
 
开发者ID:annefried,项目名称:swan,代码行数:10,代码来源:TokenizationUtil.java

示例7: produceBagOfWords_Token

import edu.stanford.nlp.process.CoreLabelTokenFactory; //导入依赖的package包/类
/**
 * Loads a document from file and transforms it into a token multi-set
 * using the Stanford PTBTokenizer.
 *
 * @param documentPath path of the document to tokenize
 * @return multiset counting each token occurrence
 * @throws IOException if the file cannot be read
 */
	public HashMultiset<String> produceBagOfWords_Token(String documentPath) throws IOException{ 
		HashMultiset<String> tokenMultiset = HashMultiset.create();
		// try-with-resources: the original never closed the FileReader (leak).
		try (FileReader reader = new FileReader(documentPath)) {
			PTBTokenizer<CoreLabel> ptbt = new PTBTokenizer<>(reader, new CoreLabelTokenFactory(), "");
			while (ptbt.hasNext()) {
				tokenMultiset.add(ptbt.next().toString());
			}
		}
		return tokenMultiset;
	}
 
开发者ID:JULIELab,项目名称:JEmAS,代码行数:20,代码来源:File2BagOfWords_Processor.java

示例8: tokenize

import edu.stanford.nlp.process.CoreLabelTokenFactory; //导入依赖的package包/类
/**
 * Splits {@code s} into tokens with the Stanford PTB tokenizer.
 * Soft-hyphen + space sequences are first collapsed to a bare soft hyphen
 * so that hyphenated line breaks do not split words.
 */
public String[] tokenize(String s) {
	String normalized = s.replaceAll("\u00ad ", "\u00ad");

	PTBTokenizer<CoreLabel> tokenizer = new PTBTokenizer<CoreLabel>(
			new StringReader(normalized),
			new CoreLabelTokenFactory(),
			"invertible=false,ptb3Escaping=false");

	List<CoreLabel> labels = tokenizer.tokenize();
	String[] tokens = new String[labels.size()];
	int index = 0;
	for (CoreLabel label : labels) {
		tokens[index++] = label.toString();
	}
	return tokens;
}
 
开发者ID:Scarano,项目名称:Headword,代码行数:16,代码来源:PTBLikeTokenizer.java

示例9: main

import edu.stanford.nlp.process.CoreLabelTokenFactory; //导入依赖的package包/类
/**
 * Tags every sentence of the file named in {@code args[1]} with the model
 * from {@code args[0]}, prints the tagged sentences, and then prints the
 * adjectives of one hard-coded example sentence.
 *
 * @param args {@code args[0]} = tagger model file, {@code args[1]} = file to tag
 * @throws Exception on any I/O or tagging failure
 */
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("usage: java TaggerDemo2 modelFile fileToTag");
    return;
  }
  MaxentTagger tagger = new MaxentTagger(args[0]);
  TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(),
							   "untokenizable=noneKeep");
  // try-with-resources closes both streams even if tagging throws; the
  // original leaked them on failure.  Closing pw also closes System.out,
  // matching the original's explicit pw.close().
  try (BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
       PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"))) {
    DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
    documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);
    for (List<HasWord> sentence : documentPreprocessor) {
      List<TaggedWord> tSentence = tagger.tagSentence(sentence);
      pw.println(Sentence.listToString(tSentence, false));
    }

    // print the adjectives in one more sentence. This shows how to get at
    // words and tags in a tagged sentence.
    List<HasWord> sent = Sentence.toWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", ".");
    List<TaggedWord> taggedSent = tagger.tagSentence(sent);
    for (TaggedWord tw : taggedSent) {
      if (tw.tag().startsWith("JJ")) {
        pw.println(tw.word());
      }
    }
  }
}
 
开发者ID:tudarmstadt-lt,项目名称:sentiment,代码行数:29,代码来源:TaggerDemo2.java

示例10: demoAPI

import edu.stanford.nlp.process.CoreLabelTokenFactory; //导入依赖的package包/类
/**
 * demoAPI demonstrates other ways of calling the parser with already
 * tokenized text, or in some cases, raw text that needs to be tokenized as
 * a single sentence.  Output is handled with a TreePrint object; the
 * options given when the TreePrint is created determine what is printed.
 * Output can be captured by passing a PrintWriter to TreePrint.printTree.
 */
public static void demoAPI(LexicalizedParser lp) {
	// Way 1: parse a list of correctly tokenized words directly.
	String[] sent = { "This", "is", "an", "easy", "sentence", "." };
	Tree parse = lp.apply(Sentence.toCoreLabelList(sent));
	parse.pennPrint();
	System.out.println();

	// Way 2: tokenize raw text with an explicit PTB tokenizer, then parse.
	String sent2 = "Hey @Apple, pretty much all your products are amazing. You blow minds every time you launch a new gizmo."
			+ " that said, your hold music is crap";
	TokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizer.factory(
			new CoreLabelTokenFactory(), "");
	List<CoreLabel> rawWords2 = tokenizerFactory
			.getTokenizer(new StringReader(sent2)).tokenize();
	parse = lp.apply(rawWords2);

	// Convert the parse into CC-processed typed dependencies and print them.
	TreebankLanguagePack tlp = new PennTreebankLanguagePack();
	GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
	List<TypedDependency> tdl = gsf.newGrammaticalStructure(parse).typedDependenciesCCprocessed();
	System.out.println(tdl);
	System.out.println();

	// A TreePrint object can also render trees and their dependencies.
	TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed");
	tp.printTree(parse);
}
 
开发者ID:opinion-extraction-propagation,项目名称:TASC-Tuples,代码行数:42,代码来源:ParserDemo.java

示例11: tokenizeStanford

import edu.stanford.nlp.process.CoreLabelTokenFactory; //导入依赖的package包/类
/**
 * Tokenizes {@code line} with the Stanford PTB tokenizer and rejoins the
 * tokens with single spaces, dropping quote tokens and PTB bracket
 * placeholders (e.g. -LRB-).
 *
 * @param line raw text to tokenize
 * @return space-joined tokens with quotes/brackets removed
 */
public static String tokenizeStanford(String line)
{
    // Compile the filter once instead of recompiling via String.matches on
    // every token.  NOTE(review): [RCR] repeats R; possibly [RCS] was
    // intended to also match -LSB-/-RSB- — confirm with the original author.
    java.util.regex.Pattern skip = java.util.regex.Pattern.compile("``|\'\'|\"|-[LR][RCR]B-");
    StringBuilder str = new StringBuilder();
    // Parameterized type (the original used a raw PTBTokenizer, an unchecked warning).
    Tokenizer<CoreLabel> tokenizer = new PTBTokenizer<CoreLabel>(new StringReader(line), new CoreLabelTokenFactory(), "asciiQuotes=true untokenizable=allDelete");
    while(tokenizer.hasNext())
    {
        CoreLabel label = tokenizer.next();
        if(!skip.matcher(label.toString()).matches())
            str.append(label).append(" ");
    }
    return str.toString().trim();
}
 
开发者ID:sinantie,项目名称:PLTAG,代码行数:13,代码来源:Utils.java

示例12: MUCMentionExtractor

import edu.stanford.nlp.process.CoreLabelTokenFactory; //导入依赖的package包/类
/**
 * Builds a mention extractor over a MUC-formatted corpus.  Reads the whole
 * file named by the MUC property into memory and prepares a PTB tokenizer
 * plus the standard Stanford annotation pipeline.
 *
 * @param dict      coreference dictionaries, passed to the superclass
 * @param props     configuration; Constants.MUC_PROP names the corpus file
 * @param semantics semantic resources, passed to the superclass
 * @throws Exception if the corpus file cannot be read
 */
public MUCMentionExtractor(Dictionaries dict, Properties props, Semantics semantics) throws Exception {
  super(dict, semantics);
  String fileName = props.getProperty(Constants.MUC_PROP);
  fileContents = IOUtils.slurpFile(fileName);
  currentOffset = 0;
  // CoreLabelTokenFactory(false): presumably disables storing original-text
  // offsets on tokens — TODO confirm against the Stanford API docs.
  tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(false), "");
  stanfordProcessor = loadStanfordProcessor(props);
}
 
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:9,代码来源:MUCMentionExtractor.java

示例13: WhitespaceTokenizerAnnotator

import edu.stanford.nlp.process.CoreLabelTokenFactory; //导入依赖的package包/类
/**
 * Creates a whitespace-tokenizer annotator.  End-of-line characters are
 * treated as significant tokens if either the EOL property or the CoreNLP
 * newline-splitter property is set to "true".
 *
 * @param props configuration properties consulted for the two flags
 */
public WhitespaceTokenizerAnnotator(Properties props) {
  super(false);
  // parseBoolean returns a primitive directly; the original's
  // Boolean.valueOf boxed a Boolean only to auto-unbox it immediately.
  boolean eolIsSignificant =
    Boolean.parseBoolean(props.getProperty(EOL_PROPERTY, "false"))
    || Boolean.parseBoolean(props.getProperty
                            (StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "false"));
  factory = new WhitespaceTokenizer.WhitespaceTokenizerFactory<CoreLabel>
            (new CoreLabelTokenFactory(), eolIsSignificant);
}
 
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:12,代码来源:WhitespaceTokenizerAnnotator.java

示例14: getGraph

import edu.stanford.nlp.process.CoreLabelTokenFactory; //导入依赖的package包/类
/**
 * Parses {@code sentence} with the English PCFG model and converts the
 * resulting tree and its typed dependencies into a Graph.
 */
public static Graph getGraph(String sentence) throws Exception {
    LexicalizedParser parser = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
    parser.setOptionFlags(new String[]{"-maxLength", "500", "-retainTmpSubcategories"});
    TokenizerFactory<CoreLabel> factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    List<CoreLabel> tokens = factory.getTokenizer(new StringReader(sentence)).tokenize();
    Tree parseTree = parser.apply(tokens);
    // gsf is a class-level GrammaticalStructureFactory declared elsewhere.
    Collection<TypedDependency> dependencies = gsf.newGrammaticalStructure(parseTree).typedDependencies();
    return getGraph(parseTree, dependencies);
}
 
开发者ID:awaisathar,项目名称:dependensee,代码行数:12,代码来源:Main.java

示例15: demoAPI

import edu.stanford.nlp.process.CoreLabelTokenFactory; //导入依赖的package包/类
/**
 * Demonstrates two ways of driving the parser: with a pre-tokenized word
 * list, and with raw text put through an explicit PTB tokenizer.  Results
 * are printed as a Penn tree, as typed dependencies, and via TreePrint.
 */
public static void demoAPI(LexicalizedParser lp) {
  // Input 1: words that are already correctly tokenized.
  String[] sent = { "This", "is", "an", "easy", "sentence", "." };
  Tree parse = lp.apply(Sentence.toCoreLabelList(sent));
  parse.pennPrint();
  System.out.println();


  // Input 2: raw text, tokenized explicitly with a PTB tokenizer.
  String sent2 = "This is another sentence.";
  TokenizerFactory<CoreLabel> tokenizerFactory =
    PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
  List<CoreLabel> rawWords2 =
    tokenizerFactory.getTokenizer(new StringReader(sent2)).tokenize();
  parse = lp.apply(rawWords2);

  // Collapse the constituency parse into CC-processed typed dependencies.
  TreebankLanguagePack tlp = new PennTreebankLanguagePack();
  GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
  List<TypedDependency> tdl = gsf.newGrammaticalStructure(parse).typedDependenciesCCprocessed();
  System.out.println(tdl);
  System.out.println();

  // A TreePrint can render both the tree and its collapsed dependencies.
  TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed");
  tp.printTree(parse);
}
 
开发者ID:amark-india,项目名称:eventspotter,代码行数:28,代码来源:ParserDemo.java


注:本文中的edu.stanford.nlp.process.CoreLabelTokenFactory类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。