

Java PennTreebankLanguagePack Class Code Examples

This article collects typical usage examples of the Java class edu.stanford.nlp.trees.PennTreebankLanguagePack. If you have been wondering what the PennTreebankLanguagePack class does, how to use it, or what it looks like in practice, the curated examples below should help.


The PennTreebankLanguagePack class belongs to the edu.stanford.nlp.trees package. Fifteen code examples of the class are shown below, sorted by popularity by default.
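
Nearly every example below follows the same three-step pattern: construct a PennTreebankLanguagePack, ask it for a GrammaticalStructureFactory, and use that factory to turn a parse tree into typed dependencies. The following is a minimal sketch of that pattern, not taken from any of the projects below; it assumes a phrase-structure Tree has already been produced by a parser and that the Stanford parser jar is on the classpath.

import java.util.Collection;

import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
import edu.stanford.nlp.trees.PennTreebankLanguagePack;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.TypedDependency;

public class PennTlpSketch {
	/** Converts an already-parsed constituency tree into typed dependencies. */
	public static Collection<TypedDependency> toDependencies(Tree tree) {
		TreebankLanguagePack tlp = new PennTreebankLanguagePack();
		GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
		GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
		return gs.typedDependenciesCCprocessed();
	}
}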

Example 1: demoDP

import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
/**
 * demoDP demonstrates turning a file into tokens and then parse trees. Note
 * that the trees are printed by calling pennPrint on the Tree object. It is
 * also possible to pass a PrintWriter to pennPrint if you want to capture
 * the output.
 * 
 * file => tokens => parse trees
 */
public static void demoDP(LexicalizedParser lp, String filename) {
	// This option shows loading, sentence-segmenting and tokenizing
	// a file using DocumentPreprocessor.
	TreebankLanguagePack tlp = new PennTreebankLanguagePack();
	GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
	// You could also create a tokenizer here (as below) and pass it
	// to DocumentPreprocessor
	for (List<HasWord> sentence : new DocumentPreprocessor(filename)) {
		Tree parse = lp.apply(sentence);
		parse.pennPrint();
		System.out.println();

		GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
		Collection<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
		System.out.println(tdl);
		System.out.println();
	}
}
 
Developer: opinion-extraction-propagation, Project: TASC-Tuples, Lines: 27, Source: ParserDemo.java
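
As the javadoc above notes, pennPrint can also write to a PrintWriter. A small sketch, not part of the original project, that captures the Penn-format output in a String instead of printing it (here `parse` is the Tree from the loop above):

import java.io.PrintWriter;
import java.io.StringWriter;

// Capture the Penn-format tree in a String rather than on stdout.
StringWriter sw = new StringWriter();
parse.pennPrint(new PrintWriter(sw, true));
String pennFormatted = sw.toString();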

Example 2: T2PStanfordWrapper

import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
/**
 * Loads the serialized englishFactored parser model (from the jar or,
 * failing that, from the resources directory) and initializes the
 * language pack and grammatical structure factory.
 */
public T2PStanfordWrapper() {
	try {
		ObjectInputStream in;
	    InputStream is;
	    URL u = T2PStanfordWrapper.class.getResource("/englishFactored.ser.gz");
	    if(u == null){
	    	//opening from IDE
	    	is = new FileInputStream(new File("resources/englishFactored.ser.gz"));		    		    	
	    }else{
	    	//opening from jar
	    	URLConnection uc = u.openConnection();
		    is = uc.getInputStream(); 				    
	    }
	    in = new ObjectInputStream(new GZIPInputStream(new BufferedInputStream(is)));  
	    f_parser = new LexicalizedParser(in);
		f_tlp = new PennTreebankLanguagePack(); //new ChineseTreebankLanguagePack();
	    f_gsf = f_tlp.grammaticalStructureFactory();
	}catch(Exception ex) {
		ex.printStackTrace();
	}	    
	//option flags as in the Parser example, but without maxlength
	f_parser.setOptionFlags(new String[]{"-retainTmpSubcategories"});				
	//f_parser.setOptionFlags(new String[]{"-segmentMarkov"});				
	Test.MAX_ITEMS = 4000000; //enables parsing of long sentences
}
 
Developer: FabianFriedrich, Project: Text2Process, Lines: 29, Source: T2PStanfordWrapper.java
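
A note on the lookup order above: the constructor first tries to resolve /englishFactored.ser.gz on the classpath (the packaged-jar case) and falls back to the relative path resources/englishFactored.ser.gz (the in-IDE case), so the same code runs unchanged in both environments. Be aware that f_parser.setOptionFlags is invoked outside the try block, so if the model fails to load the constructor will follow the printed stack trace with a NullPointerException.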

Example 3: ParsedToDep

import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
ParsedToDep(String[] args) {
  HandleParameters params = new HandleParameters(args);

  if( params.hasFlag("-output") )
    _outputDir = params.get("-output");
  if( params.hasFlag("-type") )
    _inputType = params.get("-type").toLowerCase();
  _dataPath = args[args.length - 1];

  System.out.println("outputdir= " + _outputDir);
  System.out.println("inputtype= " + _inputType);

  _tlp = new PennTreebankLanguagePack();
  _gsf = _tlp.grammaticalStructureFactory();
  _tf = new LabeledScoredTreeFactory();
}
 
Developer: nchambers, Project: schemas, Lines: 17, Source: ParsedToDep.java

Example 4: testWriteImage

import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
/**
 * Test of writeImage method, of class Main.
 */

@Test
public void testWriteImage() throws Exception {
    String text = "A quick brown fox jumped over the lazy dog.";
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    LexicalizedParser lp = LexicalizedParser.loadModel();
    lp.setOptionFlags(new String[]{"-maxLength", "500", "-retainTmpSubcategories"});
    TokenizerFactory<CoreLabel> tokenizerFactory =
            PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    List<CoreLabel> wordList = tokenizerFactory.getTokenizer(new StringReader(text)).tokenize();
    Tree tree = lp.apply(wordList);
    GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
    Collection<TypedDependency> tdl = gs.typedDependenciesCollapsed();
    Main.writeImage(tdl, "image.png", 3);
    assert (new File("image.png").exists());
}
 
Developer: awaisathar, Project: dependensee, Lines: 21, Source: MainTest.java

Example 5: initLexResources

import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
private void initLexResources() {
  Properties props = new Properties();
  props.put("annotators", "tokenize, ssplit, pos, lemma, parse, ner, dcoref"); // most of these are dependencies for the main ones.
  pipeline = new StanfordCoreNLP(props);
  
  try {
    options = new Options();
    options.testOptions.verbose = true;
  } catch( Exception ex ) { ex.printStackTrace(); }

  // Dependency tree info
  TreebankLanguagePack tlp = new PennTreebankLanguagePack();
  gsf = tlp.grammaticalStructureFactory();
}
 
Developer: nchambers, Project: probschemas, Lines: 15, Source: AllParser.java

Example 6: CoreNlpPipeline

import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
public CoreNlpPipeline() {
	props = new Properties();
	// props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse,
	// dcoref");
	props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse");
	pipeline = new StanfordCoreNLP(props);
	gsf = new PennTreebankLanguagePack().grammaticalStructureFactory();
}
 
Developer: zhangcongle, Project: NewsSpikeRe, Lines: 9, Source: CoreNlpPipeline.java
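
The original project does not show how the gsf field is consumed. A plausible follow-on, sketched here with an invented sentence for illustration, pulls each sentence's constituency tree out of the pipeline's annotations and converts it:

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;

// Hypothetical usage of the pipeline and gsf fields built above.
Annotation doc = new Annotation("The quick brown fox jumps over the lazy dog.");
pipeline.annotate(doc);
for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
	Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
	GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
	System.out.println(gs.typedDependencies());
}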

Example 7: initLexResources

import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
private void initLexResources() {
  options = new Options();

  try {
    // Parser
    parser = Ling.createParser(_serializedGrammar);
  } catch( Exception ex ) { ex.printStackTrace(); }

  // Dependency tree info
  TreebankLanguagePack tlp = new PennTreebankLanguagePack();
  gsf = tlp.grammaticalStructureFactory();
}
 
Developer: nchambers, Project: schemas, Lines: 13, Source: TextToSentences.java

Example 8: init

import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
private void init() {
  // Phrase structure.
  _parser = Ling.createParser(_serializedGrammar);
  // Dependency trees.
  TreebankLanguagePack tlp = new PennTreebankLanguagePack();
  _gsf = tlp.grammaticalStructureFactory();
}
 
Developer: nchambers, Project: schemas, Lines: 8, Source: TimebankParser.java

Example 9: initLexResources

import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
private void initLexResources() {
  options = new Options();

  try {
    // Parser
    parser = Ling.createParser(serializedGrammar);
  } catch( Exception ex ) { ex.printStackTrace(); }

  // Dependency tree info
  TreebankLanguagePack tlp = new PennTreebankLanguagePack();
  gsf = tlp.grammaticalStructureFactory();
}
 
Developer: nchambers, Project: schemas, Lines: 13, Source: GigawordParser.java

Example 10: initLexResources

import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
private void initLexResources() {
  try {
    options = new Options();
    options.testOptions.verbose = true;
    // Parser
    parser = LexicalizedParser.loadModel(_serializedGrammar);
    //parser = new LexicalizedParser(_serializedGrammar, options);
  } catch( Exception ex ) { ex.printStackTrace(); }

  // Dependency tree info
  TreebankLanguagePack tlp = new PennTreebankLanguagePack();
  gsf = tlp.grammaticalStructureFactory();
}
 
Developer: nchambers, Project: schemas, Lines: 14, Source: DirectoryParser.java

Example 11: initLexResources

import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
private void initLexResources() {
  options = new Options();
  //    dp = new DocumentPreprocessor(options.tlpParams.treebankLanguagePack().getTokenizerFactory());

  try {
    // Parser
    parser = Ling.createParser(serializedGrammar);
  } catch( Exception ex ) { ex.printStackTrace(); }

  // Dependency tree info
  TreebankLanguagePack tlp = new PennTreebankLanguagePack();
  gsf = tlp.grammaticalStructureFactory();
}
 
Developer: nchambers, Project: schemas, Lines: 14, Source: LOTRParser.java

Example 12: demoAPI

import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
/**
 * demoAPI demonstrates other ways of calling the parser with already
 * tokenized text, or in some cases, raw text that needs to be tokenized as
 * a single sentence. Output is handled with a TreePrint object. Note that
 * the options used when creating the TreePrint can determine what results
 * to print out. Once again, one can capture the output by passing a
 * PrintWriter to TreePrint.printTree.
 *
 * Main difference from demoDP: the input is already tokenized text.
 */
public static void demoAPI(LexicalizedParser lp) {
	// This option shows parsing a list of correctly tokenized words
	String[] sent = { "This", "is", "an", "easy", "sentence", "." };
	List<CoreLabel> rawWords = Sentence.toCoreLabelList(sent);
	Tree parse = lp.apply(rawWords);
	parse.pennPrint();
	System.out.println();

	// This option shows loading and using an explicit tokenizer
	String sent2 = "Hey @Apple, pretty much all your products are amazing. You blow minds every time you launch a new gizmo."
			+ " that said, your hold music is crap";
	TokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizer.factory(
			new CoreLabelTokenFactory(), "");
	Tokenizer<CoreLabel> tok = tokenizerFactory
			.getTokenizer(new StringReader(sent2));
	List<CoreLabel> rawWords2 = tok.tokenize();
	parse = lp.apply(rawWords2);

	TreebankLanguagePack tlp = new PennTreebankLanguagePack();
	GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
	GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
	List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
	System.out.println(tdl);
	System.out.println();

	// You can also use a TreePrint object to print trees and dependencies
	TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed");
	tp.printTree(parse);
}
 
Developer: opinion-extraction-propagation, Project: TASC-Tuples, Lines: 42, Source: ParserDemo.java
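
The format string passed to TreePrint above determines what gets printed. As a sketch (format names per the TreePrint javadoc), other outputs can be combined the same way, and printTree also accepts a PrintWriter for capturing the result:

import java.io.PrintWriter;

import edu.stanford.nlp.trees.TreePrint;

// "words" adds the token sequence; "typedDependencies" prints uncollapsed
// dependencies. printTree(Tree, PrintWriter) lets you redirect the output.
TreePrint wordsAndDeps = new TreePrint("words,penn,typedDependencies");
wordsAndDeps.printTree(parse, new PrintWriter(System.out, true));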

Example 13: toTypedDependencies

import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
/**
 * Transform a parse tree into a list of TypedDependency instances
 * 
 * @param tree the parse tree to transform
 * @return the typed dependencies of the tree
 */
public static List<TypedDependency> toTypedDependencies(Tree tree) {
	TreebankLanguagePack tlp = new PennTreebankLanguagePack();
	Filter<String> filter = Filters.acceptFilter();
	GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(filter, tlp.typedDependencyHeadFinder());
	GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
	return (List<TypedDependency>) gs.typedDependencies();
}
 
Developer: hakchul77, Project: irnlp_toolkit, Lines: 14, Source: StanfordNlpWrapper.java
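
A possible call site for the helper above (a sketch; the model path is the stock English PCFG shipped with the Stanford parser distribution, and the sentence is invented):

import java.util.List;

import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TypedDependency;

// Hypothetical usage of StanfordNlpWrapper.toTypedDependencies(Tree).
LexicalizedParser lp = LexicalizedParser.loadModel(
		"edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
Tree tree = lp.parse("The quick brown fox jumps over the lazy dog.");
List<TypedDependency> deps = StanfordNlpWrapper.toTypedDependencies(tree);
deps.forEach(System.out::println);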

Example 14: TreeHelper

import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
public TreeHelper() {
	super();
	this.tlp = new PennTreebankLanguagePack();
	this.gsf = tlp.grammaticalStructureFactory();
	this.pst = new PhraseStructureTree();
	this.dt = new DependencyTree();
}
 
Developer: T-abide, Project: SocialEventExtraction, Lines: 8, Source: TreeHelper.java

Example 15: initLexResources

import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
private void initLexResources() {
  options = new Options();

  try {
    // Load WordNet
    if( _wordnetPath.length() > 0 )
      JWNL.initialize(new FileInputStream(_wordnetPath));

    // POS Tagger
    if( !parsed )
      tagger = new MaxentTagger("../stanford-postagger/bidirectional/train-wsj-0-18.holder");
    //      tagger = new MaxentTagger("/u/nlp/distrib/postagger-2006-05-21/wsj3t0-18-bidirectional/train-wsj-0-18.holder");

    // Parser
    parser = Ling.createParser(serializedGrammar);
    tlp = new PennTreebankLanguagePack();
    gsf = tlp.grammaticalStructureFactory();
    pwOut = parser.getOp().tlpParams.pw();

    // Coref
    if( _corefPath.length() > 0 && _eventPath.length() == 0 && !calculateIDF ) 
      coref = new Coref(_corefPath);
    else if( _eventPath.length() == 0 ) { // if events are loaded, don't need coref
      System.out.println("WARNING: no coref loaded");
      nocoref = true;
    }

    // Duplicate Gigaword files to ignore
    _duplicates = GigawordDuplicates.fromFile(_duplicatesPath);

    // Ignore List (evaluation docs)
    _ignoreList = new HashSet<String>();
    _ignoreList.add("NYT_ENG_19940701.0076");
    _ignoreList.add("NYT_ENG_19940701.0266");
    _ignoreList.add("NYT_ENG_19940701.0271");
    _ignoreList.add("NYT_ENG_19940703.0098");
    _ignoreList.add("NYT_ENG_19940705.0192");
    _ignoreList.add("NYT_ENG_19940707.0298");
    _ignoreList.add("NYT_ENG_19940707.0302");
    _ignoreList.add("NYT_ENG_19940708.0340");
    _ignoreList.add("NYT_ENG_19940708.0246");
    _ignoreList.add("NYT_ENG_19940709.0181");
    String[] arr = { 
        "NYT_ENG_20010103.0419", "NYT_ENG_20010421.0160", "NYT_ENG_20010920.0485",
        "NYT_ENG_20010109.0219", "NYT_ENG_20010504.0008", "NYT_ENG_20010926.0056",
        "NYT_ENG_20010118.0310", "NYT_ENG_20010509.0423", "NYT_ENG_20011006.0231",
        "NYT_ENG_20010119.0006", "NYT_ENG_20010509.0428", "NYT_ENG_20011016.0074",
        "NYT_ENG_20010129.0047", "NYT_ENG_20010601.0019", "NYT_ENG_20011025.0388",
        "NYT_ENG_20010206.0268", "NYT_ENG_20010606.0375", "NYT_ENG_20011102.0438",
        "NYT_ENG_20010220.0078", "NYT_ENG_20010622.0207", "NYT_ENG_20011104.0024",
        "NYT_ENG_20010222.0365", "NYT_ENG_20010628.0267", "NYT_ENG_20011116.0194",
        "NYT_ENG_20010226.0275", "NYT_ENG_20010628.0346", "NYT_ENG_20011121.0151",
        "NYT_ENG_20010303.0173", "NYT_ENG_20010706.0092", "NYT_ENG_20011201.0008",
        "NYT_ENG_20010306.0129", "NYT_ENG_20010706.0292", "NYT_ENG_20011201.0169",
        "NYT_ENG_20010307.0079", "NYT_ENG_20010708.0122", "NYT_ENG_20011205.0143",
        "NYT_ENG_20010307.0105", "NYT_ENG_20010726.0177", "NYT_ENG_20011224.0120",
        "NYT_ENG_20010328.0175", "NYT_ENG_20010801.0291", "NYT_ENG_20011224.0125",
        "NYT_ENG_20010416.0419", "NYT_ENG_20010802.0276",
        "NYT_ENG_20010417.0324", "NYT_ENG_20010828.0078",
        "NYT_ENG_20010417.0372", "NYT_ENG_20010829.0034",
        "NYT_ENG_20010419.0058", "NYT_ENG_20010904.0446" };
    for( String s : arr ) _ignoreList.add(s);

  } catch( Exception ex ) { ex.printStackTrace(); }
}
 
Developer: nchambers, Project: schemas, Lines: 66, Source: BasicEventAnalyzer.java


Note: The edu.stanford.nlp.trees.PennTreebankLanguagePack examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors; copyright remains with the original authors, and distribution and use are subject to each project's license. Do not reproduce without permission.