This page collects typical usage examples of the Java class edu.stanford.nlp.trees.PennTreebankLanguagePack. If you are unsure how PennTreebankLanguagePack is used, what it is for, or what working code looks like, the curated class examples below should help.
The PennTreebankLanguagePack class belongs to the edu.stanford.nlp.trees package. Fifteen code examples are shown below, sorted by popularity by default.
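Before the individual examples, here is a minimal, self-contained sketch of the pattern nearly all of them share: construct a PennTreebankLanguagePack, obtain a GrammaticalStructureFactory from it, and convert a parse tree into typed dependencies. It assumes a Stanford Parser 3.x jar with its default English PCFG model on the classpath; the class name PennTlpSketch is invented for illustration.

import java.io.StringReader;
import java.util.Collection;
import java.util.List;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.trees.*;

public class PennTlpSketch {
  public static void main(String[] args) {
    // Load the default English model shipped with the parser jar.
    LexicalizedParser lp = LexicalizedParser.loadModel();
    // Tokenize a raw sentence with the Penn Treebank tokenizer.
    TokenizerFactory<CoreLabel> tf =
        PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    List<CoreLabel> words =
        tf.getTokenizer(new StringReader("The quick brown fox jumps.")).tokenize();
    Tree parse = lp.apply(words);
    // PennTreebankLanguagePack supplies the punctuation conventions and
    // head-finding rules needed to turn the tree into typed dependencies.
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    Collection<TypedDependency> deps = gs.typedDependenciesCCprocessed();
    System.out.println(deps);
  }
}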
Example 1: demoDP
import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
/**
* demoDP demonstrates turning a file into tokens and then parse trees. Note
* that the trees are printed by calling pennPrint on the Tree object. It is
* also possible to pass a PrintWriter to pennPrint if you want to capture
* the output.
*
* file => tokens => parse trees
*/
public static void demoDP(LexicalizedParser lp, String filename) {
// This option shows loading, sentence-segmenting and tokenizing
// a file using DocumentPreprocessor.
TreebankLanguagePack tlp = new PennTreebankLanguagePack();
GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
// You could also create a tokenizer here (as below) and pass it
// to DocumentPreprocessor
for (List<HasWord> sentence : new DocumentPreprocessor(filename)) {
Tree parse = lp.apply(sentence);
parse.pennPrint();
System.out.println();
GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
Collection<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
System.out.println(tdl);
System.out.println();
}
}
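A hypothetical call site for demoDP might look like the following; the model path is the standard location inside the Stanford Parser models jar, and sample.txt is a placeholder file name.

// Hypothetical invocation of demoDP (model path and file name are placeholders).
LexicalizedParser lp = LexicalizedParser.loadModel(
    "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
demoDP(lp, "sample.txt");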
Example 2: T2PStanfordWrapper
import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
/**
 * Loads the serialized English grammar, either from the surrounding jar or,
 * when run from the IDE, from the resources directory, then prepares the
 * parser and the grammatical structure factory.
 */
public T2PStanfordWrapper() {
try {
ObjectInputStream in;
InputStream is;
URL u = T2PStanfordWrapper.class.getResource("/englishFactored.ser.gz");
if(u == null){
//opening from IDE
is = new FileInputStream(new File("resources/englishFactored.ser.gz"));
}else{
//opening from jar
URLConnection uc = u.openConnection();
is = uc.getInputStream();
}
in = new ObjectInputStream(new GZIPInputStream(new BufferedInputStream(is)));
f_parser = new LexicalizedParser(in);
f_tlp = new PennTreebankLanguagePack(); //new ChineseTreebankLanguagePack();
f_gsf = f_tlp.grammaticalStructureFactory();
}catch(Exception ex) {
ex.printStackTrace();
}
//option flags as in the Parser example, but without maxlength
f_parser.setOptionFlags(new String[]{"-retainTmpSubcategories"});
//f_parser.setOptionFlags(new String[]{"-segmentMarkov"});
Test.MAX_ITEMS = 4000000; //enables parsing of long sentences
}
Example 3: ParsedToDep
import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
ParsedToDep(String[] args) {
HandleParameters params = new HandleParameters(args);
if( params.hasFlag("-output") )
_outputDir = params.get("-output");
if( params.hasFlag("-type") )
_inputType = params.get("-type").toLowerCase();
_dataPath = args[args.length - 1];
System.out.println("outputdir= " + _outputDir);
System.out.println("inputtype= " + _inputType);
_tlp = new PennTreebankLanguagePack();
_gsf = _tlp.grammaticalStructureFactory();
_tf = new LabeledScoredTreeFactory();
}
Example 4: testWriteImage
import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
/**
* Test of writeImage method, of class Main.
*/
@Test
public void testWriteImage() throws Exception {
String text = "A quick brown fox jumped over the lazy dog.";
TreebankLanguagePack tlp = new PennTreebankLanguagePack();
GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
LexicalizedParser lp = LexicalizedParser.loadModel();
lp.setOptionFlags(new String[]{"-maxLength", "500", "-retainTmpSubcategories"});
TokenizerFactory<CoreLabel> tokenizerFactory =
PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
List<CoreLabel> wordList = tokenizerFactory.getTokenizer(new StringReader(text)).tokenize();
Tree tree = lp.apply(wordList);
GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
Collection<TypedDependency> tdl = gs.typedDependenciesCollapsed();
Main.writeImage(tdl, "image.png", 3);
assert (new File("image.png").exists());
}
Example 5: initLexResources
import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
private void initLexResources() {
Properties props = new Properties();
props.put("annotators", "tokenize, ssplit, pos, lemma, parse, ner, dcoref"); // most of these are dependencies for the main ones.
pipeline = new StanfordCoreNLP(props);
try {
options = new Options();
options.testOptions.verbose = true;
} catch( Exception ex ) { ex.printStackTrace(); }
// Dependency tree info
TreebankLanguagePack tlp = new PennTreebankLanguagePack();
gsf = tlp.grammaticalStructureFactory();
}
Example 6: CoreNlpPipeline
import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
public CoreNlpPipeline() {
props = new Properties();
// props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse,
// dcoref");
props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse");
pipeline = new StanfordCoreNLP(props);
gsf = new PennTreebankLanguagePack().grammaticalStructureFactory();
}
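Example 6 only wires up the pipeline and the factory; the original class does not show how they are combined. A hedged sketch of one plausible companion method follows (the method name annotateAndConvert is invented here):

import java.util.ArrayList;
import java.util.List;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.trees.TypedDependency;
import edu.stanford.nlp.util.CoreMap;

// Hypothetical companion method (not part of the original class): run the
// pipeline on raw text and convert each sentence's parse tree to dependencies.
public List<TypedDependency> annotateAndConvert(String text) {
  Annotation doc = new Annotation(text);
  pipeline.annotate(doc);
  List<TypedDependency> all = new ArrayList<TypedDependency>();
  for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
    Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
    all.addAll(gsf.newGrammaticalStructure(tree).typedDependenciesCCprocessed());
  }
  return all;
}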
Example 7: initLexResources
import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
private void initLexResources() {
options = new Options();
try {
// Parser
parser = Ling.createParser(_serializedGrammar);
} catch( Exception ex ) { ex.printStackTrace(); }
// Dependency tree info
TreebankLanguagePack tlp = new PennTreebankLanguagePack();
gsf = tlp.grammaticalStructureFactory();
}
Example 8: init
import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
private void init() {
// Phrase structure.
_parser = Ling.createParser(_serializedGrammar);
// Dependency trees.
TreebankLanguagePack tlp = new PennTreebankLanguagePack();
_gsf = tlp.grammaticalStructureFactory();
}
Example 9: initLexResources
import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
private void initLexResources() {
options = new Options();
try {
// Parser
parser = Ling.createParser(serializedGrammar);
} catch( Exception ex ) { ex.printStackTrace(); }
// Dependency tree info
TreebankLanguagePack tlp = new PennTreebankLanguagePack();
gsf = tlp.grammaticalStructureFactory();
}
Example 10: initLexResources
import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
private void initLexResources() {
try {
options = new Options();
options.testOptions.verbose = true;
// Parser
parser = LexicalizedParser.loadModel(_serializedGrammar);
//parser = new LexicalizedParser(_serializedGrammar, options);
} catch( Exception ex ) { ex.printStackTrace(); }
// Dependency tree info
TreebankLanguagePack tlp = new PennTreebankLanguagePack();
gsf = tlp.grammaticalStructureFactory();
}
Example 11: initLexResources
import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
private void initLexResources() {
options = new Options();
// dp = new DocumentPreprocessor(options.tlpParams.treebankLanguagePack().getTokenizerFactory());
try {
// Parser
parser = Ling.createParser(serializedGrammar);
} catch( Exception ex ) { ex.printStackTrace(); }
// Dependency tree info
TreebankLanguagePack tlp = new PennTreebankLanguagePack();
gsf = tlp.grammaticalStructureFactory();
}
Example 12: demoAPI
import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
/**
* demoAPI demonstrates other ways of calling the parser with already
* tokenized text, or in some cases, raw text that needs to be tokenized as
* a single sentence. Output is handled with a TreePrint object. Note that
* the options used when creating the TreePrint can determine what results
* to print out. Once again, one can capture the output by passing a
* PrintWriter to TreePrint.printTree.
*
* The difference from demoDP: the input text is already tokenized.
*/
public static void demoAPI(LexicalizedParser lp) {
// This option shows parsing a list of correctly tokenized words
String[] sent = { "This", "is", "an", "easy", "sentence", "." };
List<CoreLabel> rawWords = Sentence.toCoreLabelList(sent);
Tree parse = lp.apply(rawWords);
parse.pennPrint();
System.out.println();
// This option shows loading and using an explicit tokenizer
String sent2 = "Hey @Apple, pretty much all your products are amazing. You blow minds every time you launch a new gizmo."
+ " that said, your hold music is crap";
TokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizer.factory(
new CoreLabelTokenFactory(), "");
Tokenizer<CoreLabel> tok = tokenizerFactory
.getTokenizer(new StringReader(sent2));
List<CoreLabel> rawWords2 = tok.tokenize();
parse = lp.apply(rawWords2);
TreebankLanguagePack tlp = new PennTreebankLanguagePack();
GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
System.out.println(tdl);
System.out.println();
// You can also use a TreePrint object to print trees and dependencies
TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed");
tp.printTree(parse);
}
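Because the TreePrint format string controls what is printed, swapping it changes the output without re-parsing. A small hedged variation (format names taken from the TreePrint documentation):

// Also print POS-tagged words and uncollapsed typed dependencies.
TreePrint tp2 = new TreePrint("wordsAndTags,penn,typedDependencies");
tp2.printTree(parse);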
Example 13: toTypedDependencies
import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
/**
 * Transforms a parse tree into a list of TypedDependency instances.
 *
 * @param tree the parse tree to convert
 * @return the typed dependencies extracted from the tree
 */
public static List<TypedDependency> toTypedDependencies(Tree tree) {
TreebankLanguagePack tlp = new PennTreebankLanguagePack();
Filter<String> filter = Filters.acceptFilter();
GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(filter, tlp.typedDependencyHeadFinder());
GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
return (List<TypedDependency>) gs.typedDependencies();
}
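For context, a caller might obtain the Tree from a LexicalizedParser and hand it to the method above. A minimal hedged sketch, reusing the tokenized-input pattern from Example 12 (sentence and model are placeholders):

// Hypothetical usage of toTypedDependencies (default English model assumed).
LexicalizedParser lp = LexicalizedParser.loadModel();
String[] sent = { "The", "cat", "sat", "on", "the", "mat", "." };
Tree tree = lp.apply(Sentence.toCoreLabelList(sent));
for (TypedDependency td : toTypedDependencies(tree)) {
  System.out.println(td.reln() + "(" + td.gov() + ", " + td.dep() + ")");
}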
Example 14: TreeHelper
import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
public TreeHelper() {
super();
this.tlp = new PennTreebankLanguagePack();
this.gsf = tlp.grammaticalStructureFactory();
this.pst = new PhraseStructureTree();
this.dt = new DependencyTree();
}
Example 15: initLexResources
import edu.stanford.nlp.trees.PennTreebankLanguagePack; // import the required package/class
private void initLexResources() {
options = new Options();
try {
// Load WordNet
if( _wordnetPath.length() > 0 )
JWNL.initialize(new FileInputStream(_wordnetPath));
// POS Tagger
if( !parsed )
tagger = new MaxentTagger("../stanford-postagger/bidirectional/train-wsj-0-18.holder");
// tagger = new MaxentTagger("/u/nlp/distrib/postagger-2006-05-21/wsj3t0-18-bidirectional/train-wsj-0-18.holder");
// Parser
parser = Ling.createParser(serializedGrammar);
tlp = new PennTreebankLanguagePack();
gsf = tlp.grammaticalStructureFactory();
pwOut = parser.getOp().tlpParams.pw();
// Coref
if( _corefPath.length() > 0 && _eventPath.length() == 0 && !calculateIDF )
coref = new Coref(_corefPath);
else if( _eventPath.length() == 0 ) { // if events are loaded, don't need coref
System.out.println("WARNING: no coref loaded");
nocoref = true;
}
// Duplicate Gigaword files to ignore
_duplicates = GigawordDuplicates.fromFile(_duplicatesPath);
// Ignore List (evaluation docs)
_ignoreList = new HashSet<String>();
_ignoreList.add("NYT_ENG_19940701.0076");
_ignoreList.add("NYT_ENG_19940701.0266");
_ignoreList.add("NYT_ENG_19940701.0271");
_ignoreList.add("NYT_ENG_19940703.0098");
_ignoreList.add("NYT_ENG_19940705.0192");
_ignoreList.add("NYT_ENG_19940707.0298");
_ignoreList.add("NYT_ENG_19940707.0302");
_ignoreList.add("NYT_ENG_19940708.0340");
_ignoreList.add("NYT_ENG_19940708.0246");
_ignoreList.add("NYT_ENG_19940709.0181");
String[] arr = {
"NYT_ENG_20010103.0419", "NYT_ENG_20010421.0160", "NYT_ENG_20010920.0485",
"NYT_ENG_20010109.0219", "NYT_ENG_20010504.0008", "NYT_ENG_20010926.0056",
"NYT_ENG_20010118.0310", "NYT_ENG_20010509.0423", "NYT_ENG_20011006.0231",
"NYT_ENG_20010119.0006", "NYT_ENG_20010509.0428", "NYT_ENG_20011016.0074",
"NYT_ENG_20010129.0047", "NYT_ENG_20010601.0019", "NYT_ENG_20011025.0388",
"NYT_ENG_20010206.0268", "NYT_ENG_20010606.0375", "NYT_ENG_20011102.0438",
"NYT_ENG_20010220.0078", "NYT_ENG_20010622.0207", "NYT_ENG_20011104.0024",
"NYT_ENG_20010222.0365", "NYT_ENG_20010628.0267", "NYT_ENG_20011116.0194",
"NYT_ENG_20010226.0275", "NYT_ENG_20010628.0346", "NYT_ENG_20011121.0151",
"NYT_ENG_20010303.0173", "NYT_ENG_20010706.0092", "NYT_ENG_20011201.0008",
"NYT_ENG_20010306.0129", "NYT_ENG_20010706.0292", "NYT_ENG_20011201.0169",
"NYT_ENG_20010307.0079", "NYT_ENG_20010708.0122", "NYT_ENG_20011205.0143",
"NYT_ENG_20010307.0105", "NYT_ENG_20010726.0177", "NYT_ENG_20011224.0120",
"NYT_ENG_20010328.0175", "NYT_ENG_20010801.0291", "NYT_ENG_20011224.0125",
"NYT_ENG_20010416.0419", "NYT_ENG_20010802.0276",
"NYT_ENG_20010417.0324", "NYT_ENG_20010828.0078",
"NYT_ENG_20010417.0372", "NYT_ENG_20010829.0034",
"NYT_ENG_20010419.0058", "NYT_ENG_20010904.0446" };
for( String s : arr ) _ignoreList.add(s);
} catch( Exception ex ) { ex.printStackTrace(); }
}