This article collects typical usage examples of the Python function nltk.corpus.treebank.parsed_sents. If you have been wondering what parsed_sents does, how to use it, or what real code that calls it looks like, the curated examples below should help.
The following presents 15 code examples of parsed_sents, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python examples.
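Before the examples, here is a minimal sketch of what parsed_sents returns, assuming the 10% Penn Treebank sample has been fetched with nltk.download('treebank'): each element is an nltk.Tree over one Wall Street Journal sentence.

from nltk.corpus import treebank

# All parsed sentences, or only those from specific files:
all_trees = treebank.parsed_sents()
first = treebank.parsed_sents('wsj_0001.mrg')[0]
print(first.label())   # 'S', the root constituent
print(first.leaves())  # the sentence's tokens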
Example 1: pcfg_demo
def pcfg_demo():
    """
    A demonstration showing how PCFGs can be created and used.
    """
    from nltk import Nonterminal, induce_pcfg
    from nltk.corpus import treebank
    from nltk.grammar import toy_pcfg1, toy_pcfg2
    from nltk.parse import pchart

    pcfg_prods = toy_pcfg1.productions()

    pcfg_prod = pcfg_prods[2]
    print('A PCFG production:', repr(pcfg_prod))
    print('    pcfg_prod.lhs()  =>', repr(pcfg_prod.lhs()))
    print('    pcfg_prod.rhs()  =>', repr(pcfg_prod.rhs()))
    print('    pcfg_prod.prob() =>', repr(pcfg_prod.prob()))
    print()

    grammar = toy_pcfg2
    print('A PCFG grammar:', repr(grammar))
    print('    grammar.start()       =>', repr(grammar.start()))
    print('    grammar.productions() =>', end=' ')
    # str.replace(...) is used to line-wrap the output.
    print(repr(grammar.productions()).replace(',', ',\n' + ' ' * 26))
    print()

    print('Coverage of input words by a grammar:')
    print(grammar.covers(['a', 'boy']))   # older NLTK API; NLTK 3 offers grammar.check_coverage() instead
    print(grammar.covers(['a', 'girl']))

    # Extract productions from the first two treebank files and induce the PCFG.
    print("Induce PCFG grammar from treebank data:")
    productions = []
    for item in treebank.fileids()[:2]:   # treebank.items in pre-3.0 NLTK
        for tree in treebank.parsed_sents(item):
            # perform optional tree transformations, e.g.:
            tree.collapse_unary(collapsePOS=False)
            tree.chomsky_normal_form(horzMarkov=2)
            productions += tree.productions()

    S = Nonterminal('S')
    grammar = induce_pcfg(S, productions)
    print(grammar)
    print()

    print("Parse sentence using induced grammar:")
    parser = pchart.InsideChartParser(grammar)
    parser.trace(3)

    # doesn't work as tokens are different:
    # sent = treebank.tokenized('wsj_0001.mrg')[0]
    sent = treebank.parsed_sents('wsj_0001.mrg')[0].leaves()
    print(sent)

    for parse in parser.parse(sent):   # nbest_parse() in pre-3.0 NLTK
        print(parse)
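The induction step above also stands on its own. A minimal sketch, assuming the treebank sample is downloaded:

from nltk import Nonterminal, induce_pcfg
from nltk.corpus import treebank

productions = []
for tree in treebank.parsed_sents('wsj_0001.mrg'):
    tree.collapse_unary(collapsePOS=False)   # fold unary chains into single nodes
    tree.chomsky_normal_form(horzMarkov=2)   # binarize with limited horizontal context
    productions += tree.productions()

grammar = induce_pcfg(Nonterminal('S'), productions)
print(grammar.productions()[:5])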
Example 2: grammar_development_with_treebank
def grammar_development_with_treebank():
    from nltk.corpus import treebank
    t = treebank.parsed_sents("wsj_0001.mrg")[0]
    print(t)
    # _grammar_filter is defined elsewhere in the source module; see the sketch below.
    print("identify verbs for SV in VP -> SV S", [
        subtree for tree in treebank.parsed_sents() for subtree in tree.subtrees(_grammar_filter)
    ])
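The helper _grammar_filter is not shown in this snippet. Judging by the printed message, it presumably matches VP nodes that directly dominate an S, along the lines of the NLTK book's treebank example. A hypothetical reconstruction:

import nltk

def _grammar_filter(tree):
    # Keep VP subtrees that have an S among their children.
    child_nodes = [child.label() for child in tree if isinstance(child, nltk.Tree)]
    return (tree.label() == 'VP') and ('S' in child_nodes)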
Example 3: learn_treebank
def learn_treebank(files=None, markov_order=None):
    """
    Learn a PCFG from the Penn Treebank, and return it.

    By default, this learns from NLTK's 10% sample of the Penn Treebank.
    You can give the filename of a Treebank file; 'wsj-02-21.mrg' will
    learn from the entire training section of the Treebank.
    """
    if files is None:
        bank = treebank.parsed_sents()
    else:
        bank = treebank.parsed_sents(files)
    return learn_trees(bank, collapse=True, markov_order=markov_order)
Example 4: grammarDevelopment
def grammarDevelopment():
    import nltk
    from nltk.corpus import treebank
    print("page 315 8.6 Grammar Development")
    print("=============== Treebanks and Grammars ===============")
    t = treebank.parsed_sents('wsj_0001.mrg')[0]
    print(t)
    def filter(tree):
        # .node was the pre-NLTK-3 spelling of .label()
        child_nodes = [child.label() for child in tree if isinstance(child, nltk.Tree)]
        return (tree.label() == 'VP') and ('S' in child_nodes)
    print([subtree for tree in treebank.parsed_sents() for subtree in tree.subtrees(filter)])
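The same search can be written more compactly with a lambda, restricted here to one file for speed:

import nltk
from nltk.corpus import treebank

matches = [st for t in treebank.parsed_sents('wsj_0001.mrg')
           for st in t.subtrees(lambda st: st.label() == 'VP'
                                and any(isinstance(c, nltk.Tree) and c.label() == 'S'
                                        for c in st))]
print(len(matches))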
Example 5: test
def test():
    """Do some tree drawing tests."""
    from nltk.tree import Tree
    from nltk.treeprettyprinter import TreePrettyPrinter  # module-level imports in the original source

    def print_tree(n, tree, sentence=None, ansi=True, **xargs):
        print()
        print('{0}: "{1}"'.format(n, ' '.join(sentence or tree.leaves())))
        print(tree)
        print()
        drawtree = TreePrettyPrinter(tree, sentence)
        try:
            print(drawtree.text(unicodelines=ansi, ansi=ansi, **xargs))
        except (UnicodeDecodeError, UnicodeEncodeError):
            print(drawtree.text(unicodelines=False, ansi=False, **xargs))

    from nltk.corpus import treebank
    for n in [0, 1440, 1591, 2771, 2170]:
        tree = treebank.parsed_sents()[n]
        print_tree(n, tree, nodedist=2, maxwidth=8)
    print()
    print('ASCII version:')
    print(TreePrettyPrinter(tree).text(nodedist=2))

    tree = Tree.fromstring(
        '(top (punct 8) (smain (noun 0) (verb 1) (inf (verb 5) (inf (verb 6) '
        '(conj (inf (pp (prep 2) (np (det 3) (noun 4))) (verb 7)) (inf (verb 9)) '
        '(vg 10) (inf (verb 11)))))) (punct 12))', read_leaf=int)
    sentence = ('Ze had met haar moeder kunnen gaan winkelen ,'
                ' zwemmen of terrassen .'.split())
    print_tree('Discontinuous tree', tree, sentence, nodedist=2)
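TreePrettyPrinter can also be applied directly to a treebank tree. A minimal sketch (note the import path moved to nltk.tree.prettyprinter in newer NLTK releases):

from nltk.corpus import treebank
from nltk.treeprettyprinter import TreePrettyPrinter  # nltk.tree.prettyprinter in newer NLTK

t = treebank.parsed_sents('wsj_0001.mrg')[0]
print(TreePrettyPrinter(t).text(unicodelines=False))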
Example 6: main
def main(transform_func=None, n=10):
    parser = StanfordParser(
        path_to_jar="/cs/fs/home/hxiao/code/stanford-parser-full-2015-01-30/stanford-parser.jar",
        path_to_models_jar="/cs/fs/home/hxiao/code/stanford-parser-full-2015-01-30/stanford-parser-3.5.1-models.jar",
        model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
    )

    test_sents = treebank.sents()[-n:]
    print("len(test_sents) = %d" % (len(test_sents)))

    if transform_func and callable(transform_func):
        print("transforming it using ", transform_func)
        test_sents = [[transform_func(w) for w in s]
                      for s in test_sents]  # transform it

    print(test_sents[:10])
    print("predicting")
    pred_parses = parser.parse_sents(test_sents)

    # Align gold trees with the last n test sentences; the original zipped
    # against the full corpus, which mispairs gold and predicted parses.
    gold_parses = treebank.parsed_sents()[-n:]

    print("evaluating")
    correct_n = gold_n = predicted_n = 0.0
    for gparse, pparse in zip(gold_parses, pred_parses):
        cn, gn, pn = precision_and_recall_stat(get_nodes_with_range(gparse),
                                               get_nodes_with_range(pparse))
        correct_n += cn
        gold_n += gn
        predicted_n += pn

    print("Precision: %f, Recall: %f" % (correct_n / predicted_n, correct_n / gold_n))
Example 7: sentences
def sentences():
    for f in treebank.fileids():
        for t in treebank.parsed_sents(f):
            t.chomsky_normal_form(horzMarkov=1)
            t.collapse_unary(collapsePOS=True)
            yield (t, t.leaves())
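A generator like this streams normalized (tree, sentence) pairs lazily. Assuming the function above is in scope and treebank is imported at module level, it can be consumed like this:

from itertools import islice
from nltk.corpus import treebank

for tree, words in islice(sentences(), 3):
    print(tree.label(), len(words))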
Example 8: convert_wsj
def convert_wsj(file_obj):
    import sys
    from nltk.corpus import treebank

    sys.stderr.write("Converting Penn Treebank sampler...\n")
    tb = TreebankConverter()  # TreebankConverter is defined elsewhere in the project
    for sentence in treebank.parsed_sents():
        tb.add_sentence(sentence)
    tb.write(file_obj)
Example 9: main
def main():
    answers = open('coref_key.txt', 'r')
    this_correct = 0
    correct = 0
    total = 0
    prev_sentences = deque()
    for file in FILENAMES:
        this_correct = 0
        this_total = 0
        prev_sentences.clear()
        for tree in treebank.parsed_sents(file):
            tree = ParentedTree.convert(tree)
            for pronoun, np_node in find_pronouns(tree):
                # i = 0
                # for t in list(prev_sentences)[-3:]:
                #     t.pretty_print()
                #     print("-" * 25)
                #     i = i + 1
                #     if i == 3: break
                proposed = hobbs_to_string(hobbs(np_node, pronoun.lower(), prev_sentences))
                tree.pretty_print()
                actual = answers.readline()
                if proposed == actual[:-1]:
                    update_pronoun_results(pronoun, 1)
                    correct += 1
                    this_correct += 1
                else:
                    update_pronoun_results(pronoun, 0)
                total += 1
                this_total += 1
                print("Pronoun: '" + pronoun + "' Proposed: '" + proposed + "' Actual: '" + actual + "'")
                if total:
                    print("Overall:\tCorrect:", correct, "\tTotal:", total, "\tPercentage:", correct / float(total), "\n")
                print("*" * 100)
                print("*" * 100)
            prev_sentences.append(tree)
        print("-" * 50)
        if this_correct:
            print(file, ":\tCorrect:", this_correct, "\tTotal:", this_total, "\tPercentage:", this_correct / float(this_total), "\n")
    if total:
        print("Overall:\tCorrect:", correct, "\tTotal:", total, "\tPercentage:", correct / float(total), "\n")
    print("-" * 50)
    print("Male correct:", PRONOUN_RESULTS['male'], "\tMale total:", PRONOUN_RESULTS['male_total'], "\tPercent correct:", PRONOUN_RESULTS['male_pct'])
    print("Female correct:", PRONOUN_RESULTS['female'], "\tFemale total:", PRONOUN_RESULTS['female_total'], "\tPercent correct:", PRONOUN_RESULTS['female_pct'])
    print("Neutral correct:", PRONOUN_RESULTS['neutral'], "\tNeutral total:", PRONOUN_RESULTS['neutral_total'], "\tPercent correct:", PRONOUN_RESULTS['neutral_pct'])
    print("Plural correct:", PRONOUN_RESULTS['they'], "\tPlural total:", PRONOUN_RESULTS['they_total'], "\tPercent correct:", PRONOUN_RESULTS['they_pct'])
    print("Reflexive correct:", PRONOUN_RESULTS['reflexive'], "\tReflexive total:", PRONOUN_RESULTS['reflexive_total'], "\tPercent correct:", PRONOUN_RESULTS['reflexive_pct'])
    print("Total correct:", correct, "\tTotal:", total, "\tPercent correct:", correct / float(total))
Example 10: getTrees
def getTrees(source, size):
    '''Load the trees from source, return the first SIZE trees.'''
    if source == 'treebank':
        from nltk.corpus import treebank
        trees = treebank.parsed_sents()
        # inds = random.permutation(range(0, len(trees)))[0:size]
        trees = trees[:size]
        return trees
    else:
        return list()
Example 11: TreebankNoTraces
def TreebankNoTraces():
    tb = []
    for t in treebank.parsed_sents():
        if t.label() != "S":
            continue
        RemoveFunctionTags(t)
        RemoveTraces(t)
        t.collapse_unary(collapsePOS=True, collapseRoot=True)
        t.chomsky_normal_form()
        tb.append(t)
    return tb
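RemoveFunctionTags and RemoveTraces are helpers defined elsewhere. Typical implementations strip annotations like NP-SBJ down to NP and delete -NONE- trace subtrees; a simplified, hypothetical sketch:

from nltk import Tree

def RemoveFunctionTags(tree):
    # NP-SBJ -> NP, PP-LOC=2 -> PP; skip labels like -NONE-, -LRB-, -RRB-.
    for subtree in tree.subtrees():
        label = subtree.label()
        if not label.startswith('-'):
            subtree.set_label(label.split('-')[0].split('=')[0])

def RemoveTraces(tree):
    # Delete -NONE- subtrees in place; a full version would also prune
    # constituents left empty by the deletion.
    positions = [p for p in tree.treepositions()
                 if isinstance(tree[p], Tree) and tree[p].label() == '-NONE-']
    for p in sorted(positions, reverse=True):
        del tree[p]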
Example 12: learn_treebank
def learn_treebank(trees=None):
    """
    Learn a PCFG from the Penn Treebank, and return it.

    By default, this learns from NLTK's 10% sample of the Penn Treebank.
    You can also pass a set of trees.
    """
    if trees is None:
        bank = treebank.parsed_sents()
    else:
        bank = trees
    return learn_trees(bank, collapse=True)
Example 13: write_example_tree
def write_example_tree(features, f):
    filename = features['_filename']
    sen = features['_sentence_id']
    phr = features['_phrase_id']
    tree = treebank.parsed_sents(filename)[sen]
    phrase = tree[tree.treepositions('preorder')[phr]]
    l = treebank_helper.get_label(phrase)
    treebank_helper.set_label(phrase, '***' + l + '***')
    f.write(str(tree))
    f.write('\n')
    treebank_helper.set_label(phrase, l)
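The treepositions('preorder') trick used above addresses every node by its position tuple; for instance:

from nltk.corpus import treebank

tree = treebank.parsed_sents('wsj_0001.mrg')[0]
positions = tree.treepositions('preorder')
print(positions[2], tree[positions[2]])  # third node visited in preorder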
Example 14: treebank_accessor
def treebank_accessor():
    '''
    Read the Penn Treebank sample and return the parse trees for every
    sentence in the corpus.
    '''
    trees = []
    for i in range(1, TREEBANK_FILES + 1):  # TREEBANK_FILES is defined elsewhere; the sample has 199 files
        file_number = "%03d" % (i,)
        t = treebank.parsed_sents('wsj_0' + file_number + '.mrg')
        for sentence in range(len(t)):
            # Add each parsed sentence in the file to trees.
            trees.append(t[sentence])
    return trees
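Hard-coding the names works for the sample's wsj_0001.mrg through wsj_0199.mrg, but the corpus reader can enumerate its own files; a simpler equivalent:

from nltk.corpus import treebank

trees = [t for f in treebank.fileids() for t in treebank.parsed_sents(f)]
# or simply: trees = list(treebank.parsed_sents())
print(len(trees))  # 3914 sentences in the 10% sample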
Example 15: get_treebank_rules
def get_treebank_rules(cutoff=0, include_counts=False):
    all_rules = cache_utils.cache_get('treebank_rules', 'rules')
    if not all_rules:
        log('Generating lexical rules from Penn Treebank', 4)
        from nltk.corpus import treebank
        all_rules = dict()
        for tree in treebank.parsed_sents():
            for rule, count in lexical_rules(tree).items():
                all_rules[rule] = all_rules.get(rule, 0) + count
        cache_utils.cache_set('treebank_rules', 'rules', all_rules)

    if include_counts:
        return {k: v for (k, v) in all_rules.items() if v > cutoff}
    else:
        rules_set = set([rule for rule, count in all_rules.items() if count > cutoff])
        return rules_set
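Here lexical_rules (along with cache_utils and log) comes from elsewhere in the project. Given how its output is consumed, it plausibly counts lexical productions per tree; a hypothetical sketch:

from collections import Counter

def lexical_rules(tree):
    """Hypothetical sketch: count lexical productions (e.g. NN -> 'cat') in a tree."""
    return Counter(prod for prod in tree.productions() if prod.is_lexical())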