

Python treebank.parsed_sents Function Code Examples

This article collects typical usage examples of the Python function nltk.corpus.treebank.parsed_sents. If you have been wondering what exactly parsed_sents does, how to call it, or what real uses of it look like, the hand-picked code examples below should help.


The following presents 15 code examples of the parsed_sents function, ordered by popularity.
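Before the examples, here is a minimal sketch of the API itself (assuming NLTK 3 and the treebank sample installed, e.g. via nltk.download('treebank')):

from nltk.corpus import treebank

# nltk.download('treebank')  # one-time download of the 10% Penn Treebank sample

print(treebank.fileids()[:3])                    # e.g. ['wsj_0001.mrg', 'wsj_0002.mrg', ...]
tree = treebank.parsed_sents('wsj_0001.mrg')[0]  # first parsed sentence of one file
print(tree.label())                              # label of the root node
print(tree.leaves())                             # the tokens of the sentence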

Example 1: pcfg_demo

def pcfg_demo():
    """
    A demonstration showing how a ``PCFG`` (the class formerly known as
    ``WeightedGrammar``) can be created and used.
    """

    from nltk import Nonterminal, induce_pcfg
    from nltk.corpus import treebank
    from nltk.grammar import toy_pcfg1, toy_pcfg2  # small demo grammars shipped with NLTK
    from nltk.parse import pchart

    pcfg_prods = toy_pcfg1.productions()

    pcfg_prod = pcfg_prods[2]
    print('A PCFG production:', repr(pcfg_prod))
    print('    pcfg_prod.lhs()  =>', repr(pcfg_prod.lhs()))
    print('    pcfg_prod.rhs()  =>', repr(pcfg_prod.rhs()))
    print('    pcfg_prod.prob() =>', repr(pcfg_prod.prob()))
    print()

    grammar = toy_pcfg2
    print('A PCFG grammar:', repr(grammar))
    print('    grammar.start()       =>', repr(grammar.start()))
    print('    grammar.productions() =>', end=' ')
    # str.replace(...) is used here to line-wrap the output.
    print(repr(grammar.productions()).replace(',', ',\n' + ' ' * 26))
    print()

    print('Coverage of input words by a grammar:')
    # In NLTK 3, check_coverage() raises ValueError for uncovered words
    # (older NLTK versions exposed a boolean covers() method instead).
    for words in (['a', 'boy'], ['a', 'girl']):
        try:
            grammar.check_coverage(words)
            print(words, '-> covered')
        except ValueError:
            print(words, '-> not covered')

    # Extract productions from the trees in the first two treebank files
    # and induce the PCFG.
    print("Induce PCFG grammar from treebank data:")

    productions = []
    for item in treebank.fileids()[:2]:
        for tree in treebank.parsed_sents(item):
            # perform optional tree transformations, e.g.:
            tree.collapse_unary(collapsePOS=False)
            tree.chomsky_normal_form(horzMarkov=2)

            productions += tree.productions()

    S = Nonterminal('S')
    grammar = induce_pcfg(S, productions)
    print(grammar)
    print()

    print("Parse sentence using induced grammar:")

    parser = pchart.InsideChartParser(grammar)
    parser.trace(3)

    # doesn't work as tokens are different:
    # sent = treebank.tokenized('wsj_0001.mrg')[0]

    sent = treebank.parsed_sents('wsj_0001.mrg')[0].leaves()
    print(sent)
    for parse in parser.parse(sent):  # parse() replaces nbest_parse() in NLTK 3
        print(parse)
Author: ggosline, Project: taxonparser, Lines: 60, Source: grammar.py
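A note on the "optional tree transformations" above: collapse_unary removes unary chains, and chomsky_normal_form binarizes n-ary nodes, with horzMarkov bounding how much sibling context the artificial node labels keep. A small sketch on a toy tree (the tree is invented for illustration):

from nltk import Tree

t = Tree.fromstring('(S (NP (DT the) (JJ old) (NN cat)) (VP (VBD sat)))')
t.chomsky_normal_form(horzMarkov=2)
print(t)
# The ternary NP becomes a chain with an artificial node:
# (S (NP (DT the) (NP|<JJ-NN> (JJ old) (NN cat))) (VP (VBD sat)))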

Example 2: grammar_development_with_treebank

def grammar_development_with_treebank():
    from nltk.corpus import treebank

    t = treebank.parsed_sents("wsj_0001.mrg")[0]
    print(t)
    # _grammar_filter is a predicate defined elsewhere in the source project.
    print("identify verbs for SV in VP -> SV S", [
        subtree for tree in treebank.parsed_sents() for subtree in tree.subtrees(_grammar_filter)
    ])
Author: prashiyn, Project: nltk-examples, Lines: 8, Source: ch08.py

Example 3: learn_treebank

def learn_treebank(files=None, markov_order=None):
    """
    Learn a PCFG from the Penn Treebank, and return it.

    By default, this learns from NLTK's 10% sample of the Penn Treebank.
    You can give the filename of a Treebank file; 'wsj-02-21.mrg' will
    learn from the entire training section of the Treebank.
    """
    if files is None:
        bank = treebank.parsed_sents()
    else:
        bank = treebank.parsed_sents(files)
    return learn_trees(bank, collapse=True, markov_order=markov_order)
Author: salmanahmad, Project: 6.863, Lines: 11, Source: learn_pcfg.py

Example 4: grammarDevelopment

def grammarDevelopment():
    import nltk
    from nltk.corpus import treebank

    print("page 315 8.6 Grammar Development")
    print("=============== Treebanks and Grammars ===============")
    t = treebank.parsed_sents('wsj_0001.mrg')[0]
    print(t)

    def filter(tree):
        # In NLTK 3, node labels are read with .label() instead of .node.
        child_nodes = [child.label() for child in tree if isinstance(child, nltk.Tree)]
        return (tree.label() == 'VP') and ('S' in child_nodes)

    print([subtree for tree in treebank.parsed_sents() for subtree in tree.subtrees(filter)])
Author: hbdhj, Project: python, Lines: 12, Source: chapter8.py

Example 5: test

def test():
    """Do some tree drawing tests."""
    # Import paths as of NLTK 3.x.
    from nltk.tree import Tree
    from nltk.treeprettyprinter import TreePrettyPrinter

    def print_tree(n, tree, sentence=None, ansi=True, **xargs):
        print()
        print('{0}: "{1}"'.format(n, ' '.join(sentence or tree.leaves())))
        print(tree)
        print()
        drawtree = TreePrettyPrinter(tree, sentence)
        try:
            print(drawtree.text(unicodelines=ansi, ansi=ansi, **xargs))
        except (UnicodeDecodeError, UnicodeEncodeError):
            print(drawtree.text(unicodelines=False, ansi=False, **xargs))

    from nltk.corpus import treebank
    for n in [0, 1440, 1591, 2771, 2170]:
        tree = treebank.parsed_sents()[n]
        print_tree(n, tree, nodedist=2, maxwidth=8)
    print()
    print('ASCII version:')
    print(TreePrettyPrinter(tree).text(nodedist=2))

    tree = Tree.fromstring(
        '(top (punct 8) (smain (noun 0) (verb 1) (inf (verb 5) (inf (verb 6) '
        '(conj (inf (pp (prep 2) (np (det 3) (noun 4))) (verb 7)) (inf (verb 9)) '
        '(vg 10) (inf (verb 11)))))) (punct 12))', read_leaf=int)
    sentence = ('Ze had met haar moeder kunnen gaan winkelen ,'
                ' zwemmen of terrassen .'.split())
    print_tree('Discontinuous tree', tree, sentence, nodedist=2)
Author: CaptainAL, Project: Spyder, Lines: 28, Source: treeprettyprinter.py

Example 6: main

def main(transform_func=None, n=10):
    parser = StanfordParser(
        path_to_jar="/cs/fs/home/hxiao/code/stanford-parser-full-2015-01-30/stanford-parser.jar",
        path_to_models_jar="/cs/fs/home/hxiao/code/stanford-parser-full-2015-01-30/stanford-parser-3.5.1-models.jar",
        model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
    )

    test_sents = treebank.sents()[-n:]

    print("len(test_sents) = %d" % len(test_sents))

    if transform_func and callable(transform_func):
        print("transforming it using", transform_func)
        test_sents = [[transform_func(w) for w in s]
                      for s in test_sents]  # transform it

    print(test_sents[:10])

    print("predicting")
    pred_parses = parser.parse_sents(test_sents)

    # Align the gold trees with the test sentences (the last n sentences).
    gold_parses = treebank.parsed_sents()[-n:]

    print("evaluating")

    correct_n = gold_n = predicted_n = 0.0

    # precision_and_recall_stat and get_nodes_with_range are project-internal helpers.
    for gparse, pparse in zip(gold_parses, pred_parses):
        cn, gn, pn = precision_and_recall_stat(get_nodes_with_range(gparse),
                                               get_nodes_with_range(pparse))
        correct_n += cn
        gold_n += gn
        predicted_n += pn

    print("Precision: %f, Recall: %f" % (correct_n / predicted_n, correct_n / gold_n))
Author: xiaohan2012, Project: capitalization-restoration-train, Lines: 35, Source: parse.py

Example 7: sentences

def sentences():
    for f in treebank.fileids():
        for t in treebank.parsed_sents(f):
            t.chomsky_normal_form(horzMarkov=1)
            t.collapse_unary(collapsePOS=True)

            yield (t, t.leaves())
Author: brucespang, Project: pcyk, Lines: 7, Source: pcyk.py

Example 8: convert_wsj

def convert_wsj(file_obj):
    import sys

    from nltk.corpus import treebank

    sys.stderr.write("Converting Penn Treebank sampler...\n")
    tb = TreebankConverter()  # provided by the surrounding nltk_contrib module
    for sentence in treebank.parsed_sents():
        tb.add_sentence(sentence)
    tb.write(file_obj)
Author: Sandy4321, Project: nltk_contrib, Lines: 8, Source: demo.py

Example 9: main

def main():
    answers = open('coref_key.txt', 'r')
    correct = 0
    total = 0
    prev_sentences = deque()
    for file in FILENAMES:
        this_correct = 0
        this_total = 0
        prev_sentences.clear()
        for tree in treebank.parsed_sents(file):

            # ParentedTree gives each node a parent pointer, which Hobbs'
            # algorithm needs in order to walk up the tree.
            tree = ParentedTree.convert(tree)

            for pronoun, np_node in find_pronouns(tree):
                proposed = hobbs_to_string(hobbs(np_node, pronoun.lower(), prev_sentences))
                tree.pretty_print()

                actual = answers.readline()

                if proposed == actual[:-1]:
                    update_pronoun_results(pronoun, 1)
                    correct += 1
                    this_correct += 1

                update_pronoun_results(pronoun, 0)
                total += 1
                this_total += 1

                print("Pronoun: '" + pronoun + "'   Proposed: '" + proposed + "'   Actual: '" + actual + "'")

                if total:
                    print("Overall:\tCorrect:", correct, "\tTotal:", total, "\tPercentage:", correct / float(total), "\n")

                print("*" * 100)
                print("*" * 100)
            prev_sentences.append(tree)
        print("-" * 50)
        if this_correct:
            print(file, ":\tCorrect:", this_correct, "\tTotal:", this_total, "\tPercentage:", this_correct / float(this_total), "\n")
        if total:
            print("Overall:\tCorrect:", correct, "\tTotal:", total, "\tPercentage:", correct / float(total), "\n")
        print("-" * 50)

    print("Male correct:", PRONOUN_RESULTS['male'], "\tMale total:", PRONOUN_RESULTS['male_total'], "\tPercent correct:", PRONOUN_RESULTS['male_pct'])
    print("Female correct:", PRONOUN_RESULTS['female'], "\tFemale total:", PRONOUN_RESULTS['female_total'], "\tPercent correct:", PRONOUN_RESULTS['female_pct'])
    print("Neutral correct:", PRONOUN_RESULTS['neutral'], "\tNeutral total:", PRONOUN_RESULTS['neutral_total'], "\tPercent correct:", PRONOUN_RESULTS['neutral_pct'])
    print("Plural correct:", PRONOUN_RESULTS['they'], "\tPlural total:", PRONOUN_RESULTS['they_total'], "\tPercent correct:", PRONOUN_RESULTS['they_pct'])
    print("Reflexive correct:", PRONOUN_RESULTS['reflexive'], "\tReflexive total:", PRONOUN_RESULTS['reflexive_total'], "\tPercent correct:", PRONOUN_RESULTS['reflexive_pct'])
    print("Total correct:", correct, "\tTotal:", total, "\tPercent correct:", correct / float(total))
Author: treyfeldman, Project: Hobb-s-Algorithm, Lines: 56, Source: HobbsImplementation.py
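A note on the ParentedTree.convert call above: Hobbs' algorithm walks upward from the pronoun's NP node, and plain nltk.Tree nodes carry no parent pointers, so the conversion is what makes that walk possible. A minimal, self-contained sketch of the idea:

from nltk.corpus import treebank
from nltk.tree import ParentedTree

tree = ParentedTree.convert(treebank.parsed_sents('wsj_0001.mrg')[0])
node = tree[tree.leaf_treeposition(0)[:-1]]  # POS node above the first word
while node.parent() is not None:             # walk up toward the root
    node = node.parent()
print(node.label())                          # the root label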

Example 10: getTrees

def getTrees(source, size):
    '''Load the trees from source; return the first SIZE trees.'''
    if source == 'treebank':
        from nltk.corpus import treebank
        trees = treebank.parsed_sents()
        return trees[:size]
    else:
        return list()
Author: Jsalim, Project: NLP-Stuff, Lines: 10, Source: PCFG_util.py

Example 11: TreebankNoTraces

def TreebankNoTraces():
    tb = []
    for t in treebank.parsed_sents():
        if t.label() != "S":
            continue
        RemoveFunctionTags(t)
        RemoveTraces(t)
        t.collapse_unary(collapsePOS=True, collapseRoot=True)
        t.chomsky_normal_form()
        tb.append(t)
    return tb
Author: weitongruan, Project: Comp150NLP, Lines: 10, Source: pset4.py
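The binarization applied here can be undone after parsing: NLTK's Tree provides un_chomsky_normal_form(), which removes the artificial nodes introduced by chomsky_normal_form() (the collapsed unary chains, however, have no built-in inverse). A minimal round-trip sketch:

from nltk.corpus import treebank

t = treebank.parsed_sents('wsj_0001.mrg')[0]
t.collapse_unary(collapsePOS=True, collapseRoot=True)
t.chomsky_normal_form()
t.un_chomsky_normal_form()  # removes only the artificial binarization nodes
print(t)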

Example 12: learn_treebank

def learn_treebank(trees=None):
    """
    Learn a PCFG from the Penn Treebank, and return it.

    By default, this learns from NLTK's 10% sample of the Penn Treebank.
    You can also pass a set of trees.
    """
    if trees is None:
        bank = treebank.parsed_sents()
    else:
        bank = trees
    return learn_trees(bank, collapse=True)
Author: JakeBrawer, Project: org, Lines: 10, Source: learn_pcfg.py

Example 13: write_example_tree

def write_example_tree(features, f):
    filename = features['_filename']
    sen = features['_sentence_id']
    phr = features['_phrase_id']
    tree = treebank.parsed_sents(filename)[sen]
    phrase = tree[tree.treepositions('preorder')[phr]]
    l = treebank_helper.get_label(phrase)
    treebank_helper.set_label(phrase, '***' + l + '***')
    f.write(str(tree))
    f.write('\n')
    treebank_helper.set_label(phrase, l)
Author: EddieNejadi, Project: Machine_Learning, Lines: 11, Source: funtag.py

Example 14: treebank_accessor

def treebank_accessor():
  '''
  Function that reads the Penn treebank and returns all the trees 
  for each sentence in the corpus.
  '''
  trees = []

  for i in range(1, TREEBANK_FILES + 1):
    file_number = "%03d" % (i,)
    t = treebank.parsed_sents('wsj_0' + file_number + '.mrg')

    for sentence in range(len(t)):
      # For each sentence in the file, convert to a tree and add it to trees[]
      trees.append(t[sentence])

  return trees
Author: barbaragabriela, Project: inlp-probabilistic-parsing, Lines: 16, Source: helper.py
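The hard-coded TREEBANK_FILES constant and manual zero-padding can be avoided entirely: treebank.fileids() lists the sample's files, and parsed_sents() with no arguments already concatenates all of them. A minimal equivalent sketch:

from nltk.corpus import treebank

trees = list(treebank.parsed_sents())  # all trees from all files
# or, file by file:
trees = [t for f in treebank.fileids() for t in treebank.parsed_sents(f)]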

Example 15: get_treebank_rules

def get_treebank_rules(cutoff=0, include_counts=False):
    all_rules = cache_utils.cache_get('treebank_rules', 'rules')
    if not all_rules:
        log('Generating lexical rules from Penn Treebank', 4)
        from nltk.corpus import treebank
        all_rules = dict()
        for tree in treebank.parsed_sents():
            for rule, count in lexical_rules(tree).items():
                all_rules[rule] = all_rules.get(rule, 0) + count

        cache_utils.cache_set('treebank_rules', 'rules', all_rules)

    if include_counts:
        return {k: v for (k, v) in all_rules.items() if v > cutoff}
    else:
        rules_set = set([rule for rule, count in all_rules.items() if count > cutoff])
        return rules_set
Author: snyderp, Project: cs412-scorer, Lines: 17, Source: syntactic_formation.py


Note: the nltk.corpus.treebank.parsed_sents examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective developers; copyright remains with the original authors, and distribution and use are governed by each project's License. Do not reproduce without permission.