This article collects typical usage examples of the Python class nltk.tree.ParentedTree. If you are wondering what ParentedTree is for, how to use it, or want to see it applied in real code, the curated examples below should help.
The following 15 code examples of the ParentedTree class are shown, sorted by popularity by default.
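As a quick orientation before the examples: unlike a plain Tree, every node in a ParentedTree keeps a pointer to its parent, which enables upward and sideways navigation. A minimal sketch (the sentence is made up):

from nltk.tree import ParentedTree

# Build a ParentedTree from a bracketed parse string.
t = ParentedTree.fromstring('(S (NP (PRP I)) (VP (VB eat) (NP (NNS apples))))')

vp = t[1]                   # the VP subtree
print(vp.parent().label())  # 'S'  -- upward navigation, unavailable on a plain Tree
print(vp.left_sibling())    # (NP (PRP I))
print(vp.treeposition())    # (1,)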
Example 1: test_labeled_nodes
def test_labeled_nodes(self):
'''
Test labeled nodes.
Test case from Emily M. Bender.
'''
search = '''
# macros
@ SBJ /SBJ/;
@ VP /VP/;
@ VB /VB/;
@ VPoB /V[PB]/;
@ OBJ /OBJ/;

# 1 svo
S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)) : =s .. =v'''
sent1 = ParentedTree.fromstring(
'(S (NP-SBJ I) (VP (VB eat) (NP-OBJ (NNS apples))))')
sent2 = ParentedTree.fromstring(
'(S (VP (VB eat) (NP-OBJ (NNS apples))) (NP-SBJ I))')
search_firsthalf = (search.split('\n\n')[0] +
'S < @SBJ < (@VP < (@VB $.. @OBJ))')
search_rewrite = 'S < (/.*SBJ/ $.. (/VP/ < (/VB/ $.. /.*OBJ/)))'
self.assertTrue(list(tgrep.tgrep_positions(search_firsthalf, [sent1]))[0])
self.assertTrue(list(tgrep.tgrep_positions(search, [sent1]))[0])
self.assertTrue(list(tgrep.tgrep_positions(search_rewrite, [sent1]))[0])
self.assertEqual(list(tgrep.tgrep_positions(search, [sent1])),
list(tgrep.tgrep_positions(search_rewrite, [sent1])))
self.assertTrue(list(tgrep.tgrep_positions(search_firsthalf, [sent2]))[0])
self.assertFalse(list(tgrep.tgrep_positions(search, [sent2]))[0])
self.assertFalse(list(tgrep.tgrep_positions(search_rewrite, [sent2]))[0])
self.assertEqual(list(tgrep.tgrep_positions(search, [sent2])),
list(tgrep.tgrep_positions(search_rewrite, [sent2])))
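For readers new to tgrep_positions, note its return shape: it yields one list per input tree, containing the positions of all matching nodes. A short sketch against the first sentence above (the pattern is illustrative):

from nltk import tgrep
from nltk.tree import ParentedTree

sent = ParentedTree.fromstring('(S (NP-SBJ I) (VP (VB eat) (NP-OBJ (NNS apples))))')
# One result list per input tree; each entry is a matching node's tree position.
print(list(tgrep.tgrep_positions('/NP-SBJ/', [sent])))  # [[(0,)]]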
Example 2: assign_slots
def assign_slots(tokens, tag_tree, word_tree):
stopword_list = stopwords.words('english')
tokens_with_slot_tags = []
word_tree = ParentedTree.convert(word_tree)
tag_tree = ParentedTree.convert(tag_tree)
word_tree_with_cats = tag_words_with_categories(word_tree)
tag_tree_with_cats = tag_words_with_categories(tag_tree)
for i, word in enumerate(tokens):
tag = finalize_tags(i, word, tag_tree_with_cats, word_tree_with_cats)
tokens_with_slot_tags.append((word, tag))
found_query_focus = False
for i, item in enumerate(tokens_with_slot_tags):
word, tag = item
if tag in ['USER','MEDIA','NETWORK'] and not found_query_focus:
tokens_with_slot_tags[i] = (word, 'SEARCH')
found_query_focus = True
elif tag == UNK:
tokens_with_slot_tags[i] = (word, 'KEYWORD')
slots = {}
for word, tag in tokens_with_slot_tags:
if tag == 'SKIP':
continue
elif tag == 'KEYWORD':
if 'KEYWORDS' not in slots:
slots['KEYWORDS'] = []
if word not in stopword_list and word not in PUNCTUATION:
slots['KEYWORDS'].append(word)
else:
if tag not in slots:
slots[tag] = word
else:
previous_words = slots[tag]
slots[tag] = ' '.join([previous_words, word])
return slots
Example 3: merge_tree_nnps
def merge_tree_nnps(tree):
"""
Takes a parse tree and merges any consecutive leaf nodes that come from NNPs
For example if there is a segment of:
(NP
(JJ old)
(NNP Pierre)
(NNP Vinken)
)
Returns:
(NP
(JJ old)
(NNP PierreVinken)
)
"""
# require a ParentedTree to get a subtree's tree position
p = ParentedTree.convert(tree)
# iterate over subtrees of height 3: this is where NPs that dominate NNPs (which dominate the words) will be
for s in p.subtrees(filter=lambda s: s.height() == 3):
# merge NNPs in the list representation of this tree's children: [(POS, word), ...]
new_noun_phrase = merge_tagged_nnps([(c.label(), c[0]) for c in s])
child_str = " ".join("(%s %s)" % (pos, word) for pos, word in new_noun_phrase)
# create new subtree with merged NNP's
new_s = ParentedTree.fromstring("(%s %s)" % (s.label(), child_str))
# replace old subtree with new subtree
p[s.treeposition()] = new_s
return Tree.convert(p)
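A hedged usage sketch for the function above, assuming merge_tagged_nnps (a project helper not shown here) concatenates consecutive (NNP, word) pairs. The NP sits below the root on purpose: NLTK forbids assigning to the empty tree position (), so the function cannot rewrite a tree whose root is itself the height-3 subtree.

from nltk.tree import Tree

tree = Tree.fromstring('(S (NP (JJ old) (NNP Pierre) (NNP Vinken)) (VP (VBD slept)))')
merged = merge_tree_nnps(tree)  # merge_tagged_nnps must be in scope
print(merged)  # expected: (S (NP (JJ old) (NNP PierreVinken)) (VP (VBD slept)))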
Example 4: lappinleasse
def lappinleasse(parsetree, i):
global entitySet
for np in parsetree.subtrees(lambda x: x.label() == 'NP'):
if 'PRP' in np[0].label():
if np[0,0].lower() == 'it' and ispleonastic(np, parsetree): continue
maxsalience = -1
referent = None
e = Entity(np, parsetree, i)
for entity in entitySet:
if entity.sentencenum >= i - 4 and e.agreeswith(entity) and maxsalience < entity.salience:
maxsalience = entity.salience
referent = entity
orig = np[0, 0]  # bind before the try so the except clause below can report it
try:
referent.salience += e.salience
referent.gender = e.gender
referent.phrases.add(orig + str(i))
if np[0].label() == 'PRP$':
np[0] = ParentedTree.fromstring('(SUB <'+ referent.name + "'s>)")
print('PRP$ substitution', orig, '-->', referent.name)
else:
np[0] = ParentedTree.fromstring('(SUB <' + referent.name + '>)')
print('PRP substitution', orig, '-->', referent.name)
except AttributeError:  # referent is still None, i.e. no antecedent was found
print('No substitution found for', orig)
continue
elif np[0].label() == 'EX': continue
else: entitySet.add(Entity(np, parsetree, i))
# print('Discourse model after sentence', i + 1, ':')
# for entity in entitySet: print(entity)
halve()
Example 5: syntax_similarity_two_documents
def syntax_similarity_two_documents(self, doc1, doc2, average=False): #syntax similarity of two single documents
global numnodes
doc1sents = self.sent_detector.tokenize(doc1.strip())
doc2sents = self.sent_detector.tokenize(doc2.strip())
for s in doc1sents: # to handle unusually long sentences
if len(s.split())>100:
return "NA"
for s in doc2sents:
if len(s.split())>100:
return "NA"
try: # to handle parse errors, which can happen when a sentence contains an unusually long word
doc1parsed = self.parser.raw_parse_sents((doc1sents))
doc2parsed = self.parser.raw_parse_sents((doc2sents))
except Exception as e:
sys.stderr.write(str(e))
return "NA"
costMatrix = []
doc1parsed = list(doc1parsed)
for i in range(len(doc1parsed)):
doc1parsed[i] = list(doc1parsed[i])[0]
doc2parsed = list(doc2parsed)
for i in range(len(doc2parsed)):
doc2parsed[i] = list(doc2parsed[i])[0]
for i in range(len(doc1parsed)):
numnodes = 0
sentencedoc1 = ParentedTree.convert(doc1parsed[i])
tempnode = Node(sentencedoc1.root().label())
new_sentencedoc1 = self.convert_mytree(sentencedoc1,tempnode)
temp_costMatrix = []
sen1nodes = numnodes
for j in range(len(doc2parsed)):
numnodes=0.0
sentencedoc2 = ParentedTree.convert(doc2parsed[j])
tempnode = Node(sentencedoc2.root().label())
new_sentencedoc2 = self.convert_mytree(sentencedoc2,tempnode)
ED = simple_distance(new_sentencedoc1, new_sentencedoc2)
ED = ED / (numnodes + sen1nodes)
temp_costMatrix.append(ED)
costMatrix.append(temp_costMatrix)
costMatrix = np.array(costMatrix)
if average:
return 1-np.mean(costMatrix)
else:
indexes = su.linear_assignment(costMatrix)
total = 0
rowMarked = [0] * len(doc1parsed)
colMarked = [0] * len(doc2parsed)
for row, column in indexes:
total += costMatrix[row][column]
rowMarked[row] = 1
colMarked[column] = 1
for k in range(len(rowMarked)):
if rowMarked[k]==0:
total+= np.min(costMatrix[k])
for c in range(len(colMarked)):
if colMarked[c]==0:
total+= np.min(costMatrix[:,c])
maxlengraph = max(len(doc1parsed),len(doc2parsed))
return 1-(total/maxlengraph)
Example 6: test_exact_match
def test_exact_match():
tree = ParentedTree.fromstring('(S (NP (DT the) (JJ big) (NN cat)) (VP bit) (NP (DT a) (NN cat)))')
node = search_by_exact_string_matching(tree, 'cat')
assert_equal(len(node), 2)
assert_equal(node[0], ParentedTree.fromstring('(NN cat)'))
node = search_by_exact_string_matching(tree, 'a cat')
assert_equal(len(node), 1)
assert_equal(node[0], ParentedTree.fromstring('(NP (DT a) (NN cat))'))
Example 7: get_sentence_posteriors
def get_sentence_posteriors(sentence, iterations=1, extra_meaning=None):
meaning_probs = {}
# parse sentence with charniak and apply surgeries
print('parsing ...')
modparse = get_modparse(sentence)
t = ParentedTree.fromstring(modparse)  # fromstring in NLTK 3; older releases used ParentedTree.parse
print('\n%s\n' % t.pformat())
num_ancestors = count_lmk_phrases(t) - 1
for _ in range(iterations):
(lmk, _, _), (rel, _, _) = get_meaning(num_ancestors=num_ancestors)
meaning = m2s(lmk,rel)
if meaning not in meaning_probs:
ps = get_tree_probs(t, lmk, rel)[0]
# print "Tree probs: ", zip(ps,rls)
meaning_probs[meaning] = np.prod(ps)
print('.')
if extra_meaning:
meaning = m2s(*extra_meaning)
if meaning not in meaning_probs:
ps = get_tree_probs(t, lmk, rel)[0]
# print "Tree prob: ", zip(ps,rls)
meaning_probs[meaning] = np.prod(ps)
print('.')
summ = sum(meaning_probs.values())
for key in meaning_probs:
meaning_probs[key] /= summ
return meaning_probs.items()
Example 8: disfile2tree
def disfile2tree(dis_filepath):
"""converts a *.dis file into a ParentedTree (NLTK) instance"""
with open(dis_filepath) as f:
rst_tree_str = f.read().strip()
rst_tree_str = fix_rst_treebank_tree_str(rst_tree_str)
rst_tree_str = convert_parens_in_rst_tree_str(rst_tree_str)
return ParentedTree.fromstring(rst_tree_str)
Example 9: findSentencePTreeToken
def findSentencePTreeToken(sentence, keyword):
import nltk
from nltk.tree import ParentedTree
stemmed = _lemma_(keyword)
tmp = proc.parse_doc(sentence)
i = 0
numSentences = len(tmp['sentences'])
rs = []
for i in range(0, numSentences):
p = tmp['sentences'][i]['parse']
ptree = ParentedTree.fromstring(p)
# rs = []
for i in range(0, len(ptree.leaves())):
tree_position = ptree.leaf_treeposition(i)
node = ptree[tree_position]
if _stem_(node)==stemmed:
tree_position = tree_position[:-1]  # drop the last index to get the POS node
rs.append(ptree[tree_position])
# if len(rs)>0:
# return rs
return rs
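The tree-position trimming in the loop above deserves a standalone illustration: leaf_treeposition returns the position of the leaf string itself, and dropping the last index yields its preterminal (POS) node.

from nltk.tree import ParentedTree

ptree = ParentedTree.fromstring('(S (NP (PRP I)) (VP (VB eat)))')
pos = ptree.leaf_treeposition(1)  # position of the leaf 'eat': (1, 0, 0)
print(ptree[pos])                 # 'eat'     -- the bare leaf string
print(ptree[pos[:-1]])            # (VB eat)  -- its POS node, one level up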
Example 10: parse
def parse(sentence, use_cache=True, parser='stanford'):
cache_key = "parse_trees_{0}".format(parser)
valid_lines = None
if use_cache:
cache_attempt = cache_get(cache_key, sentence)
if cache_attempt:
valid_lines = cache_attempt
if valid_lines is None:
if parser == "stanford":
response = parse_stanford(sentence, use_cache=use_cache)
elif parser == "malt":
response = parse_malt(sentence, use_cache=use_cache)
else:
return []
valid_lines = [line for line in response.split("\n") if len(line) > 2 and line[0] == "(" and line[-1] == ")"]
if use_cache:
cache_set(cache_key, sentence, valid_lines)
# throw away the garbage we don't want from the parser's response.
# this could probably get us in trouble since it'll hide errors etc,
# but we got deadlines....
trees = [ParentedTree.fromstring(line) for line in valid_lines]  # fromstring in NLTK 3 (formerly ParentedTree.parse)
return trees
Example 11: test_node_nocase
def test_node_nocase(self):
'''
Test selecting nodes using case insensitive node names.
'''
tree = ParentedTree.fromstring('(S (n x) (N x))')
self.assertEqual(list(tgrep.tgrep_positions('"N"', [tree])), [[(1,)]])
self.assertEqual(list(tgrep.tgrep_positions('i@"N"', [tree])), [[(0,), (1,)]])
Example 12: parse_sentences
def parse_sentences(self, filename, num_sentences):
"""Parses each one-line sentence into a syntax tree"""
# Open the file and parse a given number of sentences
f = open(filename, 'r')
if num_sentences == 'all':
num_sentences = None  # slice [:None] keeps every line; -1 would silently drop the last sentence
count = 0
for sentence in f.readlines()[:num_sentences]:
if count%10==0:
print("Number of sentences trained: ",count)
# Get possible parse trees
trees = self.parser.raw_parse(sentence.lower())
for tree in trees:
self.nonterminal_counts['ROOT'] += 1
tokenized_sentence = self.tokenize_sentence(sentence)
# Only extract rules from sentences with greater than 8 tokens,
# to avoid adding rules that generate short, ungrammatical sentences
if len(tokenized_sentence) > 8:
self.extract_rules(tree)
# Convert the tree into a ParentedTree,
# which is an NLTK tree that keeps pointers to each node's parent
ptree = ParentedTree.convert(tree)
# Calculate the bigram counts for this sentence
self.get_bigram(ptree, tokenized_sentence)
count+=1
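A minimal demonstration of the conversion step used above (the sentence is illustrative): plain Tree nodes carry no parent pointers, while the converted ParentedTree supports parent() and sibling lookups.

from nltk.tree import Tree, ParentedTree

tree = Tree.fromstring('(S (NP (DT the) (NN dog)) (VP (VBD barked)))')
ptree = ParentedTree.convert(tree)  # deep-copies into a ParentedTree
np = ptree[0]
print(np.parent().label())          # 'S'
print(np.right_sibling().label())   # 'VP'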
Example 13: test_use_macros
def test_use_macros(self):
'''
Test defining and using tgrep2 macros.
'''
tree = ParentedTree.fromstring(
'(VP (VB sold) (NP (DET the) '
'(NN heiress)) (NP (NN deed) (PREP to) '
'(NP (DET the) (NN school) (NN house))))'
)
self.assertEqual(
list(
tgrep.tgrep_positions(
'@ NP /^NP/;\n@ NN /^NN/;\n@NP !< @NP !$.. @NN', [tree]
)
),
[[(1,), (2, 2)]],
)
# use undefined macro @CNP
self.assertRaises(
tgrep.TgrepException,
list,
tgrep.tgrep_positions(
'@ NP /^NP/;\n@ NN /^NN/;\n@CNP !< @NP !$.. @NN', [tree]
),
)
Example 14: getConsituentTreeDistribution
def getConsituentTreeDistribution(core_nlp_files):
diff_productions = dict()
production_dict_for_files = dict()
for genre_file_path, genre_file_name in core_nlp_files:
production_dict = dict()
dictionary = dict()
with open(genre_file_path) as f:
lines = f.readlines()
assert len(lines) == 1
line = lines[0]
# exec('dictionary=' + line) worked in Python 2 but cannot rebind a local in Python 3;
# the line holds a single dict literal, so parse it safely instead (requires: import ast)
dictionary = ast.literal_eval(line)
# print genre_file_path, dictionary
sentences = dictionary[SENTENCES]
for sent in sentences:
parsetree = sent[PARSE_TREE]
t = ParentedTree.fromstring(parsetree)
prods = t.productions()
for prod in prods:
if prod not in diff_productions:
diff_productions[prod] = 0.0
if prod not in production_dict:
production_dict[prod] = 0.0
diff_productions[prod] += 1.0
production_dict[prod] += 1.0
production_dict_for_files[genre_file_name.replace('_corenlp1000.txt', '.txt')] = production_dict
return production_dict_for_files, diff_productions
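For context on what the function counts, Tree.productions() flattens a parse into its CFG rules, one per internal node. A small sketch:

from nltk.tree import ParentedTree

t = ParentedTree.fromstring('(S (NP (DT the) (NN cat)) (VP (VBD sat)))')
for prod in t.productions():
    print(prod)
# S -> NP VP
# NP -> DT NN
# DT -> 'the'
# NN -> 'cat'
# VP -> VBD
# VBD -> 'sat'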
Example 15: get_modparse
def get_modparse(sentence):
"""returns the modified parse tree for a sentence"""
sp_db = SentenceParse.get_sentence_parse(sentence)
try:
res = sp_db.all()[0]
parsetree = res.original_parse
modparsetree = res.modified_parse
except Exception:
print("parse.py: 103: " + sentence)
parses = parse_sentences([sentence])
if len(parses) == 0:
raise ParseError(printcolors.WARNING + ('ParseError: a sentence was empty'))
modparses = modify_parses(parses)
for i,chunk in enumerate(modparses[:]):
for j,modparse in enumerate(chunk):
if 'LANDMARK-PHRASE' in modparse:
modparses[i] = modparse
parses[i] = parses[i][j]
break
if isinstance(modparses[i],list):
modparses[i] = modparses[i][0]
parses[i] = parses[i][0]
parsetree = parses[0]
modparsetree = modparses[0]
try:
SentenceParse.add_sentence_parse(sentence, parsetree, modparsetree)
except Exception as e:
print(e)
if count_lmk_phrases(ParentedTree.fromstring(modparsetree)) < 1:
raise ParseError(printcolors.WARNING + ('ParseError: Parse contained no Landmark phrase.\nSentence: %s\nParse: %s\nModparse: %s' % (sentence,parsetree,modparsetree)))
return parsetree, modparsetree