本文整理汇总了Python中nltk.grammar.is_nonterminal函数的典型用法代码示例。如果您正苦于以下问题:Python is_nonterminal函数的具体用法?Python is_nonterminal怎么用?Python is_nonterminal使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了is_nonterminal函数的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: apply
def apply(self, chart, grammar, edge):
    """Top-down prediction step for a feature-based chart parser.

    For an incomplete ``edge``, predict a new self-loop edge at the
    edge's end position for every grammar production whose LHS unifies
    with the edge's next (nonterminal) symbol.  Yields each edge that
    was actually new to the chart.

    Results for a given (next-symbol-with-bindings, index) pair are
    memoized in ``self._done`` keyed on the exact chart/grammar objects,
    so repeat applications add nothing.
    """
    # Nothing to predict from a complete edge.
    if edge.is_complete(): return
    nextsym, index = edge.nextsym(), edge.end()
    # Only nonterminals can be expanded by productions.
    if not is_nonterminal(nextsym): return
    # If we've already applied this rule to an edge with the same
    # next & end, and the chart & grammar have not changed, then
    # just return (no new edges to add).
    nextsym_with_bindings = edge.next_with_bindings()
    done = self._done.get((nextsym_with_bindings, index), (None, None))
    # Identity comparison is deliberate: the memo is valid only for the
    # very same chart and grammar objects.
    if done[0] is chart and done[1] is grammar:
        return
    for prod in grammar.productions(lhs=nextsym):
        # If the left corner in the predicted production is
        # leaf, it must match with the input.
        if prod.rhs():
            first = prod.rhs()[0]
            if is_terminal(first):
                if index >= chart.num_leaves(): continue
                if first != chart.leaf(index): continue
        # We rename vars here, because we don't want variables
        # from the two different productions to match.
        if unify(prod.lhs(), nextsym_with_bindings, rename_vars=True):
            new_edge = FeatureTreeEdge.from_production(prod, edge.end())
            # insert() returns a truthy value only if the edge is new.
            if chart.insert(new_edge, ()):
                yield new_edge
    # Record the fact that we've applied this rule.
    self._done[nextsym_with_bindings, index] = (chart, grammar)
示例2: remove_unitary_productions
def remove_unitary_productions(cfg_grammar):
    """
    Remove unitary-productions that aren't terminals, by making sure all
    downstream productions get trickled up.

    Recursive because replacing one unit production can create new ones.
    Note: this does NOT detect cycles.
    """
    all_prods = cfg_grammar.productions()
    # Locate the first unit production A -> B whose RHS is a nonterminal.
    unit_prod = next(
        (p for p in all_prods if len(p) == 1 and is_nonterminal(p.rhs()[0])),
        None,
    )
    if unit_prod is None:
        # Base case: no unit productions remain.
        return cfg_grammar
    # Replace A -> B by A -> rhs for every production B -> rhs.
    target = unit_prod.rhs()[0]
    lifted = [
        Production(unit_prod.lhs(), b_prod.rhs())
        for b_prod in cfg_grammar.productions(lhs=target)
    ]
    kept = [p for p in all_prods if p != unit_prod]
    rebuilt = CFG(cfg_grammar.start(), kept + lifted)
    return remove_unitary_productions(rebuilt)
示例3: apply
def apply(self, chart, grammar, edge):
    """Bottom-up predict/combine step for a (feature-based) chart parser.

    For a complete ``edge`` with LHS ``found``, look up every production
    whose RHS starts with ``found`` and insert a new edge with the dot
    already moved past that first symbol.  Yields each edge that was new
    to the chart.
    """
    # Only complete edges can trigger bottom-up prediction.
    if edge.is_incomplete():
        return
    found = edge.lhs()
    for prod in grammar.productions(rhs=found):
        bindings = {}
        # Feature edges additionally require unification of the
        # production's first RHS symbol with the found category.
        if isinstance(edge, FeatureTreeEdge):
            _next = prod.rhs()[0]
            if not is_nonterminal(_next):
                continue
            # We rename vars here, because we don't want variables
            # from the two different productions to match.
            used_vars = find_variables(
                (prod.lhs(),) + prod.rhs(), fs_class=FeatStruct
            )
            # NOTE(review): `found` is rebound here, so later loop
            # iterations see the renamed copy rather than edge.lhs().
            found = found.rename_variables(used_vars=used_vars)
            result = unify(_next, found, bindings, rename_vars=False)
            if result is None:
                continue
        # New edge spans the completed edge and has its dot moved
        # forward under the collected bindings.
        new_edge = FeatureTreeEdge.from_production(
            prod, edge.start()
        ).move_dot_forward(edge.end(), bindings)
        if chart.insert(new_edge, (edge,)):
            yield new_edge
示例4: _expand
def _expand(symbol,grammar):
    """Expand *symbol* one step under *grammar*.

    A nonterminal is rewritten by a production sampled proportionally to
    its probability; a terminal expands to itself.  Returns a pair
    ``(rhs_tuple, log_probability)``.
    """
    # Terminals expand to themselves with log-probability 0.
    if not is_nonterminal(symbol):
        return ((symbol,), 0.0)
    candidates = grammar.productions(lhs=symbol)
    weights = [c.prob() for c in candidates]
    picked = choice(candidates, p=weights)
    return (picked.rhs(), log(picked.prob()))
示例5: SampleFromPCFG
def SampleFromPCFG(grammar,start=None):
    """Sample a terminal string from a PCFG.

    :param grammar: a probabilistic grammar providing ``start()`` and
        probability-weighted productions (expanded via ``_expand_seq``).
    :param start: optional start symbol; defaults to ``grammar.start()``.
    :return: tuple ``(terminal_symbols, log_probability)``.
    """
    # BUG FIX: compare with `is None`, not `== None` — a start symbol
    # with a custom __eq__ could otherwise be silently replaced.
    if start is None:
        tupleSymbols = (grammar.start(),)
    else:
        tupleSymbols = (start,)
    lprob = 0.0
    # Repeatedly rewrite until no nonterminals remain, accumulating the
    # log-probability of each expansion step.
    while any(is_nonterminal(symbol) for symbol in tupleSymbols):
        tupleSymbols, lprob1 = _expand_seq(tupleSymbols, grammar)
        lprob += lprob1
    return (tupleSymbols, lprob)
示例6: gen_frame_line
def gen_frame_line(self, nt):
    """Recursively generate a random sentence fragment for nonterminal *nt*.

    Tries the productions of *nt* in random order and expands the first
    one whose nonterminal symbols are all expandable (i.e. have at least
    one production).  Terminals are appended followed by a space.

    :param nt: nonterminal to expand (looked up in ``self.cfg``).
    :return: the generated string, or ``"ERROR"`` if no production of
        *nt* can be fully expanded.
    """
    sentence = ''
    candidates = self.cfg.productions(lhs=nt)
    # random.sample over the full list is a shuffle: random visit order.
    prods = random.sample(candidates, len(candidates))
    valid = True
    for prod in prods:
        # BUG FIX: reset validity for each candidate. Previously the
        # reset was commented out, so one dead-end production poisoned
        # every later candidate and forced an "ERROR" return even when
        # an expandable production existed.
        valid = True
        for sym in prod.rhs():
            # A nonterminal with no productions cannot be expanded.
            if is_nonterminal(sym) and len(self.cfg.productions(lhs=sym)) < 1:
                valid = False
        if valid:
            for sym in prod.rhs():
                if is_nonterminal(sym):
                    sentence += self.gen_frame_line(sym)
                else:
                    sentence += sym + ' '
            break
    if not valid:
        return "ERROR"
    return sentence  # capitalization intentionally not applied
示例7: check_is_nonterminal
def check_is_nonterminal(*nts):
    """
    Asserts that all of one or more objects are Nonterminals.
    :param nts: An object, which may or may not be a Nonterminal
    :return: None
    :raises TypeError: if any argument is not a Nonterminal
    """
    for nt in nts:
        if not gr.is_nonterminal(nt):
            # BUG FIX: format the offending object into the message;
            # the original called .format({}) and always printed "{}".
            raise TypeError("{} must be a nonterminal".format(nt))
    return
示例8: _get_arg_product_rules
def _get_arg_product_rules(self, a_doc_id, a_arg, a_rel, a_parses):
    """Extract syntactic production rules for the given arg.

    Args:
        a_doc_id (str):
            id of the document
        a_arg (str):
            argument to extract productions for
        a_rel (dict):
            discourse relation to extract features for
        a_parses (dict):
            parsed sentences

    Returns:
        set:
            set of syntactic productions (as strings) from subtrees
            fully covered by the argument's tokens
    """
    ret = set()
    # obtain token indices for each arg sentence
    snt_id = None
    snt2tok = self._get_snt2tok(a_rel[a_arg][TOK_LIST])
    # obtain set of leaves corresponding to that argument
    arg_leaves = set()
    subt_leaves = set()
    processed_leaves = set()
    itree = itree_str = inode_path = None
    # NOTE(review): `iteritems` means this is Python 2 code; the
    # print(..., file=...) form presumably relies on a
    # `from __future__ import print_function` elsewhere — confirm.
    for snt_id, toks in snt2tok.iteritems():
        itree_str = a_parses[a_doc_id][SENTENCES][snt_id][PARSE_TREE]
        itree = Tree.fromstring(itree_str)
        if not itree.leaves():
            # Skip (best-effort) sentences whose parse tree is empty.
            print("Invalid parse tree for sentence {:d}".format(snt_id),
                  file=sys.stderr)
            continue
        # obtain all terminal syntactic nodes from the arg
        for itok in toks:
            inode_path = itree.leaf_treeposition(itok)
            arg_leaves.add(itree[inode_path])
        # check all subtrees (not efficient, but easy to implement)
        for s_t in itree.subtrees():
            subt_leaves.update(s_t.leaves())
            if subt_leaves.issubset(arg_leaves) and \
                not subt_leaves.issubset(processed_leaves):
                # Keep only non-lexical productions (those with a
                # nonterminal somewhere on the RHS).
                ret.update(str(p) for p in itree.productions()
                           if any(is_nonterminal(n)
                                  for n in p.rhs()))
                processed_leaves.update(subt_leaves)
                # NOTE(review): subt_leaves is cleared only on the
                # covered-subtree branch, so leaves accumulate across
                # non-matching subtrees — verify this is intentional.
                subt_leaves.clear()
            # Stop early once every argument leaf has been covered.
            if processed_leaves == arg_leaves:
                break
        arg_leaves.clear()
        processed_leaves.clear()
    return ret
示例9: pcky
def pcky(sentence, grammar):
    """Probabilistic CKY recognizer for a PCFG in Chomsky normal form.

    Tokenizes *sentence*, fills a CKY table of Viterbi (best-derivation)
    probabilities, printing each cell update along the way, and reports
    whether the start symbol covers the whole input.

    :param sentence: raw sentence string (tokenized with word_tokenize).
    :param grammar: an nltk PCFG with probability-weighted productions.
    :return: True if the sentence is derivable from the grammar.
    """
    tokens = word_tokenize(sentence)
    # Echo the tokens with fence-post positions: [0] w1 [1] w2 [2] ...
    ts = '[0]'
    for i, token in enumerate(tokens):
        ts += ' ' + token + ' [{}]'.format(i + 1)
    print(ts)
    non_terminal = set([prod.lhs() for prod in grammar.productions() if is_nonterminal(prod.lhs())])
    # table[i][j][NT] = best probability of NT spanning tokens i..j.
    table = [[{nt: 0 for nt in non_terminal} for i in range(len(tokens) + 1)]
             for j in range(len(tokens) + 1)]
    # Lexical step: fill length-1 spans from NT -> token productions.
    for i, token in enumerate(tokens):
        for prod in grammar.productions(rhs=token):
            table[i][i + 1][prod.lhs()] = max(table[i][i + 1][prod.lhs()],
                                              prod.prob())
    # Binary step over increasing span lengths.
    for span in range(2, len(tokens) + 1):
        for start in range(len(tokens) - span + 1):
            end = start + span
            for split in range(start + 1, end):
                left = table[start][split]
                right = table[split][end]
                for nt1 in left:
                    if left[nt1] <= 0:
                        continue
                    for nt2 in right:
                        if right[nt2] <= 0:
                            continue
                        for prod in grammar.productions(rhs=nt1):
                            if prod.rhs() != (nt1, nt2):
                                continue
                            cand = prod.prob() * left[nt1] * right[nt2]
                            # BUG FIX: keep the best derivation. The
                            # original assigned unconditionally, so a
                            # later, lower-probability split could
                            # overwrite an earlier, better one.
                            if cand > table[start][end][prod.lhs()]:
                                table[start][end][prod.lhs()] = cand
                                print('[{}] {}:({:.2f}) [{}] {}:({:.2f}) [{}] -> [{}] {}:({:.5f}) [{}]'.format(
                                    start, nt1, left[nt1],
                                    split, nt2, right[nt2],
                                    end, start, prod.lhs(),
                                    table[start][end][prod.lhs()], end))
    if table[0][len(tokens)][grammar.start()] > 0:
        print('The sentence is derived from the grammar')
        return True
    else:
        print('The sentence is not derived from the grammar')
        return False