本文整理汇总了Python中grammar.Grammar.get_lexicon方法的典型用法代码示例。如果您正苦于以下问题:Python Grammar.get_lexicon方法的具体用法?Python Grammar.get_lexicon怎么用?Python Grammar.get_lexicon使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类grammar.Grammar的用法示例。
在下文中一共展示了Grammar.get_lexicon方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from grammar import Grammar [as 别名]
# 或者: from grammar.Grammar import get_lexicon [as 别名]
class BottomUpChartParser:
    '''
    Bottom-up chart parser holding a grammar, an edge queue and a chart.
    '''
    # Grammar object that includes lexicon and production rules
    grammar = None
    # Queue object on which new edges are stacked
    queue = None
    # Chart object in which edges are stored for the final parse generation
    chart = None
    # Number of tokens of the sentence being parsed (0 until parse() sets it)
    sentence_length = 0
    # Set to False if you want to deactivate printing of the found parses
    will_print_chart = True
def __init__(self, grammar):
    '''
    Build the parser's Grammar object from the given argument.

    The argument is handed to the Grammar constructor unchanged and the
    resulting object is stored on the instance as self.grammar.
    '''
    grammar_object = Grammar(grammar)
    self.grammar = grammar_object
def parse(self, sentence, number_of_parses, strategy):
    '''
    Parse the input sentence and print the parses that were found.

    This is the central method to be called from outside.

    sentence         -- input string; split into tokens by self.tokenize
    number_of_parses -- passed to self.enough_parses_found, which decides
                        whether the main loop may stop early
    strategy         -- passed to self.initialize_queue; the value
                        'altsearch' additionally enables the search rule

    Raises ParseException if any token is missing from the lexicon.
    Returns nothing; the result is printed.
    '''
    ### Preprocessing ###
    # Tokenize input sentence
    tokens = self.tokenize(sentence)
    self.sentence_length = len(tokens)

    # Check for unknown tokens
    unknown_words = self.get_unknown_words(tokens)
    if unknown_words:
        # TODO: Run fallback solutions to fix unknown words, else
        # raise exception
        raise ParseException("Sentence contains unknown words (%s). Please try again!" % ', '.join(unknown_words))

    ### Main steps ###
    # (1) Initialize empty chart and queue
    self.initialize_chart()
    self.initialize_queue(strategy)

    # (2) For every token, create a complete edge and push it to
    #     the queue
    self.init_rule(tokens)

    # Iteration counter for evaluation purposes
    iters = 0

    # (3) Repeat until no more edges are added
    #     or sufficient number of parses has been found:
    while not self.queue.is_empty() and not self.enough_parses_found(number_of_parses):
        iters += 1

        # (3.1) Add next element on queue to the chart
        edge = self.queue.get_next_edge()
        self.chart.add_edge(edge)

        # (3.2) If input edge is complete,
        #       apply predict rule and fundamental rule.
        #       If input edge is incomplete,
        #       apply fundamental rule only
        if edge.is_complete():
            self.predict_rule(edge)
        self.fundamental_rule(edge)

        # (3.3) For alt search strategy, run search rule
        #       if input edge is a complete parse
        #       or last element of priority queue
        if strategy == 'altsearch':
            if ( ( (not self.queue.is_priority_active())    # Case 1: Complete parse was added to chart
                   and edge.get_prod_rule().get_lhs() == 'S'
                   and edge.is_complete()
                   and edge.get_start() == 0
                   and edge.get_end() == self.sentence_length )
                 or (self.queue.is_priority_active()        # Case 2: Priority queue emptied
                     and self.queue.is_priority_empty() ) ):
                self.search_rule(edge)

    # (4) Display generated parses
    s_edges = self.chart.get_s_edges()
    # A single parenthesised argument prints the same text whether print
    # is the Python 2 statement or the Python 3 function.
    print('%s parses found after %s iterations:' % (len(s_edges), iters))
    if self.will_print_chart:
        self.display_parses()
    else:
        for s_edge in s_edges:
            print('Found s-edge: %s' % s_edge)
def tokenize(self, sentence):
    '''
    Break a sentence into tokens and return them as a list.

    Tokens are the whitespace-delimited pieces of the sentence; no
    further preprocessing is applied.
    '''
    tokens = sentence.split()
    return tokens
def get_unknown_words(self, tokens):
    '''
    Return the tokens that do not occur in the grammar's lexicon.

    The result keeps the order of the input tokens; a token that is
    repeated in the input is reported once per occurrence.
    '''
    known = self.grammar.get_lexicon()
    missing = []
    for word in tokens:
        if word in known:
            continue
        missing.append(word)
    return missing
def initialize_chart(self):
#.........这里部分代码省略.........