本文整理汇总了Python中simpleparse.parser.Parser.buildTagger方法的典型用法代码示例。如果您正苦于以下问题：Python Parser.buildTagger方法的具体用法？Python Parser.buildTagger怎么用？Python Parser.buildTagger使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类simpleparse.parser.Parser的用法示例。
在下文中一共展示了Parser.buildTagger方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: testTermCompression
# 需要导入模块: from simpleparse.parser import Parser [as 别名]
# 或者: from simpleparse.parser.Parser import buildTagger [as 别名]
def testTermCompression(self):
    """Test that unreported productions are compressed.

    Term compression is basically an inlining of terminal
    expressions into the calling table.  At the moment
    the terminal expressions are all duplicated, which may
    balloon the size of the grammar, not sure if this will
    be an actual problem.  As written, this optimization
    should provide a significant speed up, but there may
    be even more of a speed up if we allow for sharing
    the terminal tuples as well.

    This:
        a:=b <b>:= -c* c:='this'
    Should eventually compress to this:
        a := -'this'*

    Every pair below is (grammar using unreported/expanded productions,
    hand-inlined equivalent); both are built with root production "a"
    and their tag tables are compared with ``rcmp``.

    Raises:
        ValueError: after all pairs were checked, if at least one pair
            produced differing tag tables.  The message lists every
            failing pair together with both pretty-printed tables.
    """
    grammarPairs = [
        ("""a:=b <b>:= -c* c:='this'""", """a := -'this'*"""),
        ("""a:=b >b<:= c c:= 'this'""", """a := c c:= 'this'"""),
        ("""a:=b >b<:= c <c>:= 'this'""", """a := 'this'"""),
        ("""a:=b >b<:= c+ <c>:= 'this'""", """a := 'this'+"""),
        # The following will never work, so eventually may raise
        # an error or at least give a warning!
        ("""a:=b,c >b<:= c+ <c>:= 'this'""", """a := 'this'+,'this'"""),
        ("""a:=b/c >b<:= c+ <c>:= 'this'""", """a := 'this'+/'this'"""),
        # This is requiring group-compression, which isn't yet written
        ("""a:=-b/c >b<:= c+ <c>:= 'this'""", """a := -'this'+/'this'"""),
        ("""a := (table1 / table2 / any_line)*
<any_line> := ANY*, EOL
<ANY> := -EOL
<EOL> := '\n'
table1 := 'a'
table2 := 'b'
""", """a := (table1 / table2 / (-'\n'*, '\n'))*
table1 := 'a'
table2 := 'b'
"""),
        ("""a:= b,c <b>:= -c* <c>:= '\n'""", """a := -'\n'*,'\n'"""),
    ]
    # Collect all mismatches instead of stopping at the first one, so a
    # single run reports every grammar that fails to compress.
    failures = []
    for first, second in grammarPairs:
        pFirst = Parser(first, "a")
        pSecond = Parser(second, "a")
        tFirst = pFirst.buildTagger()
        tSecond = pSecond.buildTagger()
        if not rcmp(tFirst, tSecond):
            failures.append(
                "%r did not produce the same parser as %r\n\t%s\n\t%s"
                % (first, second, pprint.pformat(tFirst), pprint.pformat(tSecond))
            )
    if failures:
        raise ValueError("\n".join(failures))
示例2: testTermSharing
# 需要导入模块: from simpleparse.parser import Parser [as 别名]
# 或者: from simpleparse.parser.Parser import buildTagger [as 别名]
def testTermSharing(self):
    """Test that shared terminal productions are using the same parser.

    The grammar references the expanded production ``b`` twice in a
    sequence; the tag table built for root ``a`` is unpacked into its two
    entries, which must be the very same object (identity, not equality).
    """
    first = """ a := b,b >b<:= d d:= 'this'"""
    pFirst = Parser(first, "a")
    tFirst = pFirst.buildTagger()
    # Unpacking also checks that the table has exactly two entries.
    b, c = tFirst
    assert b is c, """Not sharing the same tuple for b and c instances"""
示例3: internal
# 需要导入模块: from simpleparse.parser import Parser [as 别名]
# 或者: from simpleparse.parser.Parser import buildTagger [as 别名]
class GeneratorAPI1:
    """Stand-in class supporting operation of SimpleParse 1.0 applications

    There was really only the one method of interest, parserbyname,
    everything else was internal (and is now part of
    simpleparsegrammar.py).
    """

    def __init__(self, production, prebuilt=()):
        """Build the underlying Parser.

        Args:
            production: passed to ``Parser`` as its first positional
                argument.
            prebuilt: forwarded to ``Parser`` as its ``prebuilts`` keyword.
        """
        # Imported here rather than at module level, as in the original.
        from simpleparse.parser import Parser

        self.parser = Parser(production, prebuilts=prebuilt)

    def parserbyname(self, name):
        """Retrieve a tag-table by production name"""
        return self.parser.buildTagger(name)
示例4: EBNFSpill
# 需要导入模块: from simpleparse.parser import Parser [as 别名]
# 或者: from simpleparse.parser.Parser import buildTagger [as 别名]
class EBNFSpill(object):
    # Default limits.  They are always read through ``self`` so that
    # setDefaults() can override any DEFAULT_* name on a single instance.

    # Fallback upper repeat count used by rndTimes() and rndSelect().
    DEFAULT_MAX_TIMES_CHAR = 35
    # Fallback upper repeat count used by rndTimesFunc().
    DEFAULT_MAX_TIMES_FUNC = 10
    # __init__ raises once its recursionLevel argument exceeds this value.
    DEFAULT_MAX_SELF_RECURSION = 25
    # NOTE(review): not referenced in the visible part of the class;
    # presumably bounds recursionLevelWalk -- confirm against eval().
    DEFAULT_MAX_WALK_RECURSION = 100
def __init__(self, showTags=False, showTagsRecursive=False, recursionLevel=0):
    """Initialise state and record the display/recursion settings.

    Args:
        showTags: stored as ``self.showTags``; getTagName() only emits a
            ``<tag>`` marker when this is truthy.
        showTagsRecursive: stored as ``self.showTagsRecursive``.
        recursionLevel: stored as ``self.recursionLevelObj``.

    Raises:
        Exception: if ``recursionLevel`` is greater than
            ``DEFAULT_MAX_SELF_RECURSION``.
    """
    self._reset()
    self.showTags = showTags
    self.showTagsRecursive = showTagsRecursive
    self.recursionLevelObj = recursionLevel
    if self.recursionLevelObj > self.DEFAULT_MAX_SELF_RECURSION:
        # Same exception type as before; the message used to be the
        # placeholder "a", which gave no hint about the cause.
        raise Exception(
            "recursionLevel %r exceeds DEFAULT_MAX_SELF_RECURSION (%r)"
            % (self.recursionLevelObj, self.DEFAULT_MAX_SELF_RECURSION)
        )
def __del__(self):
    """Decrement this instance's ``recursionLevelObj`` on finalisation."""
    # Guarded lookup: a finalizer also runs for objects on which the
    # attribute was never assigned, and must not raise in that case.
    level = getattr(self, "recursionLevelObj", None)
    if level is not None:
        self.recursionLevelObj = level - 1
def validate(self, data):
    """Parse ``data`` with ``self.parser`` and return the parser's result.

    ``self.parser`` is only assigned by setDeclaration(), so that method
    has to be called first.
    """
    return self.parser.parse(data)
def setDeclaration(self, declaration, production):
    """Build a Parser for ``declaration`` and its tag table.

    Args:
        declaration: grammar text handed to ``Parser``.
        production: name of the root production; used both for the
            ``Parser`` and for ``buildTagger``.

    Sets ``self.parser`` and ``self.table``.
    """
    self.parser = Parser(declaration, production)
    self.table = self.parser.buildTagger(production=production)
def setTable(self, table, nodes=None):
    """Install a tag table directly, optionally replacing ``self.nodes``.

    Args:
        table: stored as ``self.table``.
        nodes: replacement for ``self.nodes``.  Any falsy value (``None``
            but also an empty dict) leaves the current ``self.nodes``
            in place.
    """
    self.table = table
    self.nodes = nodes or self.nodes
def _reset(self):
    """Reset per-instance working state and reseed the ``random`` module."""
    self.nodes = {}
    self.ctx = []  # context (infos like recursion for table2)
    # recursionLevelObj is not touched here; __init__ assigns it right
    # after calling _reset().
    self.recursionLevelWalk = 0
    # Called without an argument: reseeds the module-level generator that
    # the rnd* helpers draw from.
    random.seed()
def setDefaults(self, **kwargs):
    """Override one or more ``DEFAULT_*`` limits on this instance.

    Args:
        **kwargs: ``DEFAULT_<NAME>=value`` pairs.  Only names that
            already exist on the object (as reported by ``dir``) and start
            with ``DEFAULT_`` are accepted.

    Raises:
        Exception: if any key is not an existing ``DEFAULT_*`` name.  All
            keys are checked before anything is assigned, so a rejected
            call leaves the instance unchanged.
    """
    valid_defaults = [i for i in dir(self) if i.startswith("DEFAULT_")]
    # ``items()`` works on Python 2 and 3 (``iteritems()`` is 2-only).
    for k, v in kwargs.items():
        if k not in valid_defaults:
            raise Exception("Not allowed to change %s to %s (valid options: %s)"%(k,v,valid_defaults))
    for k, v in kwargs.items():
        setattr(self, k, v)
def getTable(self):
    """Return the tag table stored by setDeclaration() or setTable()."""
    return self.table
def getTagName(self, node):
    """Return ``"<tag>"`` for ``node`` when tag display is enabled.

    Args:
        node: indexable whose first element is used as the tag name.

    Returns:
        ``"<%s>" % node[0]`` if ``self.showTags`` is truthy and ``node[0]``
        is truthy, otherwise the empty string.
    """
    if not (self.showTags and node[0]):
        return ""
    return "<%s>" % node[0]
def checkTypeIterable(self, l):
    """Return True if ``l`` is iterable but not a string.

    Works on Python 2 and 3: the ``Iterable`` ABC and the string base
    type are looked up in whichever place the running interpreter offers.
    """
    try:
        from collections.abc import Iterable  # Python 3.3+
    except ImportError:
        from collections import Iterable  # Python 2
    try:
        string_types = basestring  # Python 2: str and unicode
    except NameError:
        # Python 3: text and byte strings both count as "string".
        string_types = (str, bytes)
    return isinstance(l, Iterable) and not isinstance(l, string_types)
def checkTypeIterableRecursive(self, l):
    """Return True if ``l`` is a tuple shaped like ``(list, int, ...)``.

    The former ``Iterable`` / not-a-string checks are implied by the
    ``tuple`` check and were dropped.  Tuples with fewer than two elements
    now yield False instead of raising ``IndexError``.
    """
    return (
        isinstance(l, tuple)
        and len(l) >= 2
        and isinstance(l[0], list)
        and isinstance(l[1], int)
    )
def checkTypeNodeBase(self, l):
    """Return True if ``l`` looks like a node: ``(None | str, int, *)``.

    ``l`` must pass checkTypeIterable(), have at least two elements, carry
    ``None`` or a string in position 0 and an ``int`` in position 1.
    """
    try:
        string_types = basestring  # Python 2: str and unicode
    except NameError:
        string_types = (str, bytes)  # Python 3
    return (
        self.checkTypeIterable(l)
        and len(l) >= 2
        and (l[0] is None or isinstance(l[0], string_types))
        and isinstance(l[1], int)
    )
def checkTypeNodeWithChilds(self, l):
    """Return True if ``l`` is a base node with an iterable third element.

    That is: checkTypeNodeBase(l) holds, ``l`` has at least three
    elements, and ``l[2]`` passes checkTypeIterable().
    """
    return (
        self.checkTypeNodeBase(l)
        and len(l) >= 3
        and self.checkTypeIterable(l[2])
    )
def next(self):
    """Do nothing and return ``None`` (no behaviour is implemented here)."""
    return None
def rndTimesFunc(self, sample_func, args, minlen=0, maxlen=None):
    """Concatenate the results of calling ``sample_func(args)`` a random number of times.

    Args:
        sample_func: callable invoked as ``sample_func(args)``; must
            return a string.
        args: the single argument passed to ``sample_func`` on each call.
        minlen: smallest possible number of calls.
        maxlen: largest possible number of calls (inclusive).  Any falsy
            value -- ``None`` and also ``0`` -- selects
            ``self.DEFAULT_MAX_TIMES_FUNC``.

    Returns:
        The joined string; empty when zero calls were drawn.

    Raises:
        ValueError: from ``random.randrange`` if ``minlen`` exceeds the
            effective ``maxlen``.
    """
    upper = (maxlen or self.DEFAULT_MAX_TIMES_FUNC) + 1  # randrange excludes its stop
    count = random.randrange(minlen, upper)
    return "".join(sample_func(args) for _ in range(count))
def rndTimes(self, sample, minlen=0, maxlen=None):
    """Repeat the string ``sample`` a random number of times.

    Args:
        sample: string to repeat.
        minlen: smallest possible repeat count.
        maxlen: largest possible repeat count (inclusive).  Any falsy
            value -- ``None`` and also ``0`` -- selects
            ``self.DEFAULT_MAX_TIMES_CHAR``.

    Returns:
        ``sample`` repeated ``k`` times with ``minlen <= k <= maxlen``.

    Raises:
        ValueError: from ``random.randrange`` if ``minlen`` exceeds the
            effective ``maxlen``.
    """
    upper = (maxlen or self.DEFAULT_MAX_TIMES_CHAR) + 1  # randrange excludes its stop
    count = random.randrange(minlen, upper)
    # join (rather than ``sample * count``) keeps the old contract that a
    # non-string sample is rejected as soon as at least one repeat occurs.
    return "".join([sample] * count)
def rndSelect(self, haystack, sample_len=1, minlen=0, maxlen=None):
    """Build a string from a random number of random draws out of ``haystack``.

    Each draw is ``random.sample(haystack, sample_len)`` joined into a
    string, so one draw contributes ``sample_len`` distinct positions of
    ``haystack``.

    Args:
        haystack: population handed to ``random.sample``; its elements
            must be strings.
        sample_len: number of elements taken per draw.
        minlen: smallest possible number of draws.
        maxlen: largest possible number of draws (inclusive).  Any falsy
            value -- ``None`` and also ``0`` -- selects
            ``self.DEFAULT_MAX_TIMES_CHAR``.

    Returns:
        The concatenation of all draws; empty when zero draws were made.

    Raises:
        ValueError: from ``random.randrange`` if ``minlen`` exceeds the
            effective ``maxlen``, or from ``random.sample`` if
            ``sample_len`` is larger than ``haystack``.
    """
    upper = (maxlen or self.DEFAULT_MAX_TIMES_CHAR) + 1  # randrange excludes its stop
    count = random.randrange(minlen, upper)
    return "".join(
        "".join(random.sample(haystack, sample_len)) for _ in range(count)
    )
def eval(self,node):
#.........这里部分代码省略.........
示例5: kombinationer
# 需要导入模块: from simpleparse.parser import Parser [as 别名]
# 或者: from simpleparse.parser.Parser import buildTagger [as 别名]
#.........这里部分代码省略.........
# ones (so that we don't mistake "3 § MBL" for "3 § MB"+"L")
# lawlist.sort(cmp=lambda x, y: len(y) - len(x))
lawlist.sort(key=len, reverse=True)
lawdecl = "LawAbbreviation ::= ('%s')\n" % "'/'".join(lawlist)
self.decl += lawdecl
self.roots.insert(0, "kortlagrumref")
if self.EGLAGSTIFTNING in args:
productions = self.load_ebnf(scriptdir + "/../../../res/etc/eglag.ebnf")
for p in productions:
self.uriformatter[p] = self.eglag_format_uri
self.roots.append("eglagref")
if self.FORARBETEN in args:
productions = self.load_ebnf(
scriptdir + "/../../../res/etc/forarbeten.ebnf")
for p in productions:
self.uriformatter[p] = self.forarbete_format_uri
self.roots.append("forarbeteref")
if self.RATTSFALL in args:
productions = self.load_ebnf(scriptdir + "/../../../res/etc/rattsfall.ebnf")
for p in productions:
self.uriformatter[p] = self.rattsfall_format_uri
self.roots.append("rattsfallref")
if self.EGRATTSFALL in args:
productions = self.load_ebnf(scriptdir + "/../../../res/etc/egratt.ebnf")
for p in productions:
self.uriformatter[p] = self.egrattsfall_format_uri
self.roots.append("ecjcaseref")
rootprod = "root ::= (%s/plain)+\n" % "/".join(self.roots)
self.decl += rootprod
self.parser = Parser(self.decl.encode(SP_CHARSET), "root")
self.tagger = self.parser.buildTagger("root")
# util.writefile("tagger.tmp", repr(self.tagger), SP_CHARSET)
# print "tagger length: %d" % len(repr(self.tagger))
self.verbose = False
self.depth = 0
# SFS-specifik kod
self.currentlaw = None
self.currentchapter = None
self.currentsection = None
self.currentpiece = None
self.lastlaw = None
self.currentlynamedlaws = {}
def load_ebnf(self, file):
    """Load the productions of an EBNF file into the declaration in use.

    The decoded file content is appended to ``self.decl``.

    Args:
        file: path of the EBNF file; its bytes are decoded with
            ``SP_CHARSET``.

    Returns:
        A list with the names of all productions defined in the file
        whose name ends in ``Ref`` or ``RefID``, in file order.
    """
    # base.ebnf contains 0x1A, ie the EOF character on windows,
    # therefore we need to read it in binary mode
    with open(file, 'rb') as f:
        # assume our ebnf files use the same charset
        content = f.read().decode(SP_CHARSET)
    self.decl += content
    return [x.group(1) for x in re.finditer(r'(\w+(Ref|RefID))\s*::=', content)]
def get_relations(self, predicate):
    """Return a dict built from ``self.graph.subject_objects(predicate)``.

    For every pair the graph yields, the second element (as text) becomes
    the key and the first element (as text) the value.

    NOTE(review): the loop names assume ``self.graph`` is an rdflib Graph,
    whose ``subject_objects`` yields ``(subject, object)`` -- confirm.
    The original named the two loop variables the other way round; the
    resulting mapping is unchanged.
    """
    d = {}
    for subj, obj in self.graph.subject_objects(predicate):
        d[six.text_type(obj)] = six.text_type(subj)
    return d
示例6: __init__
# 需要导入模块: from simpleparse.parser import Parser [as 别名]
# 或者: from simpleparse.parser.Parser import buildTagger [as 别名]
class Reference:
    # Selector values passed positionally to __init__ (``*args``) to choose
    # which grammars get loaded.  Only LAGRUM and FORARBETEN load anything
    # in __init__; KORTALAGRUM is accepted but still a TODO there.
    # NOTE(review): FORESKRIFTER is not referenced in the visible code.
    LAGRUM = 1
    KORTALAGRUM = 2
    FORESKRIFTER = 3
    FORARBETEN = 6

    # Splits a base URI; parse() reads group 1 (base), 2 (law), 6 (chapter),
    # 8 (section), 10 (piece) and 12 (item).
    reUriSegments = re.compile(r'([\w]+://[^/]+/[^\d]*)(\d+:(bih\.[_ ]|N|)?\d+([_ ]s\.\d+|))#?(K([a-z0-9]+)|)(P([a-z0-9]+)|)(S(\d+)|)(N(\d+)|)')
    # parse() rewrites "x- och ylagen" with underscores via this pattern ...
    reEscapeCompound = re.compile(r'\b(\w+-) (och) (\w+-?)(lagen|förordningen)\b', re.UNICODE)
    # ... and inserts "|" in front of a law-type suffix inside a word.
    reEscapeNamed = re.compile(r'\B(lagens?|balkens?|förordningens?|formens?|ordningens?|kungörelsens?|stadgans?)\b', re.UNICODE)
    # Counterparts matching the escaped forms produced by the two patterns
    # above.  NOTE(review): their use is outside the visible code.
    reDescapeCompound = re.compile(r'\b(\w+-)_(och)_(\w+-?)(lagen|förordningen)\b', re.UNICODE)
    reDescapeNamed = re.compile(r'\|(lagens?|balkens?|förordningens?|formens?|ordningens?|kungörelsens?|stadgans?)')
    # Numeric XML character reference such as "&#229;".  Raw string: "\d"
    # in a plain literal is an invalid escape sequence.
    reXmlCharref = re.compile(r'&#\d+;')
def __init__(self, *args):
    """Load the grammars selected by ``args`` and build the parser.

    Args:
        *args: any of the class constants ``LAGRUM``, ``KORTALAGRUM``,
            ``FORARBETEN``.  ``base.ebnf`` is always loaded; ``LAGRUM``
            and ``FORARBETEN`` each add their own EBNF file and root
            productions.  ``KORTALAGRUM`` currently adds nothing.

    All resource paths are resolved against the current working
    directory (``os.getcwd()``), so the process has to be started from the
    directory that contains ``etc/``.
    """
    scriptDir = os.getcwd()

    self.graph = Graph()
    n3File = Util.relpath(scriptDir + '/etc/sfs-extra.n3')
    self.graph.load(n3File, format='n3')

    self.roots = []
    self.uriFormatter = {}
    self.decl = ''
    self.namedLaws = {}
    self.loadEbnf(scriptDir + '/etc/base.ebnf')
    self.args = args

    if self.LAGRUM in args:
        prods = self.loadEbnf(scriptDir + '/etc/lagrum.ebnf')
        for p in prods:
            self.uriFormatter[p] = self.sfsFormatUri
        self.namedLaws.update(self.getRelationship(RDFS.label))
        self.roots.append('sfsrefs')
        self.roots.append('sfsref')

    if self.KORTALAGRUM in args:
        # TODO: Fix korta lagrum also
        pass

    if self.FORARBETEN in args:
        prods = self.loadEbnf(scriptDir + '/etc/forarbeten.ebnf')
        for p in prods:
            self.uriFormatter[p] = self.forarbeteFormatUri
        self.roots.append('forarbeteref')

    # The root production is an alternation of every collected root,
    # with 'plain' as the final alternative.
    self.decl += 'root ::= (%s/plain)+\n' % '/'.join(self.roots)
    self.parser = Parser(self.decl, 'root')
    self.tagger = self.parser.buildTagger('root')
    self.depth = 0

    # SFS specific settings
    self.currentLaw = None
    self.currentChapter = None
    self.currentSection = None
    self.currentPiece = None
    self.lastLaw = None
    self.currentNamedLaws = {}
def loadEbnf(self, file):
    """Loads the syntax from a given EBNF file

    The file content is appended to ``self.decl``.

    Args:
        file: path of the EBNF file to read.

    Returns:
        A list with the names of all productions defined in the file
        whose name ends in ``Ref`` or ``RefID``, in file order.
    """
    # ``with`` closes the handle even when reading fails.
    with open(file) as f:
        syntax = f.read()
    self.decl += syntax
    return [x.group(1) for x in re.finditer(r'(\w+(Ref|RefID))\s*::=', syntax)]
def getRelationship(self, predicate):
    """Return a dict built from ``self.graph.subject_objects(predicate)``.

    For every pair the graph yields, the second element (as text) becomes
    the key and the first element (as text) the value.

    NOTE(review): the loop names assume rdflib's ``(subject, object)``
    ordering for ``Graph.subject_objects`` -- confirm.  The original named
    the two loop variables the other way round; the resulting mapping is
    unchanged.
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3
    d = {}
    for subj, obj in self.graph.subject_objects(predicate):
        d[text_type(obj)] = text_type(subj)
    return d
def parse(self, indata, baseUri='http://rinfo.lagrummet.se/publ/sfs/9999:999#K9P9S9P9',predicate=None):
if indata == '':
return indata
self.predicate = predicate
self.baseUri = baseUri
if baseUri:
m = self.reUriSegments.match(baseUri)
if m:
self.baseUriAttrs = {'baseUri' : m.group(1),
'law' : m.group(2),
'chapter' : m.group(6),
'section' : m.group(8),
'piece' : m.group(10),
'item' : m.group(12)}
else:
self.baseUriAttrs = {'baseUri':baseUri}
else:
self.baseUriAttrs = {}
fixedIndata = unicode(indata)
if self.LAGRUM in self.args:
fixedIndata = self.reEscapeCompound.sub(r'\1_\2_\3\4', fixedIndata)
fixedIndata = self.reEscapeNamed.sub(r'|\1', fixedIndata)
if isinstance(fixedIndata, unicode):
#.........这里部分代码省略.........