Python Parser.buildTagger方法代码示例

本文整理汇总了Python中simpleparse.parser.Parser.buildTagger方法的典型用法代码示例。如果您正苦于以下问题：Python Parser.buildTagger方法的具体用法？Python Parser.buildTagger怎么用？Python Parser.buildTagger使用的例子？那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类simpleparse.parser.Parser的用法示例。

在下文中一共展示了Parser.buildTagger方法的6个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: testTermCompression

# 需要导入模块: from simpleparse.parser import Parser [as 别名]
# 或者: from simpleparse.parser.Parser import buildTagger [as 别名]
    def testTermCompression( self ):
        """Test that unreported productions are compressed

        Term compression is basically an inlining of terminal
        expressions into the calling table.  At the moment
        the terminal expressions are all duplicated, which may
        balloon the size of the grammar, not sure if this will
        be an actual problem.  As written, this optimization
        should provide a significant speed up, but there may
        the even more of a speed up if we allow for sharing
        the terminal tuples as well.

        This:
            a:=b <b>:= -c* c:='this'
        Should eventually compress to this:
            a := -'this'*
        """
        failures = []
        for first, second in [
            ("""a:=b <b>:= -c* c:='this'""", """a := -'this'*"""),
            ("""a:=b >b<:= c c:= 'this'""", """a := c c:= 'this'"""),
            ("""a:=b >b<:= c <c>:= 'this'""", """a := 'this'"""),
            ("""a:=b >b<:= c+ <c>:= 'this'""", """a := 'this'+"""),
            # The following will never work, so eventually may raise
            # an error or at least give a warning!
            ("""a:=b,c >b<:= c+ <c>:= 'this'""", """a := 'this'+,'this'"""),
            ("""a:=b/c >b<:= c+ <c>:= 'this'""", """a := 'this'+/'this'"""),
            # This is requiring group-compression, which isn't yet written
            ("""a:=-b/c >b<:= c+ <c>:= 'this'""", """a := -'this'+/'this'"""),
            ("""a    := (table1 / table2 / any_line)*
  <any_line> := ANY*, EOL
  <ANY>      := -EOL
  <EOL>      := '\n'
  table1 := 'a'
  table2 := 'b'
  """, """a    := (table1 / table2 / (-'\n'*, '\n'))*
    table1 := 'a'
  table2 := 'b'
"""),
            ("""a:= b,c <b>:= -c* <c>:= '\n'""", """a := -'\n'*,'\n'"""),
            
        ]:
            pFirst = Parser( first, "a")
            pSecond = Parser( second, "a")
            tFirst = pFirst.buildTagger()
            tSecond = pSecond.buildTagger()
            if not rcmp( tFirst , tSecond):
                tFirstRepr = pprint.pformat(tFirst)
                tSecondRepr = pprint.pformat(tSecond)
                failures.append( """%(first)r did not produce the same parser as %(second)r\n\t%(tFirstRepr)s\n\t%(tSecondRepr)s"""%locals())
        if failures:
            raise ValueError( "\n".join(failures))

开发者ID:johnfkraus，项目名称:pyparseurl，代码行数:54，代码来源:test_optimisation.py

示例2: testTermSharing

# 需要导入模块: from simpleparse.parser import Parser [as 别名]
# 或者: from simpleparse.parser.Parser import buildTagger [as 别名]
 def testTermSharing( self ):
     """Test that shared terminal productions are using the same parser"""
     first =""" a := b,b >b<:= d d:= 'this'"""
     pFirst = Parser( first, "a")
     tFirst = pFirst.buildTagger()
     b,c = tFirst
     assert b is c, """Not sharing the same tuple for b and c instances"""

开发者ID:johnfkraus，项目名称:pyparseurl，代码行数:9，代码来源:test_optimisation.py

示例3: internal

# 需要导入模块: from simpleparse.parser import Parser [as 别名]
# 或者: from simpleparse.parser.Parser import buildTagger [as 别名]
class GeneratorAPI1:
	"""Stand-in class supporting operation of SimpleParse 1.0 applications

	There was really only the one method of interest, parserbyname,
	everything else was internal (and is now part of
	simpleparsegrammar.py).
	"""
	def __init__( self, production, prebuilt=() ):
		from simpleparse.parser import Parser
		self.parser = Parser( production, prebuilts=prebuilt )
	def parserbyname( self, name ):
		"""Retrieve a tag-table by production name"""
		return self.parser.buildTagger( name )

开发者ID:johnfkraus，项目名称:pyparseurl，代码行数:15，代码来源:generator.py

示例4: EBNFSpill

# 需要导入模块: from simpleparse.parser import Parser [as 别名]
# 或者: from simpleparse.parser.Parser import buildTagger [as 别名]
class EBNFSpill(object):
    DEFAULT_MAX_TIMES_CHAR = 35
    DEFAULT_MAX_TIMES_FUNC = 10
    DEFAULT_MAX_SELF_RECURSION = 25
    DEFAULT_MAX_WALK_RECURSION = 100

    def __init__(self,showTags=False,showTagsRecursive=False,recursionLevel=0):
        self._reset()
        self.showTags=showTags
        self.showTagsRecursive=showTagsRecursive
        self.recursionLevelObj=recursionLevel
        
        if self.recursionLevelObj>self.DEFAULT_MAX_SELF_RECURSION: raise Exception("a")
        #print "INIT",recursionLevel
        pass
    
    def __del__(self):
        self.recursionLevelObj-=1
        pass
    
    def validate(self,data):
        return self.parser.parse(data)
    
    def setDeclaration(self,declaration,production):
        self.parser = Parser(declaration, production)
        self.table =  self.parser.buildTagger(production=production)
    
    def setTable(self,table,nodes=None):
        self.table = table
        self.nodes=nodes or self.nodes
    
    def _reset(self):
        self.nodes = {}
        self.ctx = []       # context (infos like recurion for table2)
        #self.recursionLevelObj=0
        self.recursionLevelWalk=0
        random.seed()
        
    def setDefaults(self,**kwargs):
        valid_defaults = [i for i in dir(self) if i.startswith("DEFAULT_")]
        for k,v in kwargs.iteritems():
            if k in valid_defaults:
                setattr(self,k,v)
            else:
                raise Exception("Not allowed to change %s to %s (valid options: %s)"%(k,v,valid_defaults))
    
    def getTable(self):
        return self.table

    def getTagName(self,node):
        if self.showTags and node[0]:
            return "<%s>"%node[0]
        return ""
    
    def checkTypeIterable(self,l):
        return isinstance(l, collections.Iterable) and not isinstance(l, basestring)
    def checkTypeIterableRecursive(self,l):
        return isinstance(l, collections.Iterable) and not isinstance(l, basestring) and isinstance(l,tuple) and isinstance(l[0],list) and isinstance(l[1],int)

    def checkTypeNodeBase(self,l):
        #checks ( None|str, int, *)
        return self.checkTypeIterable(l) and len(l)>=2 and (l[0]==None or isinstance(l[0],basestring)) and isinstance(l[1],int)
    def checkTypeNodeWithChilds(self,l):
        #print "check_",str(l)[:50]
        try:
            #print "check_metric",checkTypeNodeBase(l),len(l)>=3 , checkTypeIterable(l[2])
            pass
        except:
            pass
        return self.checkTypeNodeBase(l) and len(l)>=3 and self.checkTypeIterable(l[2])
    
    def next(self):
        return

    def rndTimesFunc(self,sample_func,args,minlen=0,maxlen=None):
        maxlen = maxlen or self.DEFAULT_MAX_TIMES_FUNC
        maxlen+=1
        out = ""
        for i in range(random.randrange(minlen,maxlen)):
            out+=sample_func(args)
        return out   

    def rndTimes(self,sample,minlen=0,maxlen=None):
        maxlen = maxlen or self.DEFAULT_MAX_TIMES_CHAR
        maxlen+=1
        out = ""
        for i in range(random.randrange(minlen,maxlen)):
            out+=sample
        return out
    
    def rndSelect(self,haystack,sample_len=1,minlen=0,maxlen=None):
        maxlen = maxlen or self.DEFAULT_MAX_TIMES_CHAR
        maxlen+=1
        out = ""
        for i in range(random.randrange(minlen,maxlen)):
            out += "".join(random.sample(haystack,sample_len))
        
        return out

    def eval(self,node):
#.........这里部分代码省略.........

开发者ID:tintinweb，项目名称:EBNFSpill，代码行数:103，代码来源:EBNFSpill.py

示例5: kombinationer

# 需要导入模块: from simpleparse.parser import Parser [as 别名]
# 或者: from simpleparse.parser.Parser import buildTagger [as 别名]

#.........这里部分代码省略.........
            # ones (so that we don't mistake "3 § MBL" for "3 § MB"+"L")
            # lawlist.sort(cmp=lambda x, y: len(y) - len(x))
            lawlist.sort(key=len, reverse=True)
            lawdecl = "LawAbbreviation ::= ('%s')\n" % "'/'".join(lawlist)
            self.decl += lawdecl
            self.roots.insert(0, "kortlagrumref")

        if self.EGLAGSTIFTNING in args:
            productions = self.load_ebnf(scriptdir + "/../../../res/etc/eglag.ebnf")
            for p in productions:
                self.uriformatter[p] = self.eglag_format_uri
            self.roots.append("eglagref")
        if self.FORARBETEN in args:
            productions = self.load_ebnf(
                scriptdir + "/../../../res/etc/forarbeten.ebnf")
            for p in productions:
                self.uriformatter[p] = self.forarbete_format_uri
            self.roots.append("forarbeteref")
        if self.RATTSFALL in args:
            productions = self.load_ebnf(scriptdir + "/../../../res/etc/rattsfall.ebnf")
            for p in productions:
                self.uriformatter[p] = self.rattsfall_format_uri
            self.roots.append("rattsfallref")
        if self.EGRATTSFALL in args:
            productions = self.load_ebnf(scriptdir + "/../../../res/etc/egratt.ebnf")
            for p in productions:
                self.uriformatter[p] = self.egrattsfall_format_uri
            self.roots.append("ecjcaseref")

        rootprod = "root ::= (%s/plain)+\n" % "/".join(self.roots)
        self.decl += rootprod

        self.parser = Parser(self.decl.encode(SP_CHARSET), "root")
        self.tagger = self.parser.buildTagger("root")
        # util.writefile("tagger.tmp", repr(self.tagger), SP_CHARSET)
        # print "tagger length: %d" % len(repr(self.tagger))
        self.verbose = False
        self.depth = 0

        # SFS-specifik kod
        self.currentlaw = None
        self.currentchapter = None
        self.currentsection = None
        self.currentpiece = None
        self.lastlaw = None
        self.currentlynamedlaws = {}

    def load_ebnf(self, file):
        """Laddar in produktionerna i den angivna filen i den
        EBNF-deklaration som används, samt returnerar alla
        *Ref och *RefId-produktioner"""
        # base.ebnf contains 0x1A, ie the EOF character on windows,
        # therefore we need to read it in binary mode

        f = open(file, 'rb')
        # assume our ebnf files use the same charset
        content = f.read(os.stat(file).st_size).decode(SP_CHARSET)
        self.decl += content
        f.close()
        return [x.group(1) for x in re.finditer(r'(\w+(Ref|RefID))\s*::=', content)]

    def get_relations(self, predicate):
        d = {}
        for obj, subj in self.graph.subject_objects(predicate):
            d[six.text_type(subj)] = six.text_type(obj)
        return d

开发者ID:h4ck3rm1k3，项目名称:ferenda，代码行数:70，代码来源:legalref.py

示例6: init

# 需要导入模块: from simpleparse.parser import Parser [as 别名]
# 或者: from simpleparse.parser.Parser import buildTagger [as 别名]
class Reference:
	LAGRUM 			= 1
	KORTALAGRUM 	= 2
	FORESKRIFTER 	= 3
	FORARBETEN 		= 6

	reUriSegments 		= re.compile(r'([\w]+://[^/]+/[^\d]*)(\d+:(bih\.[_ ]|N|)?\d+([_ ]s\.\d+|))#?(K([a-z0-9]+)|)(P([a-z0-9]+)|)(S(\d+)|)(N(\d+)|)')
	reEscapeCompound 	= re.compile(r'\b(\w+-) (och) (\w+-?)(lagen|förordningen)\b', re.UNICODE)
	reEscapeNamed 		= re.compile(r'\B(lagens?|balkens?|förordningens?|formens?|ordningens?|kungörelsens?|stadgans?)\b', re.UNICODE)

	reDescapeCompound 	= re.compile(r'\b(\w+-)_(och)_(\w+-?)(lagen|förordningen)\b', re.UNICODE)
	reDescapeNamed 		= re.compile(r'\|(lagens?|balkens?|förordningens?|formens?|ordningens?|kungörelsens?|stadgans?)')
	reXmlCharref		= re.compile('&#\d+;')

	def __init__(self, *args):
		scriptDir = os.getcwd()

		self.graph = Graph()
		n3File = Util.relpath(scriptDir + '/etc/sfs-extra.n3')
		self.graph.load(n3File, format='n3')

		self.roots = []
		self.uriFormatter = {}
		self.decl = ''
		self.namedLaws = {}
		self.loadEbnf(scriptDir + '/etc/base.ebnf')
		self.args = args
		
		if self.LAGRUM in args:
			prods = self.loadEbnf(scriptDir + '/etc/lagrum.ebnf')
			for p in prods: 
				self.uriFormatter[p] = self.sfsFormatUri
			self.namedLaws.update(self.getRelationship(RDFS.label))
			self.roots.append('sfsrefs')
			self.roots.append('sfsref')

		if self.KORTALAGRUM in args:
			# TODO: Fix korta lagrum also
			pass

		if self.FORARBETEN in args:
			prods = self.loadEbnf(scriptDir + '/etc/forarbeten.ebnf')
			for p in prods:
				self.uriFormatter[p] = self.forarbeteFormatUri
			self.roots.append('forarbeteref')

		self.decl += 'root ::= (%s/plain)+\n' % '/'.join(self.roots)
		self.parser = Parser(self.decl, 'root')
		self.tagger = self.parser.buildTagger('root')
		self.depth 	= 0

		#SFS specific settings
		self.currentLaw 		= None
		self.currentChapter 	= None
		self.currentSection 	= None
		self.currentPiece		= None
		self.lastLaw			= None
		self.currentNamedLaws	= {}

	def loadEbnf(self, file):
		"""Loads the syntax from a given EBNF file"""
		f = open(file)
		syntax = f.read()
		self.decl += syntax
		f.close()
		return [x.group(1) for x in re.finditer(r'(\w+(Ref|RefID))\s*::=', syntax)]

	def getRelationship(self, predicate):
		d = {}
		for obj, subj in self.graph.subject_objects(predicate):
			d[unicode(subj)] = unicode(obj)
		return d

	def parse(self, indata, baseUri='http://rinfo.lagrummet.se/publ/sfs/9999:999#K9P9S9P9',predicate=None):
		if indata == '':
			return indata
		self.predicate = predicate
		self.baseUri = baseUri
		if baseUri:
			m = self.reUriSegments.match(baseUri)
			if m:
				self.baseUriAttrs = {'baseUri'	: m.group(1),
									 'law'		: m.group(2),
									 'chapter'	: m.group(6),
									 'section'	: m.group(8),
									 'piece'	: m.group(10),
									 'item'		: m.group(12)}
			else:
				self.baseUriAttrs = {'baseUri':baseUri}
		else:
			self.baseUriAttrs = {}


		fixedIndata = unicode(indata)
		
		if self.LAGRUM in self.args:
			fixedIndata = self.reEscapeCompound.sub(r'\1_\2_\3\4', fixedIndata)
			fixedIndata = self.reEscapeNamed.sub(r'|\1', fixedIndata)

		if isinstance(fixedIndata, unicode):
#.........这里部分代码省略.........

开发者ID:Sup3rgnu，项目名称:lawParse，代码行数:103，代码来源:Reference.py