This article collects typical usage examples of the re.sub function from Python's re module. If you have been wondering exactly how to use re.sub, or are looking for working examples of it, the hand-picked code samples below may help.
Fifteen code examples of the sub function are shown below, taken from real projects and ordered by popularity by default.
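Before the project examples, here is a minimal sketch (not taken from any of the projects below) of the basic call: re.sub(pattern, repl, string, count=0, flags=0) returns a copy of string in which every non-overlapping match of pattern is replaced by repl, which may be a plain string or a function of the match object. The shorter sketches later in this page assume the same import of re.
import re

# Collapse runs of whitespace into single spaces.
print(re.sub(r"\s+", " ", "hello \t  world"))        # hello world

# repl can be a callable that receives each match object.
print(re.sub(r"\d+", lambda m: str(int(m.group()) * 2), "3 apples, 12 pears"))  # 6 apples, 24 pears

# count limits the number of replacements.
print(re.sub("-", "_", "a-b-c", count=1))            # a_b-c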
Example 1: _parse_productions
def _parse_productions(self):
    """
    Parse the current contents of the textwidget buffer, to create
    a list of productions.
    """
    productions = []

    # Get the text, normalize it, and split it into lines.
    text = self._textwidget.get('1.0', 'end')
    text = re.sub(self.ARROW, '->', text)
    text = re.sub('\t', ' ', text)
    lines = text.split('\n')

    # Convert each line to a CFG production
    for line in lines:
        line = line.strip()
        if line == '': continue

        productions += parse_cfg_production(line)
        #if line.strip() == '': continue
        #if not CFGEditor._PRODUCTION_RE.match(line):
        #    raise ValueError('Bad production string %r' % line)
        #
        #(lhs_str, rhs_str) = line.split('->')
        #lhs = Nonterminal(lhs_str.strip())
        #rhs = []
        #def parse_token(match, rhs=rhs):
        #    token = match.group()
        #    if token[0] in "'\"": rhs.append(token[1:-1])
        #    else: rhs.append(Nonterminal(token))
        #    return ''
        #CFGEditor._TOKEN_RE.sub(parse_token, rhs_str)
        #
        #productions.append(Production(lhs, *rhs))

    return productions
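The two re.sub calls in this example only normalize the buffer before it is split into lines. A small sketch of the same normalization in isolation; the arrow pattern here is a hypothetical stand-in, since the real value of self.ARROW is not shown in the snippet above:
ARROW = u'\u2192'   # hypothetical stand-in for self.ARROW
text = u"S \u2192 NP\tVP\nVP \u2192 V NP"
text = re.sub(ARROW, '->', text)
text = re.sub('\t', ' ', text)
print(text.split('\n'))   # ['S -> NP VP', 'VP -> V NP']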
Example 2: getPanelInfo
def getPanelInfo(self, doc, strXPath):
    try:
        npos = doc.text_content().find(strXPath)
        if npos == -1:
            return ""
        strContent = doc.text_content()[npos:-1]
        npos = strContent.find("})")
        if npos == -1:
            return ""
        strContent = strContent[0:npos+1]
        strContent = strContent[strContent.find("\"html\":\"")+8:-4]
        if "v2" in self.xpathType:
            strContent = strContent.decode('unicode-escape')
        strContent = re.sub(r"(\\n)*(\\t)*(\\ /)*(\\)*", "", strContent)
        strContent = re.sub(r"\\/", "/", strContent)
        if strContent:
            strContent = strContent.replace("&lt;", "<").replace("&gt;", ">").replace("nbsp;", "")
        else:
            return ""
    except Exception:
        s = sys.exc_info()
        msg = (u"getPanelInfo Error %s happened on line %d" % (s[1], s[2].tb_lineno))
        logger.error(msg)
        return ""
    return strContent
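The two substitutions undo JSON-style escaping in the extracted HTML fragment. A sketch on made-up input; note that the trailing (\\)* group in the first pattern already removes every backslash, so the second re.sub finds nothing left to replace here:
frag = r'<div class=\"item\">\n\t<a href=\"http:\/\/example.com\">x<\/a>'
frag = re.sub(r"(\\n)*(\\t)*(\\ /)*(\\)*", "", frag)
frag = re.sub(r"\\/", "/", frag)
print(frag)   # <div class="item"><a href="http://example.com">x</a>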
Example 3: GetBook
def GetBook(self, book):
    self.footnotes = []
    self.content = []
    counter = 1
    plainBook = unicodeToPlain(book)
    while True:
        url = 'http://www.biblia.deon.pl/otworz.php'
        values = {'ksiega': book.encode('iso8859_2'),
                  'rozdzial': str(counter)}
        data = urllib.urlencode(values)
        response = urllib2.urlopen(urllib2.Request(url, data)).read()
        doc = html.fromstring(response)
        if counter == 1:
            BookTitle = (doc.findall('.//span[@style="font-size:22px;"]')[0])
            self.content.append(re.sub(r'</span>', r'</div>', re.sub(r'<span style=\"font-size:22px;\"', r'<br><br><a name="K' + plainBook + r'"></a><div class="tytul"', html.tostring(BookTitle))))
            ChaptersInBook = len(doc.findall('.//select[@name="rozdzial"]/option'))
        else:
            self.content.append('<br><br>')
        plainPrefix = plainBook + str(counter)
        self.content.append('<div class="numer">' + str(counter) + '</div>')
        Book.GetContent(self, doc.xpath('//div[@class="tresc"]')[0], plainPrefix)
        Book.GetFootnotes(self, doc.xpath('//td[@width="150"]/table/tr[5]/td/div[1]')[0], plainPrefix, unicodeToReference(book) + ' ' + str(counter))
        if counter == ChaptersInBook:
            self.content.append('<br><br>' + "".join(self.footnotes))
            break
        counter += 1
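The nested re.sub calls rewrite the scraped title span into the book page's own markup. Roughly, with a hypothetical plainBook value and a hard-coded title string standing in for html.tostring(BookTitle):
plainBook = "Rdz"   # hypothetical plain book name
title_html = '<span style="font-size:22px;">Ksiega Rodzaju</span>'
out = re.sub(r'</span>', r'</div>',
             re.sub(r'<span style=\"font-size:22px;\"',
                    r'<br><br><a name="K' + plainBook + r'"></a><div class="tytul"',
                    title_html))
print(out)   # <br><br><a name="KRdz"></a><div class="tytul">Ksiega Rodzaju</div>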
Example 4: extract_bow_v2_features
def extract_bow_v2_features(train, test, test_contains_labels=False):
    '''
    Performs feature extraction for another simple tfidf model used for
    ensembling purposes.
    '''
    s_data = []
    s_labels = []
    t_data = []
    t_labels = []
    stemmer = PorterStemmer()
    for i, row in train.iterrows():
        s = (" ").join(["q" + z for z in BeautifulSoup(train["search_term"][i], "lxml").get_text(" ").split(" ")]) + " " + (" ").join(["z" + z for z in BeautifulSoup(train.product_title[i], "lxml").get_text(" ").split(" ")]) + " " + BeautifulSoup(train.product_description[i], "lxml").get_text(" ")
        s = re.sub("[^a-zA-Z0-9]", " ", s)
        s = (" ").join([stemmer.stem(z) for z in s.split(" ")])
        s_data.append(s)
        s_labels.append(str(train["relevance"][i]))
    for i, row in test.iterrows():
        s = (" ").join(["q" + z for z in BeautifulSoup(test["search_term"][i], "lxml").get_text().split(" ")]) + " " + (" ").join(["z" + z for z in BeautifulSoup(test.product_title[i], "lxml").get_text().split(" ")]) + " " + BeautifulSoup(test.product_description[i], "lxml").get_text()
        s = re.sub("[^a-zA-Z0-9]", " ", s)
        s = (" ").join([stemmer.stem(z) for z in s.split(" ")])
        t_data.append(s)
        if test_contains_labels:
            t_labels.append(str(test["relevance"][i]))
    return (s_data, s_labels, t_data, t_labels)
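The cleanup inside each loop is a common re.sub idiom: replace everything that is not alphanumeric with a space, then stem whatever tokens survive. A sketch of just that step, with PorterStemmer from NLTK assumed, as in the script above:
from nltk.stem.porter import PorterStemmer

stemmer = PorterStemmer()
s = "Stainless-Steel Screws, #8 x 1-1/4 in."
s = re.sub("[^a-zA-Z0-9]", " ", s)
s = " ".join(stemmer.stem(z) for z in s.split(" ") if z)
print(s)   # e.g. "stainless steel screw 8 x 1 1 4 in"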
Example 5: gen_xkcd_sub
def gen_xkcd_sub(msg, hook=False):
    # http://xkcd.com/1288/
    substitutions = {
        'witnesses': 'these dudes I know',
        'allegedly': 'kinda probably',
        'new study': 'tumblr post',
        'rebuild': 'avenge',
        'space': 'SPAAAAAACCCEEEEE',
        'google glass': 'virtual boy',
        'smartphone': 'pokedex',
        'electric': 'atomic',
        'senator': 'elf-lord',
        'car': 'cat',
        'election': 'eating contest',
        'congressional leaders': 'river spirits',
        'homeland security': 'homestar runner',
        'could not be reached for comment': 'is guilty and everyone knows it'
    }
    # http://xkcd.com/1031/
    substitutions['keyboard'] = 'leopard'
    # http://xkcd.com/1418/
    substitutions['force'] = 'horse'
    output = msg
    if not hook or random() < 0.001 or True:
        for text, replacement in substitutions.items():
            if text in output:
                output = re.sub(r"\b%s\b" % text, replacement, output)
        output = re.sub(r'(.*)(?:-ass )(.*)', r'\1 ass-\2', output)
    if msg == output:
        return None if hook else msg
    else:
        return output
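The \b word boundaries in the pattern are what keep the substitutions from firing inside longer words. A hypothetical call, assuming the function above and its imports (re, random) are in scope:
print(gen_xkcd_sub("A new study says the election hinges on one senator."))
# A tumblr post says the eating contest hinges on one elf-lord.

# Without the \b anchors, 'car' would also match inside 'carpet':
print(re.sub(r"\bcar\b", "cat", "park the car on the carpet"))   # park the cat on the carpet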
Example 6: classifyText
def classifyText(text, params):
    start_time = params.my_time()

    # clean
    try: text = params.cleaner.clean_html(text)
    except: pass
    text = re.sub('<.*?>', ' ', text)
    text = re.sub('\s+', ' ', text)
    text = text.lower()

    # Tokenize
    tokens = re.findall('[a-z]+', text)

    # Remove stop words
    tokens_2 = []
    for t in tokens:
        if (not t in params.stopword_list): tokens_2.append(t)
    # print tokens_2

    # Stem
    stems = []
    for t in tokens_2:
        stem = params.porterStemmer.stem(t, 0, len(t)-1)
        stems.append(stem)

    z = 0  # params.linear_classifier['{{intercept}}']+.6
    for s in stems:
        if s in params.linear_classifier:
            # print s, params.linear_classifier[s]
            z += params.linear_classifier[s]

    end_time = params.my_time()
    return (z < 0, [start_time, end_time, len(stems), z, 1/(1+math.exp(-z)), int(z > 0)])
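The first two substitutions are the usual strip-tags / collapse-whitespace pair; in isolation:
text = "<p>Hello   <b>World</b></p>\n<div>again</div>"
text = re.sub('<.*?>', ' ', text)
text = re.sub('\s+', ' ', text)
print(re.findall('[a-z]+', text.lower()))   # ['hello', 'world', 'again']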
Example 7: htmlify
def htmlify(self, text):
    t = text.strip()
    #t = xml.sax.saxutils.escape(t)
    t = "<p>%s</p>" % t
    t = re.sub('\n\n+', '</p><p>', t)
    t = re.sub('\n', '<br>', t)
    return t
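For example, blank lines become paragraph breaks and single newlines become <br> tags:
t = "first paragraph\n\nsecond paragraph\nstill second"
t = "<p>%s</p>" % t.strip()
t = re.sub('\n\n+', '</p><p>', t)
t = re.sub('\n', '<br>', t)
print(t)   # <p>first paragraph</p><p>second paragraph<br>still second</p>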
Example 8: convert_corpus
def convert_corpus(filepath, mapping, alignment, begin="xxBeGiN142xx", end="xxEnD142xx"):
    general_corpus = ''
    with open(filepath, 'rb') as f:
        general_corpus = re.sub('(' + begin + '\W+)+', ' . ', f.read())
        general_corpus = re.sub('\n+', ' this_is_n3wline ', general_corpus)
    corpus = []
    for token in general_corpus.split():
        if token.strip() == '.':
            if len(corpus) > 0:
                if '\n' not in corpus[-1]:
                    # If the token is punctuation assign a random punctuation.
                    corpus[-1] = corpus[-1] + random.choice(['.', '.', '.', ',', ',', ',', '!', '?'])
        elif token.strip() == 'this_is_n3wline':
            corpus[-1] = corpus[-1] + '.\n\n'
        elif alignment[token] in mapping:
            if len(corpus) > 0 and re.search('[\n\.!?]', corpus[-1]):
                corpus.append(mapping[alignment[token]].capitalize().strip())
            else:
                corpus.append(mapping[alignment[token]].strip())
    corpus[0] = corpus[0].capitalize()
    output = ' '.join(corpus)
    output = re.sub(r' +', ' ', output)
    output = re.sub(r'\n+ ', '\n\n', output)
    return output
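The two closing substitutions just tidy the joined text: runs of spaces collapse to one, and a newline followed by a leftover space becomes a clean paragraph break. For example:
output = "word  word .\n\n Next sentence"
output = re.sub(r' +', ' ', output)
output = re.sub(r'\n+ ', '\n\n', output)
print(repr(output))   # 'word word .\n\nNext sentence'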
Example 9: getCategoryUrl
def getCategoryUrl(site="", url=""):
    catDb = openTable(tableName=global_setting['catTable'])
    r = session.get(url)
    if not r.text:
        return False
    soup = BeautifulSoup(r.text)
    for level1 in soup.select('.classify_books'):
        curLevel1 = level1.select('.classify_title')[0].text
        curLevel1 = re.sub('\s', '', curLevel1)
        for level2 in level1.select('.classify_kind'):
            curLevel2 = level2.select('.classify_kind_name')[0].text
            curLevel2 = re.sub('\s', '', curLevel2)
            for level3 in level2.select('ul li a'):
                #curLevel3 = re.sub('\s', '', level3.text)
                curLevel3 = level3.text.strip()
                curlUrl = level3['href']
                retFind = re.findall(r'\/cp(.*)\.html', curlUrl)
                if retFind:
                    curCatID = retFind[0]
                    catType = 'book'
                else:
                    retFind = re.findall(r'\/cid(.*)\.html', curlUrl)
                    if retFind:
                        curCatID = retFind[0]
                        catType = 'nonbook'
                if retFind:
                    if catDb.find({'catId': curCatID}).count() > 0:
                        logger.debug('category %s exists, skip\n' % (curCatID))
                    else:
                        catDb.insert({'catId': curCatID, 'level1': curLevel1, 'level2': curLevel2, 'level3': curLevel3, 'catUrl': curlUrl, 'catType': catType, 'site': site})
    return True
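Here re.sub('\s', '', ...) simply removes all whitespace, including newlines, from the scraped category labels, e.g.:
curLevel1 = "  文学  \n  艺术  "
print(re.sub('\s', '', curLevel1))   # 文学艺术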
Example 10: obfuscate_codeblocks
def obfuscate_codeblocks(source):
    """Method for obfuscating codeblocks contents.

    It can often be useful to temporarily obfuscate codeblock contents in order to
    safely perform some tasks and then re-introduce them.

    Parameters
    ----------
    source : str
        string (as single stream) containing the source

    Returns
    -------
    protected_contents : list
        list of str containing the contents of codeblocks
    str
        source with codeblocks contents obfuscated and replaced by a safe placeholder

    >>> source = '``` my code block ``` other contents'
    >>> prot, ob_source = obfuscate_codeblocks(source)
    >>> prot[0][2]
    '``` my code block ```'
    >>> ob_source
    '$PROTECTED-1 other contents'
    """
    obfuscate_source = source
    protected_contents = []
    for match in re.finditer(__regex_codeblock__, obfuscate_source):
        protected_contents.append([match.start(), match.end(), match.group()])
        obfuscate_source = re.sub(__regex_codeblock__, '$PROTECTED-' + str(len(protected_contents)), obfuscate_source, 1)
    for match in re.finditer(__regex_codeblock_html__, obfuscate_source):
        protected_contents.append([match.start(), match.end(), match.group()])
        obfuscate_source = re.sub(__regex_codeblock_html__, '$PROTECTED-' + str(len(protected_contents)), obfuscate_source, 1)
    return protected_contents, obfuscate_source
Example 11: makeIdentifier
def makeIdentifier(self, string):
    # Collapse whitespace, normalize unicode, strip punctuation, and
    # join the remaining words with underscores.
    string = re.sub(r"\s+", " ", string.strip())
    string = unicodedata.normalize('NFKD', safeEncode(string))
    string = re.sub(r"['\"!@#$&%^*\(\)_+\.,;:/]", "", string)
    string = re.sub(r"[_ ]+", "_", string)
    string = string.strip('_')
    return string.strip().lower()
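Leaving out the project-specific safeEncode/unicodedata step, the re.sub pipeline on its own behaves roughly like this (a sketch, using the punctuation class as reconstructed above):
s = "  The Dark  Side: of the Moon!  "
s = re.sub(r"\s+", " ", s.strip())                  # collapse whitespace
s = re.sub(r"['\"!@#$&%^*\(\)_+\.,;:/]", "", s)     # drop punctuation
s = re.sub(r"[_ ]+", "_", s)                        # spaces -> underscores
print(s.strip('_').lower())                         # the_dark_side_of_the_moon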
Example 12: _sanitize
def _sanitize(self, data):
    retv = ''
    if data.find('\x1b') != -1:
        tmp = filter(lambda x: x in string.printable, data)
        retv += re.sub('(\{|\}|\*|\%)', '', re.sub('\[[0-9\;]+m', '', tmp))
        return retv
    return data
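A quick check of the ANSI-stripping behaviour (a sketch; the string.printable filter already drops the ESC byte itself, and the Python 2 filter() call above returns a string, so the Python 3 equivalent joins explicitly):
import string
data = "\x1b[31mERROR\x1b[0m {critical}"
tmp = ''.join(c for c in data if c in string.printable)              # drops the ESC bytes
print(re.sub('(\{|\}|\*|\%)', '', re.sub('\[[0-9\;]+m', '', tmp)))   # ERROR critical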
Example 13: _clean_text
def _clean_text(self, text):
    """ Cleans up text before we make it into an HTML tree:
        1. Nukes <![CDATA stuff.
        2. Nukes XML encoding declarations
        3. Replaces </br> with <br/>
        4. Nukes invalid bytes in input
        5. ?
    """
    # Remove <![CDATA because it causes breakage in lxml.
    text = re.sub(r"<!\[CDATA\[", u"", text)
    text = re.sub(r"\]\]>", u"", text)

    # Remove <?xml> declaration in Unicode objects, because it causes an error:
    # "ValueError: Unicode strings with encoding declaration are not supported."
    # Note that the error only occurs if the <?xml> tag has an "encoding"
    # attribute, but we remove it in all cases, as there's no downside to
    # removing it. This moves our encoding detection to chardet, rather than
    # lxml.
    if isinstance(text, unicode):
        text = re.sub(r"^\s*<\?xml\s+.*?\?>", "", text)

    # Fix </br>
    text = re.sub("</br>", "<br/>", text)

    # Fix invalid bytes (http://stackoverflow.com/questions/8733233/filtering-out-certain-bytes-in-python)
    text = re.sub(u"[^\u0020-\uD7FF\u0009\u000A\u000D\uE000-\uFFFD\u10000-\u10FFFF]+", "", text)

    return text
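For instance, the CDATA removal and the </br> fix on a small snippet:
raw = '<![CDATA[<p>hello</p>]]> line one</br>line two'
cleaned = re.sub(r"<!\[CDATA\[", "", raw)
cleaned = re.sub(r"\]\]>", "", cleaned)
cleaned = re.sub("</br>", "<br/>", cleaned)
print(cleaned)   # <p>hello</p> line one<br/>line two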
Example 14: sendGPS
def sendGPS(self, head, cmd):
    maxcnt = 100
    string = head + cmd
    res = ""
    print "GPS SEND: '%s'" % string
    self.dev.flushInput()
    self.dev.write(string + "\r\n")
    for j in xrange(maxcnt):
        res = self.dev.readline()
        if len(res) > 0:
            res = re.sub("^\s+", "", res)
            res = re.sub("\s+$", "", res)
            print "RAW GPS REPLY: '%s'" % res
            pos = res.find(head)
            if pos != -1:
                res = res[pos:].split("*")[0]
                print "GPS REPLY: '%s'" % res
                return res
        else:
            print "ZERO REPLY"
            return None
    print "sendGPS: FAILED: '%s'" % res
Example 15: main
def main():
    cur_dir = os.path.dirname(__file__)
    os.chdir(os.path.join(cur_dir, ".."))
    modules = sys.argv[1:]

    if not modules:
        modules = ['django_evolution']

    p = subprocess.Popen(['pyflakes'] + modules,
                         stderr=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         close_fds=True)

    contents = p.stdout.readlines()

    # Read in the exclusions file
    exclusions = {}
    fp = open(os.path.join(cur_dir, "pyflakes.exclude"), "r")

    for line in fp.readlines():
        if not line.startswith("#"):
            exclusions[line.rstrip()] = 1

    fp.close()

    # Now filter the output against the exclusions
    for line in contents:
        line = line.rstrip()
        test_line = re.sub(r':[0-9]+:', r':*:', line, 1)
        test_line = re.sub(r'line [0-9]+', r'line *', test_line)

        if test_line not in exclusions:
            print line
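The two substitutions turn a concrete pyflakes warning into a wildcard form so it can be matched against the exclusions file. For example, on a made-up warning line:
line = "django_evolution/models.py:12: 'os' imported but unused"
test_line = re.sub(r':[0-9]+:', r':*:', line, 1)
test_line = re.sub(r'line [0-9]+', r'line *', test_line)
print(test_line)   # django_evolution/models.py:*: 'os' imported but unused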