This article collects typical usage examples of Python's re.purge function. If you have been wondering what exactly re.purge does, how to call it, and what it looks like in real code, the hand-picked samples below should help.
Shown below are 15 code examples of the purge function, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
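Before the examples, a quick orientation: the re module keeps an internal cache of compiled patterns, and re.purge() simply empties that cache. A minimal sketch of the effect (re._cache is a private CPython implementation detail, peeked at here only for illustration):

import re

re.match(r'\d+', '42')      # using a pattern populates the internal cache
print(len(re._cache) > 0)   # True: at least one compiled pattern is cached (private attribute)
re.purge()                  # empty the compiled-pattern cache
print(len(re._cache))       # 0: the cache is empty again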
Example 1: finditer
def finditer(content, encodings, charset, min_size):
    '''Generator function that iterates over all string matches inside the given content which are at least
    min_size characters long.
    @param content Binary content to search in
    @param encodings Dictionary of encoding functions
    @param charset An iterable object containing the characters to consider as part of a string
    @param min_size Minimal string size to consider as a string match
    @return A tuple containing the match offset in content, encoding name, encoding key and the deobfuscated
    string reconstructed from the blob found
    '''
    # iterate over available encoding functions
    for encoding_name, (encoding_function, encoding_range) in encodings.items():
        # iterate over all keys in range for that encoding function
        for key in encoding_range:
            encoded_charset = encoding_function(charset, key)
            pattern = '[%s]{%d,}' % (re.escape(encoded_charset), min_size)
            for match in re.finditer(pattern, content):
                # deobfuscation: reconstruct the original string
                deobf = ''.join(charset[encoded_charset.index(c)] for c in match.group(0))
                yield (match.start(0), encoding_name, key, deobf)
        # clean up the regex cache once in a while
        re.purge()
Example 2: color
def color(adjoining_words_i, data, balises):
    """Colors groups of contiguous words in a web page"""
    n = len(adjoining_words_i) + 1
    # start with the longest groups
    for i in xrange(n, 1, -1):
        # for each group of words
        for j in adjoining_words_i[i]:
            text = u'(\A|\W)(%s)(\W|\Z)' % (string.join([j[0][k] for k in range(0, i)], '(?:(?:</span>\W?)|\W)'))
            pattern1 = re.compile(text, re.I | re.U | re.S)
            replace = u'\g<1><span class="%s" style="color:blue; background-color:grey;">\g<2></span>\g<3>' % (string.join(j[0], ""))
            data = pattern1.sub(replace, data)
            re.purge()
    # look up the saved positions of the tags
    data_color = u''
    flag3 = re.compile(u'#([0-9]+?)#', re.I | re.U | re.S)
    m = flag3.finditer(data)
    k = 0
    # put the tags back into the string
    for j in m:
        data_color += data[k:j.start()] + balises[j.group(1)]
        k = j.end()
    data_color += data[k:]
    return data_color
Example 3: get_info
def get_info(Term, Subject):
    url = "https://ssbp.mycampus.ca/prod/bwckschd.p_get_crse_unsec?TRM=U&term_in=" + Term + "&sel_subj=dummy&sel_day=dummy&sel_schd=dummy&sel_insm=dummy&sel_camp=dummy&sel_levl=dummy&sel_sess=dummy&sel_instr=dummy&sel_ptrm=dummy&sel_attr=dummy&sel_subj=" + Subject + "&sel_crse=&sel_title=&sel_from_cred=&sel_to_cred=&sel_camp=UON&begin_hh=0&begin_mi=0&begin_ap=a&end_hh=0&end_mi=0&end_ap=a"
    htmltext = urllib.urlopen(url).read()
    regex = '<TH CLASS="ddheader" scope="col" >(.+?)<BR><BR></TH>'
    pattern = re.compile(regex)
    courses = re.split(pattern, htmltext)
    re.purge()
    for course in courses:
        regex = '<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) - (.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) - (.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) \(<ABBR title= "Primary">P</ABBR>\)</TD>'
        regex2 = '<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) - (.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) - (.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault"><ABBR title = "To Be Announced">(.+?)</ABBR></TD>'
        regex3 = '<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault"><ABBR title = "To Be Announced">(.+?)</ABBR></TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault"><ABBR title = "To Be Announced">(.+?)</ABBR></TD>\n<TD CLASS="dbdefault">(.+?) - (.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault"><ABBR title = "To Be Announced">(.+?)</ABBR></TD>'
        regex4 = '<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) - (.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) - (.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) \(<ABBR title= "Primary">P</ABBR>\)(.+)?</TD>'
        pattern = re.compile(regex)
        pattern2 = re.compile(regex2)
        pattern3 = re.compile(regex3)
        pattern4 = re.compile(regex4)
        entries = re.findall(pattern3, course)  # this pattern is for courses that do not have a start time or class assigned
        if entries:
            print entries
        else:
            entries = re.findall(pattern2, course)  # this pattern is for instructor TBA
            if entries:
                print entries
            else:
                entries = re.findall(pattern, course)  # this pattern is for the default structure of courses
                if entries:
                    print entries
                else:
                    entries = re.findall(pattern4, course)  # this pattern returns two values for instructor
                    print entries
Example 4: dash_R_cleanup
def dash_R_cleanup(fs, ps, pic):
    import gc, copy_reg
    import _strptime, linecache, dircache
    import urlparse, urllib, urllib2, mimetypes, doctest
    import struct, filecmp
    from distutils.dir_util import _path_created

    # Restore some original values.
    warnings.filters[:] = fs
    copy_reg.dispatch_table.clear()
    copy_reg.dispatch_table.update(ps)
    sys.path_importer_cache.clear()
    sys.path_importer_cache.update(pic)

    # Clear assorted module caches.
    _path_created.clear()
    re.purge()
    _strptime._regex_cache.clear()
    urlparse.clear_cache()
    urllib.urlcleanup()
    urllib2.install_opener(None)
    dircache.reset()
    linecache.clearcache()
    mimetypes._default_mime_types()
    struct._cache.clear()
    filecmp._cache.clear()
    doctest.master = None

    # Collect cyclic trash.
    gc.collect()
Example 5: main
def main():
    times = {}
    html = urllib2.urlopen('http://example.webscraping.com/places/default/view/United-Kingdom-239').read()
    NUM_ITERATIONS = 1000  # number of times to test each scraper
    for name, scraper in ('Regular expressions', regex_scraper), ('Beautiful Soup', beautiful_soup_scraper), ('Lxml', lxml_scraper):
        times[name] = []
        # record start time of scrape
        start = time.time()
        for i in range(NUM_ITERATIONS):
            if scraper == regex_scraper:
                # the regular expression module caches compiled patterns,
                # so purge this cache for meaningful timings
                re.purge()
            result = scraper(html)
            # check scraped result is as expected
            assert(result['area'] == '244,820 square kilometres')
            times[name].append(time.time() - start)
        # record end time of scrape and output the total
        end = time.time()
        print('{}: {:.2f} seconds'.format(name, end - start))
    writer = csv.writer(open('times.csv', 'w'))
    header = sorted(times.keys())
    writer.writerow(header)
    for row in zip(*[times[scraper] for scraper in header]):
        writer.writerow(row)
Example 6: clear_cache
def clear_cache(self):
    try:
        re.purge()
        dircache.reset()
        tiedobj.reset()
    except Exception, err:
        sys.stderr.write('Crond.clear_cache(): %s\n' % err)
Example 7: retrieve_devpaths
def retrieve_devpaths():
    pipe = Popen('si projectinfo --devpaths --noacl --noattributes --noshowCheckpointDescription --noassociatedIssues --project="%s"' % sys.argv[1], shell=True, bufsize=1024, stdout=PIPE)
    devpaths = pipe.stdout.read()
    devpaths = devpaths[1:]
    devpaths_re = re.compile(' (.+) \(([0-9][\.0-9]+)\)\n')
    devpath_col = devpaths_re.findall(devpaths)
    re.purge()
    devpath_col.sort(key=lambda x: map(int, x[1].split('.')))  # order development paths by version
    return devpath_col
Example 8: purge
def purge():
    """re.purge: Purge internal regular expressions cache."""
    def _cache_empty():
        return not getattr(re, '_cache')
    re.match('', '')
    cache_created = not _cache_empty()
    re.purge()
    return cache_created and _cache_empty() and "empty cache"
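Note that this check works by peeking at re._cache, a private attribute of the re module; it is a CPython implementation detail that may change between versions, so reading it is reasonable in a test like this but should be avoided in production code.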
Example 9: test_regex_equality_nocache
def test_regex_equality_nocache(self):
    pattern = r'^(?:[a-z0-9\.\-]*)://'
    left = RegexValidator(pattern)
    re.purge()
    right = RegexValidator(pattern)
    self.assertEqual(
        left,
        right,
    )
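The re.purge() call between the two constructions is the point of this test: re.compile normally returns the same cached pattern object for an identical pattern string, so without purging, both validators would hold the very same object and could compare equal by identity alone. Clearing the cache forces a fresh compile and verifies that RegexValidator equality is by value, not by identity.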
Example 10: get_skips
def get_skips(self, line):
    skip_points = []
    for r in self.skip_rules:
        pattern = '(' + r[0] + ')(' + r[1] + ')'
        matchobjs = re.finditer(pattern, line)
        for i in matchobjs:
            skip_points.append(i.end())
    re.purge()
    return skip_points
Example 11: get_breaks
def get_breaks(self, line):
    break_points = []
    for r in self.break_rules:
        pattern = '(' + r[0] + ')(' + r[1] + ')'
        matchobjs = re.finditer(pattern, line)
        for i in matchobjs:
            break_points.append(i.end())
    re.purge()
    return break_points
Example 12: check
def check(self, pattern):
    self.model.clear()
    if not pattern: return False
    try:
        re.compile(pattern, self.insertFlags())
        re.purge()
        return True
    except re.error as rerr:
        self.model.showError(str(rerr))
        return False
Example 13: getRegexpFeatures
def getRegexpFeatures(dct, number_of_words_per_type, number_of_words, select=None):
    it = list()
    for (mt, sen) in dct.iteritems():
        it.append((len(sen), mt, sen))
    it.sort(reverse=False)
    itt = list()
    for (l, mt, sen) in it:
        random.shuffle(sen)
        itt.append((l, mt, sen[0:1000]))
    regexps = dict()
    ret = list()
    types = list()
    for (_, meme, _sentences) in itt:
        types.extend([meme for _ in _sentences])
    types = [types]
    #glob = regExpChooser()
    #glob.add_types(types)
    for (_, meme_type, sentences) in it:
        if select != None and meme_type != select:
            continue
        regexps[meme_type] = cluster(sentences, meme_type)
        N = len(regexps[meme_type])
        n = 0
        start = time.time()
        loc = regExpChooser()
        loc.add_types(replaceNotEqual(types, meme_type))
        for regexp in regexps[meme_type]:
            re.purge()
            n += 1
            sys.stdout.write(
                "\r[{0}] {1}/{2} RE in {3} s. ({4})".format(
                    meme_type,
                    n,
                    N,
                    round(time.time() - start),
                    regexp
                ))
            sys.stdout.flush()
            compiled = re.compile(regexp)
            search_result = list()
            for (_, meme, _sentences) in itt:
                for sent in _sentences:
                    search_result.append(
                        1 if compiled.search(sent.lower()) != None else 0)
            loc.add_regexp(regexp, search_result)
            #glob.add_regexp(regexp, search_result)
        selection = loc.getBest(number_of_words_per_type)
        ret.extend(selection)
        print("\r[{0}] Regular expressions selected in {1} seconds. (best: {2})".format(
            meme_type,
            time.time() - start,
            selection[0])
        )
    #ret.extend(glob.getBest(number_of_words))
    return ret
Example 14: markdownify_content
def markdownify_content(self):
    self.content = re.sub(r'({{% question) "(.*)"(\s*%}})', r'### \2', self.content)
    re.purge()
    self.content = re.sub(r'{{< relref "(\w*)\.md[#\w\-éèà]*"\s*>}}\s*', r'\1', self.content)
    re.purge()
    self.content = re.sub(r'{{% (\w*) "(.*)" *%}}([\s\S]*?){{% \/\1 %}}', r'*\2*\3', self.content)
    re.purge()
    self.content = re.sub(r'\* Exemple : <.*\)', r'', self.content)
    re.purge()
    self.content = re.sub(r'(#+)\s', r'\1# ', self.content)
    re.purge()
Developer ID: numerique-gouv, Project: numerique.gouv.fr, Lines: 11, Source: import-politique-de-contribution-open-source.py
Example 15: remove_links
def remove_links(s, replace_by):
    # strip urls of the form www.something.com/path
    re.purge()
    temp = re.compile(r"\s*www\.\w+\.(com|net|me|org)?(\s|/*[-\w+&@#/%!?=~_:.\[\]()0-9]*)")
    s = temp.sub(replace_by, s)
    # strip http:// urls
    temp = re.compile(r"((http|ftp|https)://[-/\w.]*)")
    s = temp.sub(replace_by, s)
    temp = re.compile(r"\w+/\w")
    s = temp.sub(replace_by, s)
    return s