本文整理汇总了Python中re.finditer函数的典型用法代码示例。如果您正苦于以下问题:Python finditer函数的具体用法?Python finditer怎么用?Python finditer使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了finditer函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_pronoun_label_zh
def _count_zh_pronouns(pronouns, line):
    """Count token-delimited occurrences of the given pronouns in `line`.

    `line` is assumed to be a whitespace-tokenised Chinese sentence, so a
    pronoun only counts when delimited by start/end of line or by spaces.
    Returns (count, start_positions, matched_strings).
    """
    count = 0
    positions = []
    matched = []
    for pro in pronouns:
        # Anchored at line start, mid-line, and line end respectively.
        for pattern in ('^' + pro + ' ', ' ' + pro + ' ', ' ' + pro + '$'):
            for m in re.finditer(pattern, line):
                count += 1
                positions.append(m.span()[0])
                matched.append(m.group(0))
    return count, positions, matched


def get_pronoun_label_zh(line):
    """Label a tokenised Chinese line by its dominant personal pronouns.

    Returns (label, f_count, s_count, f_matches, s_matches) where label is
    '1v' (first person dominates, or ties but appears first), '2v' (second
    person dominates), or 'none' when no pronoun occurs at all.

    Bug fix: the returned match lists now accumulate the matches of every
    pronoun; the original returned only the findall result of the *last*
    pronoun tried ('我 的' / '你 的'), which was usually an empty list even
    when the counts were non-zero.
    """
    f_count, f_positions, f_matches = _count_zh_pronouns(['我', '我们', '我 的'], line)
    s_count, s_positions, s_matches = _count_zh_pronouns(['你', '你们', '你 的'], line)
    if f_count == 0 and s_count == 0:
        return ('none', 0, 0, [], [])
    if f_count == s_count:
        # On a tie, whichever pronoun class appears earliest wins.
        starts_with = '1v' if min(f_positions) < min(s_positions) else '2v'
        return (starts_with, f_count, s_count, f_matches, s_matches)
    if f_count > s_count:
        return ('1v', f_count, s_count, f_matches, s_matches)
    return ('2v', f_count, s_count, f_matches, s_matches)
示例2: ParseMethodAnnotation
def ParseMethodAnnotation(self, annotation):
    """Parse a method annotation string and record the options it declares.

    Recognised options:
      reservable = true                  -> self._is_reservable = True
      delegate = true|false              -> self._is_delegate
      disableReflectMethod = true|false  -> self._disable_reflect_method
      preWrapperLines = {"...", ...}     -> self._method_annotations[ANNOTATION_PRE_WRAPLINE]
      postWrapperLines = {"...", ...}    -> self._method_annotations[ANNOTATION_POST_WRAPLINE]

    Fix: regex patterns are now raw strings; the originals contained
    '\\s' escapes in plain string literals, which raise DeprecationWarning
    (and are slated to become errors) on modern Python.
    """
    if annotation.find('reservable = true') >= 0:
        self._is_reservable = True

    delegate_re = re.compile(r'delegate\s*=\s*(?P<delegate>true|false)')
    for match in delegate_re.finditer(annotation):
        # The group can only be 'true' or 'false', so compare directly.
        self._is_delegate = match.group('delegate') == 'true'

    disable_reflect_method_re = re.compile(
        r'disableReflectMethod\s*=\s*(?P<disableReflectMethod>true|false)')
    for match in disable_reflect_method_re.finditer(annotation):
        self._disable_reflect_method = match.group('disableReflectMethod') == 'true'

    pre_wrapline_re = re.compile(
        r'preWrapperLines\s*=\s*\{\s*(?P<pre_wrapline>(".*")(,\s*".*")*)\s*\}')
    for match in pre_wrapline_re.finditer(annotation):
        pre_wrapline = self.FormatWrapperLine(match.group('pre_wrapline'))
        self._method_annotations[self.ANNOTATION_PRE_WRAPLINE] = pre_wrapline

    post_wrapline_re = re.compile(
        r'postWrapperLines\s*=\s*\{\s*(?P<post_wrapline>(".*")(,\s*".*")*)\s*\}')
    for match in post_wrapline_re.finditer(annotation):
        post_wrapline = self.FormatWrapperLine(match.group('post_wrapline'))
        self._method_annotations[self.ANNOTATION_POST_WRAPLINE] = post_wrapline
示例3: scan_page
# Probe each GET/POST parameter of `url` / `data` for reflected XSS.
# Python 2 code: print statements, xrange, urllib.quote.  Relies on
# module-level names defined elsewhere in the file (GET, POST,
# PREFIX_SUFFIX_LENGTH, LARGER_CHAR_POOL, SMALLER_CHAR_POOL, XSS_PATTERNS,
# _retrieve_content, _contains).  Returns True if any parameter appeared
# vulnerable.
def scan_page(url, data=None):
retval, usable = False, False
# Give value-less parameters ("a=&b=") the dummy value "1" so they can be tampered with.
url, data = re.sub(r"=(&|\Z)", "=1\g<1>", url) if url else url, re.sub(r"=(&|\Z)", "=1\g<1>", data) if data else data
try:
# Test the query string first (GET phase), then the request body (POST phase).
for phase in (GET, POST):
current = url if phase is GET else (data or "")
for match in re.finditer(r"((\A|[?&])(?P<parameter>[\w\[\]]+)=)(?P<value>[^&]+)", current):
found, usable = False, True
print "* scanning %s parameter '%s'" % (phase, match.group("parameter"))
# Random lowercase prefix/suffix delimit the injected payload when it is reflected back.
prefix, suffix = ("".join(random.sample(string.ascii_lowercase, PREFIX_SUFFIX_LENGTH)) for i in xrange(2))
# Try the larger pool (with a leading quote) first, then fall back to the smaller one.
for pool in (LARGER_CHAR_POOL, SMALLER_CHAR_POOL):
if not found:
# Append "<prefix><shuffled pool><suffix>" (URL-encoded) to the parameter's original value.
tampered = current.replace(match.group(0), "%s%s" % (match.group(0), urllib.quote("%s%s%s%s" % ("'" if pool == LARGER_CHAR_POOL else "", prefix, "".join(random.sample(pool, len(pool))), suffix))))
content = (_retrieve_content(tampered, data) if phase is GET else _retrieve_content(url, tampered)).replace("%s%s" % ("'" if pool == LARGER_CHAR_POOL else "", prefix), prefix)
# Look for the payload in the response and classify the reflection context.
for sample in re.finditer("%s([^ ]+?)%s" % (prefix, suffix), content, re.I):
for regex, condition, info, content_removal_regex in XSS_PATTERNS:
context = re.search(regex % {"chars": re.escape(sample.group(0))}, re.sub(content_removal_regex or "", "", content), re.I)
if context and not found and sample.group(1).strip():
if _contains(sample.group(1), condition):
print " (i) %s parameter '%s' appears to be XSS vulnerable (%s)" % (phase, match.group("parameter"), info % dict((("filtering", "no" if all(char in sample.group(1) for char in LARGER_CHAR_POOL) else "some"),)))
found = retval = True
break
if not usable:
print " (x) no usable GET/POST parameters found"
except KeyboardInterrupt:
print "\r (x) Ctrl-C pressed"
return retval
示例4: tableViewInHierarchy
def tableViewInHierarchy():
    """Find a UITableView (or subclass) in the key window's view hierarchy.

    Returns the view's address as a hex string, or None if nothing matches.
    """
    description = fb.evaluateExpressionValue(
        "(id)[(id)[[UIApplication sharedApplication] keyWindow] recursiveDescription]"
    ).GetObjectDescription()

    # Fast paths: a plain UITableView instance, then a declared subclass.
    for pattern in (r"UITableView: (0x[0-9a-fA-F]+);",
                    r"(0x[0-9a-fA-F]+); baseClass = UITableView;"):
        hit = re.search(pattern, description)
        if hit:
            return hit.group(1)

    # SLOW: check every pointer in town
    for candidate in re.findall(re.compile(r"(0x[0-9a-fA-F]+)[;>]"), description):
        if fb.evaluateBooleanExpression("[" + candidate + " isKindOfClass:(id)[UITableView class]]"):
            return candidate
    return None
示例5: setupTranslations
# Ensure the Crowdin project's language list covers our locales plus every
# locale the target browser supports.  Python 2 code (urllib2, print
# statement); depends on chromeLocales and mapLocale() defined elsewhere.
#   type        -- 'chrome' or anything else (treated as Firefox); note this
#                  parameter shadows the `type` builtin inside the function
#   locales     -- iterable of locale codes we already ship
#   projectName -- Crowdin project identifier
#   key         -- Crowdin API key
# Raises Exception when Crowdin does not report the edit as successful.
def setupTranslations(type, locales, projectName, key):
# Copy locales list, we don't want to change the parameter
locales = set(locales)
# Fill up with locales that we don't have but the browser supports
if type == 'chrome':
for locale in chromeLocales:
locales.add(locale)
else:
# Scrape Firefox's download page and language-pack page for locale codes.
firefoxLocales = urllib2.urlopen('http://www.mozilla.org/en-US/firefox/all.html').read()
for match in re.finditer(r'&lang=([\w\-]+)"', firefoxLocales):
locales.add(mapLocale(type, match.group(1)))
langPacks = urllib2.urlopen('https://addons.mozilla.org/en-US/firefox/language-tools/').read()
for match in re.finditer(r'<tr>.*?</tr>', langPacks, re.S):
if match.group(0).find('Install Language Pack') >= 0:
match2 = re.search(r'lang="([\w\-]+)"', match.group(0))
if match2:
locales.add(mapLocale(type, match2.group(1)))
# Convert locale codes to the ones that Crowdin will understand
locales = set(map(lambda locale: mapLocale(type, locale), locales))
# Keep only language codes the Crowdin server lists as known.
allowed = set()
allowedLocales = urllib2.urlopen('http://crowdin.net/page/language-codes').read()
for match in re.finditer(r'<tr>\s*<td\b[^<>]*>([\w\-]+)</td>', allowedLocales, re.S):
allowed.add(match.group(1))
if not allowed.issuperset(locales):
print 'Warning, following locales aren\'t allowed by server: ' + ', '.join(locales - allowed)
locales = list(locales & allowed)
locales.sort()
# Push the final, sorted language list to Crowdin's edit-project API.
params = urllib.urlencode([('languages[]', locale) for locale in locales])
result = urllib2.urlopen('http://api.crowdin.net/api/project/%s/edit-project?key=%s' % (projectName, key), params).read()
if result.find('<success') < 0:
raise Exception('Server indicated that the operation was not successful\n' + result)
示例6: consistency_check
def consistency_check(text, word_pairs, err, msg, offset=0):
    """Build a consistency checker for the given word_pairs.

    For every pair whose two spellings both occur in `text`, flag each
    occurrence of the *less frequent* spelling, suggesting the dominant one.
    Returns a list of (start, end, err, message) tuples.
    """
    results = []
    msg = " ".join(msg.split())
    for pair in word_pairs:
        hits_a = list(re.finditer(pair[0], text))
        hits_b = list(re.finditer(pair[1], text))
        if not (hits_a and hits_b):
            continue
        # Whichever spelling occurs less often (ties flag the first form).
        if len(hits_a) > len(hits_b):
            minority, preferred = hits_b, pair[0]
        else:
            minority, preferred = hits_a, pair[1]
        for hit in minority:
            results.append((hit.start() + offset,
                            hit.end() + offset,
                            err,
                            msg.format(hit.group(0), preferred)))
    return results
示例7: get_sources
# Scrape a movie page for hosted-video links.  Python 2 code (urlparse
# module, the 'base-64' str codec); relies on scraper framework members
# defined elsewhere (self.get_url, self.base_url, self._http_get,
# self._caesar, self._get_links, QUALITIES, FORCE_NO_MATCH, scraper_utils).
# Returns a list of hoster dicts for `video`.
def get_sources(self, video):
source_url = self.get_url(video)
hosters = []
if source_url and source_url != FORCE_NO_MATCH:
url = urlparse.urljoin(self.base_url, source_url)
html = self._http_get(url, cache_limit=.5)
# A page-level warning downgrades the quality assigned to every link below.
match = re.search('This movie is of poor quality', html, re.I)
if match:
quality = QUALITIES.LOW
else:
quality = QUALITIES.HIGH
# Embedded players: fetch each embed page and de-obfuscate its payload.
for match in re.finditer('href="([^"]+/embed\d*/[^"]+)', html):
url = match.group(1)
embed_html = self._http_get(url, cache_limit=.5)
r = re.search('{\s*write\("([^"]+)', embed_html)
if r:
# Payload is Caesar-shift-13 over base64; sometimes the base64 layer comes first.
plaintext = self._caesar(r.group(1), 13).decode('base-64')
if 'http' not in plaintext:
plaintext = self._caesar(r.group(1).decode('base-64'), 13).decode('base-64')
else:
plaintext = embed_html
hosters += self._get_links(plaintext)
# Direct links: anchors followed by a play_video.gif icon.
pattern = 'href="([^"]+)".*play_video.gif'
for match in re.finditer(pattern, html, re.I):
url = match.group(1)
host = urlparse.urlparse(url).hostname
hoster = {'multi-part': False, 'url': url, 'host': host, 'class': self, 'quality': scraper_utils.get_quality(video, host, quality), 'rating': None, 'views': None, 'direct': False}
hosters.append(hoster)
return hosters
示例8: _generate_entry_probe
def _generate_entry_probe(self):
# Any $entry(name) expressions result in saving that argument
# when entering the function.
self.args_to_probe = set()
regex = r"\$entry\((\w+)\)"
for expr in self.exprs:
for arg in re.finditer(regex, expr):
self.args_to_probe.add(arg.group(1))
for arg in re.finditer(regex, self.filter):
self.args_to_probe.add(arg.group(1))
if any(map(lambda expr: "$latency" in expr, self.exprs)) or \
"$latency" in self.filter:
self.args_to_probe.add("__latency")
self.param_types["__latency"] = "u64" # nanoseconds
for pname in self.args_to_probe:
if pname not in self.param_types:
raise ValueError("$entry(%s): no such param" \
% arg)
self.hashname_prefix = "%s_param_" % self.probe_hash_name
text = ""
for pname in self.args_to_probe:
# Each argument is stored in a separate hash that is
# keyed by pid.
text += "BPF_HASH(%s, u32, %s);\n" % \
(self.hashname_prefix + pname,
self.param_types[pname])
text += self._generate_entry()
return text
示例9: list_show_page
# Parse a show page into season and/or episode items.  Relies on
# module-level marker/regex constants (SERIES_*, EPISODE_*), util.substr,
# and the self.dir_item / self.video_item / self._filter helpers defined
# elsewhere.  Returns a list of item dicts.
def list_show_page(self, url, page, seasons=False, episodes=False):
result = []
# Alternate page layout used by the "epizody" / "archiv" sections.
if "/p/epizody" in url or "/p/epiz%C3%B3dy" in url or "p/archiv" in url:
if seasons:
season_data = util.substr(page, SERIES_START2, SERIES_END2)
for m in re.finditer(SERIES_ITER_RE2, season_data, re.DOTALL | re.IGNORECASE):
item = self.dir_item()
item['title'] = m.group('title')
# Season id is carried in the URL fragment for later dispatch.
item['url'] = url + '#post=%s' % (m.group('id'))
self._filter(result, item)
if episodes:
for m in re.finditer(EPISODE_ITER_RE2, page, re.DOTALL | re.IGNORECASE):
item = self.video_item()
item['title'] = "%s (%s)" % (m.group('title'), m.group('date'))
item['url'] = m.group('url')
self._filter(result, item)
else:
if seasons:
season_data = util.substr(page, SERIES_START, SERIES_END)
for m in re.finditer(SERIES_ITER_RE, season_data, re.DOTALL | re.IGNORECASE):
item = self.dir_item()
item['title'] = m.group('title')
item['url'] = 'http://' + urlparse.urlparse(url).netloc + '/ajax.json?' + m.group('url')
self._filter(result, item)
if episodes:
episodes_data = util.substr(page, EPISODE_START, EPISODE_END)
# NOTE(review): episodes_data is computed but the finditer below scans
# `page` instead -- likely a bug (EPISODE_ITER_RE was presumably meant
# to run over episodes_data, as the seasons branch does with its
# substring).  Left unchanged; confirm against the live page layout.
for m in re.finditer(EPISODE_ITER_RE, page, re.DOTALL | re.IGNORECASE):
item = self.video_item()
item['title'] = "%s. %s (%s)" % (m.group('episode'), m.group('title'), m.group('date'))
item['url'] = m.group('url')
self._filter(result, item)
return result
示例10: dotransform
def dotransform(request, response):
    r"""Extract SMTP envelope addresses from a captured message file.

    Reads the file named by request.value, collects RCPT TO / MAIL FROM
    addresses, and appends one EmailAddress entity per address (with file
    location and header-info fields) to the Maltego response.

    Bug fix: both address regexes contained the literal text
    "[email protected]" -- residue of an e-mail-obfuscation filter applied
    to the published source -- so they could never match a real address.
    Restored the evident intended pattern `[\w.-]+@[\w.-]+` (now as raw
    strings).
    """
    emailaddr = []
    msgfile = request.value
    lookFor = ['To', 'From']
    # Kept: the field lookup raises if the transform setting is missing.
    tmpfolder = request.fields['sniffMyPackets.outputfld']
    with open(msgfile, mode='r') as msgfile:
        reader = str(msgfile.read())
        for x in lookFor:
            if x in reader:
                # Envelope recipient(s).
                for s in re.finditer(r'RCPT TO: <([\w.-]+@[\w.-]+)>', reader):
                    emailaddr.append((s.group(1), 'mail_to'))
                # Envelope sender(s).
                for t in re.finditer(r'MAIL FROM: <([\w.-]+@[\w.-]+)>', reader):
                    emailaddr.append((t.group(1), 'mail_from'))
    for addr, addrfield in emailaddr:
        e = EmailAddress(addr)
        e.linklabel = addrfield
        e += Field('filelocation', request.value, displayname='File Location', matchingrule='loose')
        e += Field('emailaddr', addrfield, displayname='Header Info')
        response += e
    return response
示例11: list_archive_page
def list_archive_page(self, show_page, showon=False, showoff=False):
    """Build the archive listing: currently-airing shows (sorted by title)
    followed by no-longer-airing shows (sorted by title)."""
    airing = []
    if showon:
        section = util.substr(show_page, VYSIELANE_START, NEVYSIELANE_START)
        for m in re.finditer(VYSIELANE_ITER_RE, section, re.DOTALL | re.IGNORECASE):
            entry = self.dir_item()
            entry['title'] = m.group('title')
            entry['plot'] = m.group('desc')
            entry['url'] = m.group('url') + "#season_episode"
            # An 'itime' capture marks shows with a 7-day catch-up window.
            entry['type'] = "showon7d" if m.group('itime') is not None else "showon"
            airing.append(entry)
    airing.sort(key=lambda e: e['title'].lower())

    ended = []
    if showoff:
        section = util.substr(show_page, NEVYSIELANE_START, NEVYSIELANE_END)
        for m in re.finditer(NEVYSIELANE_ITER_RE, section, re.DOTALL | re.IGNORECASE):
            entry = self.dir_item()
            entry['title'] = m.group('title')
            entry['url'] = m.group('url') + "#season_episode"
            entry['type'] = "showoff"
            ended.append(entry)
    ended.sort(key=lambda e: e['title'].lower())

    return airing + ended
示例12: setupTranslations
# Ensure the Crowdin project's language list covers the extension's locales
# plus everything its target platforms (Chrome / Gecko) support.  Python 2
# code (urllib2, print statement); depends on chromeLocales and mapLocale()
# defined elsewhere.  Raises Exception when Crowdin does not report success.
#   localeConfig -- dict with 'name_format', 'locales', 'target_platforms'
#   projectName  -- Crowdin project identifier
#   key          -- Crowdin API key
def setupTranslations(localeConfig, projectName, key):
# Make a new set from the locales list, mapping to Crowdin friendly format
locales = {mapLocale(localeConfig['name_format'], locale)
for locale in localeConfig['locales']}
# Fill up with locales that we don't have but the browser supports
if 'chrome' in localeConfig['target_platforms']:
for locale in chromeLocales:
locales.add(mapLocale('ISO-15897', locale))
if 'gecko' in localeConfig['target_platforms']:
# Scrape Firefox's download page and language-pack page for locale codes.
firefoxLocales = urllib2.urlopen('http://www.mozilla.org/en-US/firefox/all.html').read()
for match in re.finditer(r'&lang=([\w\-]+)"', firefoxLocales):
locales.add(mapLocale('BCP-47', match.group(1)))
langPacks = urllib2.urlopen('https://addons.mozilla.org/en-US/firefox/language-tools/').read()
for match in re.finditer(r'<tr>.*?</tr>', langPacks, re.S):
if match.group(0).find('Install Language Pack') >= 0:
match2 = re.search(r'lang="([\w\-]+)"', match.group(0))
if match2:
locales.add(mapLocale('BCP-47', match2.group(1)))
# Keep only language codes the Crowdin server lists as known.
allowed = set()
allowedLocales = urllib2.urlopen('http://crowdin.net/page/language-codes').read()
for match in re.finditer(r'<tr>\s*<td\b[^<>]*>([\w\-]+)</td>', allowedLocales, re.S):
allowed.add(match.group(1))
if not allowed.issuperset(locales):
print 'Warning, following locales aren\'t allowed by server: ' + ', '.join(locales - allowed)
locales = list(locales & allowed)
locales.sort()
# Push the final, sorted language list to Crowdin's edit-project API.
params = urllib.urlencode([('languages[]', locale) for locale in locales])
result = urllib2.urlopen('http://api.crowdin.net/api/project/%s/edit-project?key=%s' % (projectName, key), params).read()
if result.find('<success') < 0:
raise Exception('Server indicated that the operation was not successful\n' + result)
示例13: ExtendCurlys
def ExtendCurlys(self, list_of_terms, target_body):
    """
    Run FindWordsInBracketsAndCurlies first.
    Adds brackets to the same words if they have not yet received brackets.
    """
    # Pad with spaces so terms at the very start/end still have delimiters.
    self.target_body = ' ' + target_body + ' '
    # Spans of text already wrapped in [...] or {...}; those stay untouched.
    self.dbrackets = [m.span(0) for m in re.finditer(r"\[([\w \(\)\-,.]+)\]", self.target_body)]
    self.sbrackets = [m.span(0) for m in re.finditer(r"\{([\w \(\)\-,.]+)\}", self.target_body)]
    self.allbrackets = self.dbrackets + self.sbrackets

    def _wrap(match):
        # Skip occurrences that already sit inside a bracketed span.
        for lo, hi in self.allbrackets:
            if lo <= match.start(0) < hi:
                return match.group(0)
        self.curly_count += 1
        return match.group(1) + self.curly_term + match.group(2)

    self.curly_count = 0
    for index, term in enumerate(list_of_terms):
        self.curly_term = '{' + term + '}'
        pattern = re.compile(r"([^\{\w])%s([^\}\w])" % term, re.IGNORECASE)
        source = self.target_body if index == 0 else self.ecoutput
        self.ecoutput = pattern.sub(_wrap, source)
    # Strip the padding spaces added above.
    self.ecoutput = self.ecoutput[1:-1]
示例14: consistency_check
def consistency_check(text, word_pairs, err, msg, offset=0):
    """Build a consistency checker for the given word_pairs.

    For each pair whose two spellings both occur in `text`, flag every
    occurrence of the less frequent spelling and report the dominant one
    (ties flag the first spelling, suggesting the second).  Each finding is
    a (start, end, err, message, preferred_spelling) tuple.
    """
    findings = []
    msg = " ".join(msg.split())
    for pair in word_pairs:
        occ_first = list(re.finditer(pair[0], text))
        occ_second = list(re.finditer(pair[1], text))
        if not (occ_first and occ_second):
            continue
        # Explicit names replace the original's `~idx_minority` index trick.
        if len(occ_first) > len(occ_second):
            minority, dominant = occ_second, pair[0]
        else:
            minority, dominant = occ_first, pair[1]
        for occurrence in minority:
            findings.append((occurrence.start() + offset,
                             occurrence.end() + offset,
                             err,
                             msg.format(dominant, occurrence.group(0)),
                             dominant))
    return findings
示例15: stem_helper
def stem_helper(word, rem_umlaut=True):
    """Strip Middle High German inflectional/derivational suffixes from `word`.

    rem_umlaut: if True, also remove umlauts (via the module's remove_umlaut).

    R1 is the region after the first vowel-consonant pair; R2 is the region
    after the first vowel-consonant pair inside R1 (Snowball-style regions).

    Fixes vs. the original:
      * step-2's pattern had a misplaced closing quote that pulled the
        literal text ",word[R1:]" into the regex, so none of the intended
        suffixes were ever stripped;
      * the lookaround assertions had lost their '<' characters: '(?=...)'
        where '(?<=...)' was intended can never match (e.g. '(?=[lr])n'
        requires the same character to be both 'l'/'r' and 'n');
      * bare `except:` clauses narrowed to the IndexError they guarded
        (here avoided entirely by using re.search).
    """
    vowel_consonant = r"[aëeiouäöüâêîôûæœ][bdghfcjklmnspqrtvwz]"
    # R1 starts right after the first vowel-consonant pair.
    first = re.search(vowel_consonant, word)
    R1 = first.start() + 2 if first else len(word)
    # R2 starts after the first vowel-consonant pair within R1.
    inner = re.search(vowel_consonant, word[R1:])
    R2 = inner.start() + 2 + R1 if inner else len(word)
    # Make sure the index of R1 is at least 3.
    if R1 < 3:
        shifted = re.search(vowel_consonant, word[1:])
        R1 = shifted.start() + 2 if shifted else len(word)
    if rem_umlaut:
        word = remove_umlaut(word)
    # Step 1: declensional endings; 's' only after k/l/m/r/t, 'n' only after l/r.
    word = word[:R1] + re.sub(
        r'(wes|wen|est|ern|em|en|er|es|eȥ|(?<=[klmrt])s|(?<=[lr])n|e)$', "", word[R1:])
    # Step 2: verbal endings; 'st' only when at least 3 characters precede it.
    word = word[:R1] + re.sub(r'(est|er|en|re|in|iu|(?<=.{3})st)$', "", word[R1:])
    # Step 3: derivational suffixes, restricted to R2.
    # NOTE(review): 'lich?.?.' / 'isch?.?.' also look garbled (perhaps
    # 'lich'/'isch' with optional trailing characters); left unchanged --
    # confirm against the upstream stemmer before altering.
    word = word[:R2] + re.sub(r'(lich?.?.|keit|inc|isch?.?.)$', "", word[R2:])
    return word