本文整理汇总了Python中re.finditer函数的典型用法代码示例。如果您正苦于以下问题:Python finditer函数的具体用法?Python finditer怎么用?Python finditer使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了finditer函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_pronoun_label_zh
def _count_zh_pronouns(pronouns, line):
    """Count token-delimited occurrences of the given pronouns in `line`.

    `line` is assumed to be a whitespace-tokenised Chinese sentence, so a
    pronoun only counts when delimited by start/end of line or by spaces.
    Returns (count, start_positions, matched_strings).
    """
    count = 0
    positions = []
    matched = []
    for pro in pronouns:
        # Anchored at line start, mid-line, and line end respectively.
        for pattern in ('^' + pro + ' ', ' ' + pro + ' ', ' ' + pro + '$'):
            for m in re.finditer(pattern, line):
                count += 1
                positions.append(m.span()[0])
                matched.append(m.group(0))
    return count, positions, matched


def get_pronoun_label_zh(line):
    """Label a tokenised Chinese line by its dominant personal pronouns.

    Returns (label, f_count, s_count, f_matches, s_matches) where label is
    '1v' (first person dominates, or ties but appears first), '2v' (second
    person dominates), or 'none' when no pronoun occurs at all.

    Bug fix: the returned match lists now accumulate the matches of every
    pronoun; the original returned only the findall result of the *last*
    pronoun tried ('我 的' / '你 的'), which was usually an empty list even
    when the counts were non-zero.
    """
    f_count, f_positions, f_matches = _count_zh_pronouns(['我', '我们', '我 的'], line)
    s_count, s_positions, s_matches = _count_zh_pronouns(['你', '你们', '你 的'], line)
    if f_count == 0 and s_count == 0:
        return ('none', 0, 0, [], [])
    if f_count == s_count:
        # On a tie, whichever pronoun class appears earliest wins.
        starts_with = '1v' if min(f_positions) < min(s_positions) else '2v'
        return (starts_with, f_count, s_count, f_matches, s_matches)
    if f_count > s_count:
        return ('1v', f_count, s_count, f_matches, s_matches)
    return ('2v', f_count, s_count, f_matches, s_matches)
示例2: ParseMethodAnnotation
def ParseMethodAnnotation(self, annotation):
    """Parse a method annotation string and record the options it declares.

    Recognised options:
      reservable = true                  -> self._is_reservable = True
      delegate = true|false              -> self._is_delegate
      disableReflectMethod = true|false  -> self._disable_reflect_method
      preWrapperLines = {"...", ...}     -> self._method_annotations[ANNOTATION_PRE_WRAPLINE]
      postWrapperLines = {"...", ...}    -> self._method_annotations[ANNOTATION_POST_WRAPLINE]

    Fix: regex patterns are now raw strings; the originals contained
    '\\s' escapes in plain string literals, which raise DeprecationWarning
    (and are slated to become errors) on modern Python.
    """
    if annotation.find('reservable = true') >= 0:
        self._is_reservable = True

    delegate_re = re.compile(r'delegate\s*=\s*(?P<delegate>true|false)')
    for match in delegate_re.finditer(annotation):
        # The group can only be 'true' or 'false', so compare directly.
        self._is_delegate = match.group('delegate') == 'true'

    disable_reflect_method_re = re.compile(
        r'disableReflectMethod\s*=\s*(?P<disableReflectMethod>true|false)')
    for match in disable_reflect_method_re.finditer(annotation):
        self._disable_reflect_method = match.group('disableReflectMethod') == 'true'

    pre_wrapline_re = re.compile(
        r'preWrapperLines\s*=\s*\{\s*(?P<pre_wrapline>(".*")(,\s*".*")*)\s*\}')
    for match in pre_wrapline_re.finditer(annotation):
        pre_wrapline = self.FormatWrapperLine(match.group('pre_wrapline'))
        self._method_annotations[self.ANNOTATION_PRE_WRAPLINE] = pre_wrapline

    post_wrapline_re = re.compile(
        r'postWrapperLines\s*=\s*\{\s*(?P<post_wrapline>(".*")(,\s*".*")*)\s*\}')
    for match in post_wrapline_re.finditer(annotation):
        post_wrapline = self.FormatWrapperLine(match.group('post_wrapline'))
        self._method_annotations[self.ANNOTATION_POST_WRAPLINE] = post_wrapline
示例3: scan_page
# Probe each GET/POST parameter of `url` / `data` for reflected XSS.
# Python 2 code: print statements, xrange, urllib.quote.  Relies on
# module-level names defined elsewhere in the file (GET, POST,
# PREFIX_SUFFIX_LENGTH, LARGER_CHAR_POOL, SMALLER_CHAR_POOL, XSS_PATTERNS,
# _retrieve_content, _contains).  Returns True if any parameter appeared
# vulnerable.
def scan_page(url, data=None):
retval, usable = False, False
# Give value-less parameters ("a=&b=") the dummy value "1" so they can be tampered with.
url, data = re.sub(r"=(&|\Z)", "=1\g<1>", url) if url else url, re.sub(r"=(&|\Z)", "=1\g<1>", data) if data else data
try:
# Test the query string first (GET phase), then the request body (POST phase).
for phase in (GET, POST):
current = url if phase is GET else (data or "")
for match in re.finditer(r"((\A|[?&])(?P<parameter>[\w\[\]]+)=)(?P<value>[^&]+)", current):
found, usable = False, True
print "* scanning %s parameter '%s'" % (phase, match.group("parameter"))
# Random lowercase prefix/suffix delimit the injected payload when it is reflected back.
prefix, suffix = ("".join(random.sample(string.ascii_lowercase, PREFIX_SUFFIX_LENGTH)) for i in xrange(2))
# Try the larger pool (with a leading quote) first, then fall back to the smaller one.
for pool in (LARGER_CHAR_POOL, SMALLER_CHAR_POOL):
if not found:
# Append "<prefix><shuffled pool><suffix>" (URL-encoded) to the parameter's original value.
tampered = current.replace(match.group(0), "%s%s" % (match.group(0), urllib.quote("%s%s%s%s" % ("'" if pool == LARGER_CHAR_POOL else "", prefix, "".join(random.sample(pool, len(pool))), suffix))))
content = (_retrieve_content(tampered, data) if phase is GET else _retrieve_content(url, tampered)).replace("%s%s" % ("'" if pool == LARGER_CHAR_POOL else "", prefix), prefix)
# Look for the payload in the response and classify the reflection context.
for sample in re.finditer("%s([^ ]+?)%s" % (prefix, suffix), content, re.I):
for regex, condition, info, content_removal_regex in XSS_PATTERNS:
context = re.search(regex % {"chars": re.escape(sample.group(0))}, re.sub(content_removal_regex or "", "", content), re.I)
if context and not found and sample.group(1).strip():
if _contains(sample.group(1), condition):
print " (i) %s parameter '%s' appears to be XSS vulnerable (%s)" % (phase, match.group("parameter"), info % dict((("filtering", "no" if all(char in sample.group(1) for char in LARGER_CHAR_POOL) else "some"),)))
found = retval = True
break
if not usable:
print " (x) no usable GET/POST parameters found"
except KeyboardInterrupt:
print "\r (x) Ctrl-C pressed"
return retval
示例4: tableViewInHierarchy
def tableViewInHierarchy():
    """Find a UITableView (or subclass) in the key window's view hierarchy.

    Returns the view's address as a hex string, or None if nothing matches.
    """
    description = fb.evaluateExpressionValue(
        "(id)[(id)[[UIApplication sharedApplication] keyWindow] recursiveDescription]"
    ).GetObjectDescription()

    # Fast paths: a plain UITableView instance, then a declared subclass.
    for pattern in (r"UITableView: (0x[0-9a-fA-F]+);",
                    r"(0x[0-9a-fA-F]+); baseClass = UITableView;"):
        hit = re.search(pattern, description)
        if hit:
            return hit.group(1)

    # SLOW: check every pointer in town
    for candidate in re.findall(re.compile(r"(0x[0-9a-fA-F]+)[;>]"), description):
        if fb.evaluateBooleanExpression("[" + candidate + " isKindOfClass:(id)[UITableView class]]"):
            return candidate
    return None
示例5: setupTranslations
# Ensure the Crowdin project's language list covers our locales plus every
# locale the target browser supports.  Python 2 code (urllib2, print
# statement); depends on chromeLocales and mapLocale() defined elsewhere.
#   type        -- 'chrome' or anything else (treated as Firefox); note this
#                  parameter shadows the `type` builtin inside the function
#   locales     -- iterable of locale codes we already ship
#   projectName -- Crowdin project identifier
#   key         -- Crowdin API key
# Raises Exception when Crowdin does not report the edit as successful.
def setupTranslations(type, locales, projectName, key):
# Copy locales list, we don't want to change the parameter
locales = set(locales)
# Fill up with locales that we don't have but the browser supports
if type == 'chrome':
for locale in chromeLocales:
locales.add(locale)
else:
# Scrape Firefox's download page and language-pack page for locale codes.
firefoxLocales = urllib2.urlopen('http://www.mozilla.org/en-US/firefox/all.html').read()
for match in re.finditer(r'&lang=([\w\-]+)"', firefoxLocales):
locales.add(mapLocale(type, match.group(1)))
langPacks = urllib2.urlopen('https://addons.mozilla.org/en-US/firefox/language-tools/').read()
for match in re.finditer(r'<tr>.*?</tr>', langPacks, re.S):
if match.group(0).find('Install Language Pack') >= 0:
match2 = re.search(r'lang="([\w\-]+)"', match.group(0))
if match2:
locales.add(mapLocale(type, match2.group(1)))
# Convert locale codes to the ones that Crowdin will understand
locales = set(map(lambda locale: mapLocale(type, locale), locales))
# Keep only language codes the Crowdin server lists as known.
allowed = set()
allowedLocales = urllib2.urlopen('http://crowdin.net/page/language-codes').read()
for match in re.finditer(r'<tr>\s*<td\b[^<>]*>([\w\-]+)</td>', allowedLocales, re.S):
allowed.add(match.group(1))
if not allowed.issuperset(locales):
print 'Warning, following locales aren\'t allowed by server: ' + ', '.join(locales - allowed)
locales = list(locales & allowed)
locales.sort()
# Push the final, sorted language list to Crowdin's edit-project API.
params = urllib.urlencode([('languages[]', locale) for locale in locales])
result = urllib2.urlopen('http://api.crowdin.net/api/project/%s/edit-project?key=%s' % (projectName, key), params).read()
if result.find('<success') < 0:
raise Exception('Server indicated that the operation was not successful\n' + result)
示例6: consistency_check
def consistency_check(text, word_pairs, err, msg, offset=0):
    """Build a consistency checker for the given word_pairs.

    For every pair whose two spellings both occur in `text`, flag each
    occurrence of the *less frequent* spelling, suggesting the dominant one.
    Returns a list of (start, end, err, message) tuples.
    """
    results = []
    msg = " ".join(msg.split())
    for pair in word_pairs:
        hits_a = list(re.finditer(pair[0], text))
        hits_b = list(re.finditer(pair[1], text))
        if not (hits_a and hits_b):
            continue
        # Whichever spelling occurs less often (ties flag the first form).
        if len(hits_a) > len(hits_b):
            minority, preferred = hits_b, pair[0]
        else:
            minority, preferred = hits_a, pair[1]
        for hit in minority:
            results.append((hit.start() + offset,
                            hit.end() + offset,
                            err,
                            msg.format(hit.group(0), preferred)))
    return results
示例7: get_sources
# Scrape a movie page for hosted-video links.  Python 2 code (urlparse
# module, the 'base-64' str codec); relies on scraper framework members
# defined elsewhere (self.get_url, self.base_url, self._http_get,
# self._caesar, self._get_links, QUALITIES, FORCE_NO_MATCH, scraper_utils).
# Returns a list of hoster dicts for `video`.
def get_sources(self, video):
source_url = self.get_url(video)
hosters = []
if source_url and source_url != FORCE_NO_MATCH:
url = urlparse.urljoin(self.base_url, source_url)
html = self._http_get(url, cache_limit=.5)
# A page-level warning downgrades the quality assigned to every link below.
match = re.search('This movie is of poor quality', html, re.I)
if match:
quality = QUALITIES.LOW
else:
quality = QUALITIES.HIGH
# Embedded players: fetch each embed page and de-obfuscate its payload.
for match in re.finditer('href="([^"]+/embed\d*/[^"]+)', html):
url = match.group(1)
embed_html = self._http_get(url, cache_limit=.5)
r = re.search('{\s*write\("([^"]+)', embed_html)
if r:
# Payload is Caesar-shift-13 over base64; sometimes the base64 layer comes first.
plaintext = self._caesar(r.group(1), 13).decode('base-64')
if 'http' not in plaintext:
plaintext = self._caesar(r.group(1).decode('base-64'), 13).decode('base-64')
else:
plaintext = embed_html
hosters += self._get_links(plaintext)
# Direct links: anchors followed by a play_video.gif icon.
pattern = 'href="([^"]+)".*play_video.gif'
for match in re.finditer(pattern, html, re.I):
url = match.group(1)
host = urlparse.urlparse(url).hostname
hoster = {'multi-part': False, 'url': url, 'host': host, 'class': self, 'quality': scraper_utils.get_quality(video, host, quality), 'rating': None, 'views': None, 'direct': False}
hosters.append(hoster)
return hosters
示例8: _generate_entry_probe
def _generate_entry_probe(self):
# Any $entry(name) expressions result in saving that argument
# when entering the function.
self.args_to_probe = set()
regex = r"\$entry\((\w+)\)"
for expr in self.exprs:
for arg in re.finditer(regex, expr):
self.args_to_probe.add(arg.group(1))
for arg in re.finditer(regex, self.filter):
self.args_to_probe.add(arg.group(1))
if any(map(lambda expr: "$latency" in expr, self.exprs)) or \
"$latency" in self.filter:
self.args_to_probe.add("__latency")
self.param_types["__latency"] = "u64" # nanoseconds
for pname in self.args_to_probe:
if pname not in self.param_types:
raise ValueError("$entry(%s): no such param" \
% arg)
self.hashname_prefix = "%s_param_" % self.probe_hash_name
text = ""
for pname in self.args_to_probe:
# Each argument is stored in a separate hash that is
# keyed by pid.
text += "BPF_HASH(%s, u32, %s);\n" % \
(self.hashname_prefix + pname,
self.param_types[pname])
text += self._generate_entry()
return text
示例9: list_show_page
# Parse a show page into season and/or episode items.  Relies on
# module-level marker/regex constants (SERIES_*, EPISODE_*), util.substr,
# and the self.dir_item / self.video_item / self._filter helpers defined
# elsewhere.  Returns a list of item dicts.
def list_show_page(self, url, page, seasons=False, episodes=False):
result = []
# Alternate page layout used by the "epizody" / "archiv" sections.
if "/p/epizody" in url or "/p/epiz%C3%B3dy" in url or "p/archiv" in url:
if seasons:
season_data = util.substr(page, SERIES_START2, SERIES_END2)
for m in re.finditer(SERIES_ITER_RE2, season_data, re.DOTALL | re.IGNORECASE):
item = self.dir_item()
item['title'] = m.group('title')
# Season id is carried in the URL fragment for later dispatch.
item['url'] = url + '#post=%s' % (m.group('id'))
self._filter(result, item)
if episodes:
for m in re.finditer(EPISODE_ITER_RE2, page, re.DOTALL | re.IGNORECASE):
item = self.video_item()
item['title'] = "%s (%s)" % (m.group('title'), m.group('date'))
item['url'] = m.group('url')
self._filter(result, item)
else:
if seasons:
season_data = util.substr(page, SERIES_START, SERIES_END)
for m in re.finditer(SERIES_ITER_RE, season_data, re.DOTALL | re.IGNORECASE):
item = self.dir_item()
item['title'] = m.group('title')
item['url'] = 'http://' + urlparse.urlparse(url).netloc + '/ajax.json?' + m.group('url')
self._filter(result, item)
if episodes:
episodes_data = util.substr(page, EPISODE_START, EPISODE_END)
# NOTE(review): episodes_data is computed but the finditer below scans
# `page` instead -- likely a bug (EPISODE_ITER_RE was presumably meant
# to run over episodes_data, as the seasons branch does with its
# substring).  Left unchanged; confirm against the live page layout.
for m in re.finditer(EPISODE_ITER_RE, page, re.DOTALL | re.IGNORECASE):
item = self.video_item()
item['title'] = "%s. %s (%s)" % (m.group('episode'), m.group('title'), m.group('date'))
item['url'] = m.group('url')
self._filter(result, item)
return result
示例10: dotransform
def dotransform(request, response):
    r"""Extract SMTP envelope addresses from a captured message file.

    Reads the file named by request.value, collects RCPT TO / MAIL FROM
    addresses, and appends one EmailAddress entity per address (with file
    location and header-info fields) to the Maltego response.

    Bug fix: both address regexes contained the literal text
    "[email protected]" -- residue of an e-mail-obfuscation filter applied
    to the published source -- so they could never match a real address.
    Restored the evident intended pattern `[\w.-]+@[\w.-]+` (now as raw
    strings).
    """
    emailaddr = []
    msgfile = request.value
    lookFor = ['To', 'From']
    # Kept: the field lookup raises if the transform setting is missing.
    tmpfolder = request.fields['sniffMyPackets.outputfld']
    with open(msgfile, mode='r') as msgfile:
        reader = str(msgfile.read())
        for x in lookFor:
            if x in reader:
                # Envelope recipient(s).
                for s in re.finditer(r'RCPT TO: <([\w.-]+@[\w.-]+)>', reader):
                    emailaddr.append((s.group(1), 'mail_to'))
                # Envelope sender(s).
                for t in re.finditer(r'MAIL FROM: <([\w.-]+@[\w.-]+)>', reader):
                    emailaddr.append((t.group(1), 'mail_from'))
    for addr, addrfield in emailaddr:
        e = EmailAddress(addr)
        e.linklabel = addrfield
        e += Field('filelocation', request.value, displayname='File Location', matchingrule='loose')
        e += Field('emailaddr', addrfield, displayname='Header Info')
        response += e
    return response
示例11: list_archive_page
def list_archive_page(self, show_page, showon=False, showoff=False):
    """Build the archive listing: currently-airing shows (sorted by title)
    followed by no-longer-airing shows (sorted by title)."""
    airing = []
    if showon:
        section = util.substr(show_page, VYSIELANE_START, NEVYSIELANE_START)
        for m in re.finditer(VYSIELANE_ITER_RE, section, re.DOTALL | re.IGNORECASE):
            entry = self.dir_item()
            entry['title'] = m.group('title')
            entry['plot'] = m.group('desc')
            entry['url'] = m.group('url') + "#season_episode"
            # An 'itime' capture marks shows with a 7-day catch-up window.
            entry['type'] = "showon7d" if m.group('itime') is not None else "showon"
            airing.append(entry)
    airing.sort(key=lambda e: e['title'].lower())

    ended = []
    if showoff:
        section = util.substr(show_page, NEVYSIELANE_START, NEVYSIELANE_END)
        for m in re.finditer(NEVYSIELANE_ITER_RE, section, re.DOTALL | re.IGNORECASE):
            entry = self.dir_item()
            entry['title'] = m.group('title')
            entry['url'] = m.group('url') + "#season_episode"
            entry['type'] = "showoff"
            ended.append(entry)
    ended.sort(key=lambda e: e['title'].lower())

    return airing + ended
示例12: setupTranslations
# Ensure the Crowdin project's language list covers the extension's locales
# plus everything its target platforms (Chrome / Gecko) support.  Python 2
# code (urllib2, print statement); depends on chromeLocales and mapLocale()
# defined elsewhere.  Raises Exception when Crowdin does not report success.
#   localeConfig -- dict with 'name_format', 'locales', 'target_platforms'
#   projectName  -- Crowdin project identifier
#   key          -- Crowdin API key
def setupTranslations(localeConfig, projectName, key):
# Make a new set from the locales list, mapping to Crowdin friendly format
locales = {mapLocale(localeConfig['name_format'], locale)
for locale in localeConfig['locales']}
# Fill up with locales that we don't have but the browser supports
if 'chrome' in localeConfig['target_platforms']:
for locale in chromeLocales:
locales.add(mapLocale('ISO-15897', locale))
if 'gecko' in localeConfig['target_platforms']:
# Scrape Firefox's download page and language-pack page for locale codes.
firefoxLocales = urllib2.urlopen('http://www.mozilla.org/en-US/firefox/all.html').read()
for match in re.finditer(r'&lang=([\w\-]+)"', firefoxLocales):
locales.add(mapLocale('BCP-47', match.group(1)))
langPacks = urllib2.urlopen('https://addons.mozilla.org/en-US/firefox/language-tools/').read()
for match in re.finditer(r'<tr>.*?</tr>', langPacks, re.S):
if match.group(0).find('Install Language Pack') >= 0:
match2 = re.search(r'lang="([\w\-]+)"', match.group(0))
if match2:
locales.add(mapLocale('BCP-47', match2.group(1)))
# Keep only language codes the Crowdin server lists as known.
allowed = set()
allowedLocales = urllib2.urlopen('http://crowdin.net/page/language-codes').read()
for match in re.finditer(r'<tr>\s*<td\b[^<>]*>([\w\-]+)</td>', allowedLocales, re.S):
allowed.add(match.group(1))
if not allowed.issuperset(locales):
print 'Warning, following locales aren\'t allowed by server: ' + ', '.join(locales - allowed)
locales = list(locales & allowed)
locales.sort()
# Push the final, sorted language list to Crowdin's edit-project API.
params = urllib.urlencode([('languages[]', locale) for locale in locales])
result = urllib2.urlopen('http://api.crowdin.net/api/project/%s/edit-project?key=%s' % (projectName, key), params).read()
if result.find('<success') < 0:
raise Exception('Server indicated that the operation was not successful\n' + result)
示例13: ExtendCurlys
def ExtendCurlys(self, list_of_terms, target_body):
    """
    Run FindWordsInBracketsAndCurlies first.
    Adds brackets to the same words if they have not yet received brackets.
    """
    # Pad with spaces so terms at the very start/end still have delimiters.
    self.target_body = ' ' + target_body + ' '
    # Spans of text already wrapped in [...] or {...}; those stay untouched.
    self.dbrackets = [m.span(0) for m in re.finditer(r"\[([\w \(\)\-,.]+)\]", self.target_body)]
    self.sbrackets = [m.span(0) for m in re.finditer(r"\{([\w \(\)\-,.]+)\}", self.target_body)]
    self.allbrackets = self.dbrackets + self.sbrackets

    def _wrap(match):
        # Skip occurrences that already sit inside a bracketed span.
        for lo, hi in self.allbrackets:
            if lo <= match.start(0) < hi:
                return match.group(0)
        self.curly_count += 1
        return match.group(1) + self.curly_term + match.group(2)

    self.curly_count = 0
    for index, term in enumerate(list_of_terms):
        self.curly_term = '{' + term + '}'
        pattern = re.compile(r"([^\{\w])%s([^\}\w])" % term, re.IGNORECASE)
        source = self.target_body if index == 0 else self.ecoutput
        self.ecoutput = pattern.sub(_wrap, source)
    # Strip the padding spaces added above.
    self.ecoutput = self.ecoutput[1:-1]
示例14: consistency_check
def consistency_check(text, word_pairs, err, msg, offset=0):
    """Build a consistency checker for the given word_pairs.

    For each pair whose two spellings both occur in `text`, flag every
    occurrence of the less frequent spelling and report the dominant one
    (ties flag the first spelling, suggesting the second).  Each finding is
    a (start, end, err, message, preferred_spelling) tuple.
    """
    findings = []
    msg = " ".join(msg.split())
    for pair in word_pairs:
        occ_first = list(re.finditer(pair[0], text))
        occ_second = list(re.finditer(pair[1], text))
        if not (occ_first and occ_second):
            continue
        # Explicit names replace the original's `~idx_minority` index trick.
        if len(occ_first) > len(occ_second):
            minority, dominant = occ_second, pair[0]
        else:
            minority, dominant = occ_first, pair[1]
        for occurrence in minority:
            findings.append((occurrence.start() + offset,
                             occurrence.end() + offset,
                             err,
                             msg.format(dominant, occurrence.group(0)),
                             dominant))
    return findings
示例15: stem_helper
def stem_helper(word, rem_umlaut=True):
    """Strip Middle High German inflectional/derivational suffixes from `word`.

    rem_umlaut: if True, also remove umlauts (via the module's remove_umlaut).

    R1 is the region after the first vowel-consonant pair; R2 is the region
    after the first vowel-consonant pair inside R1 (Snowball-style regions).

    Fixes vs. the original:
      * step-2's pattern had a misplaced closing quote that pulled the
        literal text ",word[R1:]" into the regex, so none of the intended
        suffixes were ever stripped;
      * the lookaround assertions had lost their '<' characters: '(?=...)'
        where '(?<=...)' was intended can never match (e.g. '(?=[lr])n'
        requires the same character to be both 'l'/'r' and 'n');
      * bare `except:` clauses narrowed to the IndexError they guarded
        (here avoided entirely by using re.search).
    """
    vowel_consonant = r"[aëeiouäöüâêîôûæœ][bdghfcjklmnspqrtvwz]"
    # R1 starts right after the first vowel-consonant pair.
    first = re.search(vowel_consonant, word)
    R1 = first.start() + 2 if first else len(word)
    # R2 starts after the first vowel-consonant pair within R1.
    inner = re.search(vowel_consonant, word[R1:])
    R2 = inner.start() + 2 + R1 if inner else len(word)
    # Make sure the index of R1 is at least 3.
    if R1 < 3:
        shifted = re.search(vowel_consonant, word[1:])
        R1 = shifted.start() + 2 if shifted else len(word)
    if rem_umlaut:
        word = remove_umlaut(word)
    # Step 1: declensional endings; 's' only after k/l/m/r/t, 'n' only after l/r.
    word = word[:R1] + re.sub(
        r'(wes|wen|est|ern|em|en|er|es|eȥ|(?<=[klmrt])s|(?<=[lr])n|e)$', "", word[R1:])
    # Step 2: verbal endings; 'st' only when at least 3 characters precede it.
    word = word[:R1] + re.sub(r'(est|er|en|re|in|iu|(?<=.{3})st)$', "", word[R1:])
    # Step 3: derivational suffixes, restricted to R2.
    # NOTE(review): 'lich?.?.' / 'isch?.?.' also look garbled (perhaps
    # 'lich'/'isch' with optional trailing characters); left unchanged --
    # confirm against the upstream stemmer before altering.
    word = word[:R2] + re.sub(r'(lich?.?.|keit|inc|isch?.?.)$', "", word[R2:])
    return word