当前位置: 首页>>代码示例>>Python>>正文


Python regex.match函数代码示例

本文整理汇总了Python中regex.match函数的典型用法代码示例。如果您正苦于以下问题:Python match函数的具体用法?Python match怎么用?Python match使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了match函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

 def __init__(self, text, lv=None, lc=None, vc=None):
     """Initialize from another Ex instance, an 'xxx-000' lv string, or an (lc, vc) pair.

     text -- an Ex to copy, or the expression text as a str
     lv   -- combined language-variety code, e.g. 'eng-000'
     lc   -- 3-letter lowercase ISO 639 language code (used with vc)
     vc   -- non-negative variety number (used with lc)
     """
     # Copy-constructor path: clone the fields of an existing Ex.
     if isinstance(text, Ex):
         self._text = text.text
         self._lc = text.lc
         self._vc = text.vc
         return
     if not isinstance(text, str):
         raise TypeError("text must be string")
     self._text = text
     if lv:
         # Combined form: three lowercase letters, a dash, three or more digits.
         if not re.match(r'^[a-z]{3}-[0-9]{3,}$', lv):
             raise ValueError("lv must be in the format xxx-000")
         self._lc = lv[:3]
         self._vc = int(lv[4:])
     elif lc and (vc is not None):
         lc = lc.lower()
         if not re.match(r'^[a-z]{3}$', lc):
             raise ValueError("lc must be a 3-letter ISO 639 code")
         self._lc = lc
         # The inner raise for negatives is caught by the same except,
         # so both non-numeric and negative vc yield the same message.
         try:
             vc = int(vc)
             if vc < 0: raise ValueError("vc must be a positive integer")
             self._vc = vc
         except ValueError: raise ValueError("vc must be a positive integer")
     else:
         raise TypeError("{cls} requires lv".format(cls=self.__class__.__name__))
开发者ID:longnow,项目名称:panlex-tools,代码行数:26,代码来源:panlex.py

示例2: get_next_document

def get_next_document(h):
  """Read the next <doc>...</doc> block from the byte stream *h*.

  Returns the document as a list of decoded, stripped unicode lines, or
  None when the stream is exhausted.  Several fixups are applied to the
  opening <doc ...> tag: '_unk_' placeholders, a forum="0|1" attribute,
  urldomain/tld extraction, and malformed attribute values.
  """
  while True:
    l = h.readline()
    if not l:
      # b'' from readline means EOF: no further document available.
      doc = None
      break
    l = l.decode('utf-8').strip()
    if not l:
      # Skip blank lines between documents.
      continue

    if re.match(u'^<doc ', l, re.UNICODE):

      # Fix _unk_.
      l = re.sub(r'_unk_', 'unknown', l)

      # Forum detection.
      # NOTE: re.sub takes flags via the keyword argument; the original
      # passed re.UNICODE positionally, where it was silently consumed
      # as the *count* argument instead of as flags.
      if re.match(RE_FORUM, l, re.UNICODE):
        l = re.sub(u'>$', r' forum="1">', l, flags=re.UNICODE)
      else:
        l = re.sub(u'>$', r' forum="0">', l, flags=re.UNICODE)

      # Host and tld extraction.
      l = re.sub(r'( url="https{0,1}://)([^/]+)\.([a-z]{2,4})(|/|%)([^"]*")', r'\1\2.\3\4\5 urldomain="\2.\3" tld="\3"', l)

      # Fix some known problems in doc attr values.
      l = re.sub(r'=" +"', r'="unknown"', l)          # fix: attr=" "
      l = re.sub(r'="([^"]+)\\" ', r'="\1" ', l)   # fix: attr="val\"

      doc = [l]
    else:
      # Accumulate body lines until the closing tag is seen.
      doc = doc + [l]
      if re.match(u'^</doc>', l, re.UNICODE):
        break
  return doc
开发者ID:rsling,项目名称:cow,代码行数:34,代码来源:cow16-finalize-es.py

示例3: guess_split

def guess_split(majiribun, reading):
    """Guess which reading fragment belongs to each kanji in *majiribun*.

    Builds a greedy and a non-greedy regex over *reading* (one hiragana
    capture group per kanji, literal text elsewhere).  If both matches
    agree, the split is unambiguous and a {kanji: yomi} dict is
    returned; returns None when the reading does not match at all or
    the split is ambiguous.
    """
    kanjis = []
    matchreg_greedy = ''
    matchreg_nongreedy = ''
    for char in majiribun:
        if kanji_re.match(char):
            kanjis.append(char)
            # Raw strings: '\p' is not a valid str escape and raises
            # SyntaxWarning on modern Pythons ('\p{...}' itself requires
            # the third-party regex module at match time).
            matchreg_greedy += r"(\p{Hiragana}+)"
            matchreg_nongreedy += r"(\p{Hiragana}+?)"
        else:
            matchreg_greedy += re.escape(char)
            matchreg_nongreedy += re.escape(char)

    m = re.match(matchreg_greedy + '$', reading)
    if m is None:
        # Reading does not fit the kanji/okurigana pattern at all.
        return None
    yomis = m.groups()

    yomis_nongreedy = re.match(matchreg_nongreedy + '$', reading).groups()
    if yomis != yomis_nongreedy:
        # Ambiguous: greedy and non-greedy splits disagree.
        return None
    # Pair each kanji with the reading fragment captured for it.
    return dict(zip(kanjis, yomis))
开发者ID:leoboiko,项目名称:yomisplit,代码行数:25,代码来源:__init__.py

示例4: process_file

def process_file(file_path, tagger, idf_doc_count, idf_table, threshold, maximum_words):
    """
    Takes the uploaded file, detects its type (plain text, alto XML, zip)
    and calls a parsing function accordingly. If everything succeeds it
    returns keywords and 200 code, returns an error otherwise.
    """
    file_info = magic.from_file(file_path)
    lines = []
    # The parentheses must be escaped: the unescaped pattern "(with BOM)"
    # was a capture group matching "with BOM" without parens, so it never
    # matched magic's literal output "UTF-8 Unicode (with BOM) text" and
    # BOM files fell through to the plain utf-8 branch, leaking the BOM.
    if re.match(r"^UTF-8 Unicode \(with BOM\) text", file_info):
        lines = lines_from_txt_file(file_path, encoding='utf-8-sig')
    elif re.match("^UTF-8 Unicode", file_info):
        lines = lines_from_txt_file(file_path, encoding='utf-8')
    elif re.match("^ASCII text", file_info):
        lines = lines_from_txt_file(file_path, encoding='utf-8')
    elif re.match('^XML 1.0 document', file_info) and \
            (file_path.endswith('.alto') or file_path.endswith('.xml')):
        lines = lines_from_alto_file(file_path)
    elif re.match('^Zip archive data', file_info):
        lines = lines_from_zip_file(file_path)
    else:
        # Key typo fixed: was "eror", inconsistent with "error" below.
        return {"error": "Unsupported file type: {}".format(file_info)}, 400

    if not lines:
        return {"error": "Empty file"}, 400
    return keywords.get_keywords(lines, tagger, idf_doc_count, idf_table, threshold, maximum_words), 200
开发者ID:ufal,项目名称:ker,代码行数:25,代码来源:server.py

示例5: __init__

 def __init__(self, room, s, negative=True):
  """
  Parse ban/filter expressions such as '/5m jid [email protected]' or
  'nick exp regexp' (glux-style syntax).

  Recognized forms after the optional leading time spec:
    'jid <jid>'    -- match by bare JID (lowercased)
    'nick <nick>'  -- match by nickname
    'exp <regexp>' -- remaining value is validated as a regular expression
    '<nick>'       -- bare nick, resolved to the occupant's real JID
  An optional '|| reason' suffix is split off into self.reason.
  """
  self.room = room
  self.negative = negative
  # fetch_time() strips a leading time spec like '/5m' and returns the rest.
  self.end_time, s = fetch_time(s)
  # '||' separates the match expression from a free-form reason text.
  if s.count('||'): s, self.reason = s[:s.find('||')].strip(), s[s.find('||')+2:].strip()
  else: s, self.reason = s.strip(), ''
  if s.lower().startswith('jid '):
   self.by_jid = True
   s = s[4:].lower()
   if not s: raise ValueError
  elif s.lower().startswith('nick '):
   self.by_jid = False
   s = s[5:]
   if not s: raise ValueError
  else:
   # Bare nick: look the occupant up in the room and target the real JID.
   self.by_jid = True
   self.regexp = False
   item = room.get(s, None)
   if item:
    # jid == realjid presumably means the real JID is unknown/anonymous,
    # so a JID-based entry cannot be built -- verify against Room impl.
    if item.jid == item.realjid: raise NoJID(item.jid)
    else: self.value = item.realjid.lower()
   else: raise NickNotFound(s)
   return
  if s.lower().startswith('exp '):
   self.regexp = True
   s = s[4:]
   # Validate the pattern by trying it against a sample JID string.
   # NOTE(review): bare except hides everything (incl. KeyboardInterrupt);
   # catching the regex module's error type would be safer.
   try: regex.match(s, '[email protected]')
   except: raise MyRegexpError(s)
  else: self.regexp = False
  self.value = s
开发者ID:TLemur,项目名称:freq-bot,代码行数:34,代码来源:alists.py

示例6: testRegex

    def testRegex(self):
        """Table-driven checks of the toy match() implementation."""
        cases = [
            ("foo", "foobar", 1),     # basic match, beginning of string
            ("oba", "foobar", 1),     # basic match, middle of string
            ("obo", "foobar", 0),     # basic match, no match
            ("^fo", "foobar", 1),     # match with start qualifier
            ("^bar", "foobar", 0),    # start qualifier in body
            ("bar$", "foobar", 1),    # match with end qualifier
            ("foo$", "foobar", 0),    # end qualifier in body
            ("fo*b", "foobar", 1),    # optional qualifier
            ("fooa*b", "foobar", 1),  # optional qualifier 2
            ("a*foo", "foobar", 1),   # optional qualifier 3
        ]
        for pattern, text, expected in cases:
            self.assertEqual(expected, match(pattern, text))
开发者ID:heuristicfencepost,项目名称:beautiful_code,代码行数:31,代码来源:regex_test.py

示例7: parseaddr

def parseaddr(address):
	# Split an RFC 822 address into (realname, email-address).
	# Ancient-Python code built on the pre-1.5 'regex' module, where
	# search() returns the match *position* (or -1) and match() returns
	# the match *length*.  NOTE(review): under that module's default
	# emacs-style syntax, plain parentheses in '(.*)' appear to match
	# literal '(' ... ')' characters, not a group -- do not port these
	# patterns to the modern 're' module unchanged.
	# This is probably not perfect
	address = string.strip(address)
	# Case 1: part of the address is in <[email protected]> form.
	pos = regex.search('<.*>', address)
	if pos >= 0:
		name = address[:pos]
		address = address[pos:]
		length = regex.match('<.*>', address)
		# Re-attach anything trailing the <...> part to the display name.
		name = name + address[length:]
		address = address[:length]
	else:
		# Case 2: part of the address is in (comment) form
		pos = regex.search('(.*)', address)
		if pos >= 0:
			name = address[pos:]
			address = address[:pos]
			length = regex.match('(.*)', name)
			address = address + name[length:]
			name = name[:length]
		else:
			# Case 3: neither. Only an address
			name = ''
	name = string.strip(name)
	address = string.strip(address)
	# Strip one surrounding <> pair from the address and () pair from the name.
	if address and address[0] == '<' and address[-1] == '>':
		address = address[1:-1]
	if name and name[0] == '(' and name[-1] == ')':
		name = name[1:-1]
	return name, address
开发者ID:asottile,项目名称:ancient-pythons,代码行数:30,代码来源:rfc822.py

示例8: test_yaml

def test_yaml(md_filepath):
    """Validate a markdown file's YAML front matter against `requirements`.

    Skips when there is no YAML header; otherwise checks required keys,
    link formatting, and date formatting, and rejects unknown headers.
    """
    filestring = md_filepath.read()
    # Front matter is the block delimited by '---' lines at the top.
    reg = regex.compile(r'^---(.*?)---',flags=regex.DOTALL)
    match = regex.search(reg, filestring)

    if not match: pytest.skip('No YAML header')

    yaml_text = match.group(1)
    # safe_load instead of load: calling yaml.load without an explicit
    # Loader is deprecated and can construct arbitrary Python objects
    # from the (untrusted) document.
    parsed_yaml = yaml.safe_load(yaml_text)
    for requirement in requirements:
        req = requirements[requirement]
        if req['required']:
            assert requirement in parsed_yaml, 'YAML metadata missing required element: ' + requirement
        if req['type'] == 'link':
            # Check external links have balanced brackets
            regexp = regex.compile(r'\[(.*)\]\((.*)\)')
            assert regex.match(regexp,parsed_yaml[requirement]), 'YAML metadata formatting error: ' + requirement
        if req['type'] == 'date' and requirement in parsed_yaml:
            try:
                d = parse(str(parsed_yaml[requirement]))
            except ValueError:
                assert False, 'YAML metadata formatting error: ' + requirement + ' date parse failed.'
            regexp = regex.compile(r'20[0-9]{2}-[0-9]{2}-[0-9]{2}')
            assert regex.match(regexp,str(parsed_yaml[requirement])), 'YAML metadata formatting error: ' + requirement + ' should use the format YYYY-MM-DD.'

    for header in parsed_yaml:
        assert header in requirements, 'YAML metadata header ' + header + ' is not a valid metadata type.' 
开发者ID:Jonchun,项目名称:docs,代码行数:27,代码来源:test_yaml.py

示例9: faiordict2contigorder

def faiordict2contigorder(file_name, file_format):
    '''Takes either a .fai or .dict file, and return a contig order dictionary, i.e., chrom_seq['chr1'] == 0'''

    assert file_format in ('fai', 'dict')

    contig_sequence = []
    with open(file_name) as gfile:
        line_i = gfile.readline().rstrip('\n')

        while line_i:

            # Reset per line.  Previously contig_match leaked across
            # iterations: a 'dict' file whose first line was not @SQ
            # raised UnboundLocalError, and any non-@SQ line following a
            # match re-appended the stale previous contig.
            contig_match = None

            if file_format == 'fai':
                contig_match = re.match(r'([^\t]+)\t', line_i)

            elif file_format == 'dict':
                if line_i.startswith('@SQ'):
                    contig_match = re.match(r'@SQ\tSN:([^\t]+)\tLN:', line_i)

            if contig_match:
                # some .fai files have space after the contig for descriptions.
                contig_i = contig_match.groups()[0].split(' ')[0]
                contig_sequence.append( contig_i )

            line_i = gfile.readline().rstrip('\n')

    # Map each contig name to its 0-based position within the file.
    chrom_seq = {contig_i: n for n, contig_i in enumerate(contig_sequence)}

    return chrom_seq
开发者ID:razZ0r,项目名称:somaticseq,代码行数:30,代码来源:vcf.py

示例10: acroize_heading

        def acroize_heading(m):
            # Rewrite a matched heading so it carries the text's acronym
            # (wrapped in a span) instead of a bare section number.
            # NOTE(review): `m` is presumably a regex match whose group 1
            # is a prefix and group 2 the heading text -- confirm against
            # the caller's pattern; `text` is captured from the enclosing
            # scope and looks like a dict with an optional 'acronym' key.
            acro = text.get('acronym')
            if not acro:
                # No acronym: leave the matched text untouched.
                return m[0]
            heading = m[2]
            if not heading:
                return acro
            # Split the heading into a leading number (possibly an
            # en-dash range like 12–14, optional trailing '.') and title.
            m2 = regex.match(r'(\d+(?:–(\d+))?)(?:\.)?\s*(.*)$', heading)
            if not m2:
                h_text = heading
            else:
                h_num = m2[1]
                h_text = m2[3]

                # Split the acronym into prefix and trailing number/range.
                m3 = regex.match(r'(.*?)(\d+(?:–(\d+))?)$', text['acronym'])
                acro_prefix = m3[1]
                acro_num = m3[2]

                # NOTE(review): `heading` is reassigned below but never
                # read afterwards -- new_heading uses h_text directly.
                # These look like dead stores (or h_text/heading mix-up);
                # verify intent before relying on the acro_num branches.
                if acro_num == h_num:
                    heading = h_text
                elif '–' in acro_num and h_num:
                    # Acronym carries a range but heading pins one number.
                    acro = acro_prefix + h_num
                    heading = h_text
                
                

            new_heading = f'<span class="acro">{acro}</span>{": " if h_text else ""}{h_text}'
            return f'{m[1]}{new_heading}'
开发者ID:suttacentral,项目名称:suttacentral,代码行数:28,代码来源:make_html.py

示例11: process_lines

def process_lines(lines, NONBREAKING_PREFIX):
    """Accumulate lines into blocks and sentence-split each block.

    A block ends at a tag line (re_tag) or a blank line; each finished
    block is passed to do_it_for(), and a "<P>" paragraph marker is
    emitted when non-empty text is followed by a blank line.
    """
    # loop text, add lines together until we get a blank line or a <p>
    out_text = ''

    # Compiled once with a raw string: the pattern was previously built
    # from a non-raw '\s' literal (invalid-escape warning on modern
    # Pythons) and evaluated twice per delimiter line.
    blank_re = regex.compile(r'^\s*$')

    text = ""
    for line in lines:
        line = line.strip()
        is_blank = blank_re.match(line)
        m = re_tag.match(line)
        if m is None:
            m = is_blank

        if m is not None:
            # time to process this block, we've hit a blank or <p>
            out_text += do_it_for(text, line, NONBREAKING_PREFIX)
            if is_blank and len(text):  # if we have text followed by <P>
                out_text += "<P>\n"
                text = ""
        else:
            # append the text, with a space
            text += line + " "

    # do the leftover text
    if len(text):
        out_text += do_it_for(text, "", NONBREAKING_PREFIX)
    return out_text
开发者ID:xiaolanchong,项目名称:sentence_splitter,代码行数:25,代码来源:splitter.py

示例12: create_activation

def create_activation(data, labels, standard_cols, group_labels=[]):
    """Build a database.Activation from a single table row.

    data          -- the row's cell values
    labels        -- column labels, parallel to data
    standard_cols -- per-column standardized attribute name or None,
                     parallel to data
    group_labels  -- group labels attached to the resulting activation

    Validation failures do not abort; they are recorded in
    activation.problems.  NOTE(review): Python 2 code (uses the
    unicode() builtin); also the mutable default for group_labels is
    shared across calls -- safe only if callers never mutate it.
    """

    activation = database.Activation()

    for i, col in enumerate(data):

        # Cast to integer or float if appropriate
        # if regex.match('[-\d]+$', col):
        #     col = int(col)
        # elif regex.match('[-\d\.]+$', col):
        #     col = float(col)

        # Set standard attributes if applicable and do validation where appropriate.
        # Generally, validation will not prevent a bad value from making it into the
        # activation object, but it will flag any potential issues using the "problem" column.
        if standard_cols[i] is not None:

            sc = standard_cols[i]

            # Validate XYZ columns: Should only be integers (and possible trailing decimals).
            # If they're not, keep only leading numbers. The exception is that ScienceDirect 
            # journals often follow the minus sign with a space (e.g., - 35), which we strip.
            if regex.match('[xyz]$', sc):
                m = regex.match('(-)\s+(\d+\.*\d*)$', col)
                if m:
                    col = "%s%s" % (m.group(1), m.group(2))
                if not regex.match('(-*\d+)\.*\d*$', col):
                    logging.debug("Value %s in %s column is not valid" % (col, sc))
                    activation.problems.append("Value in %s column is not valid" % sc)
                    # col = regex.search('(-*\d+)', col).group(1)
                    # Bail out early: the row's coordinates are unusable.
                    return activation
                col = (float(col))

            elif sc == 'region':
                if not regex.search('[a-zA-Z]', col):
                    logging.debug("Value in region column is not a string")
                    activation.problems.append("Value in region column is not a string")

            setattr(activation, sc, col)


        # Always include all columns in record
        activation.add_col(labels[i], col)
      
        # Handle columns with multiple coordinates (e.g., 45;12;-12).
        # Assume that any series of 3 numbers in a non-standard column
        # reflects coordinates. Will fail if there are leading numbers!!!
        # Also need to remove space between minus sign and numbers; some ScienceDirect
        # journals leave a gap.
        # NOTE(review): `i` is the integer column index, while
        # standard_cols holds attribute names / None -- `i in standard_cols`
        # therefore looks like it is almost always False, making this
        # branch run for standard columns too; verify intent.
        if not i in standard_cols:
            cs = '([\-\.\s]*\d{1,3})'
            m = regex.search('%s[,;\s]+%s[,;\s]+%s' % (cs, cs, cs), unicode(col).strip())
            if m:
                x, y, z = [regex.sub('-\s+', '-', c) for c in [m.group(1), m.group(2), m.group(3)]]
                logger.info("Found multi-coordinate column: %s\n...and extracted: %s, %s, %s" % (col, x, y, z))
                activation.set_coords(x, y, z)

    activation.groups = group_labels
    return activation
开发者ID:iceberg273,项目名称:ACE,代码行数:59,代码来源:tableparser.py

示例13: parse_line

def parse_line(line, perv_url):
    """Parse a 'Name1-Name2-tags[,url]' line into (name_1, name_2, link_types, url).

    perv_url supplies the URL when the line omits its own (single attr).
    Raises ValueError carrying a STR_* message code on any format error.
    """
    if not line or len(line.strip()) == 0:
        raise ValueError("STR_EMPTY")
    line = line.strip()
    spt = line.split('-')
    if len(spt) != 3:
        raise ValueError("STR_FORMAT")

    name_1 = spt[0]
    name_2 = spt[1]
    attrs = spt[2]
    attrs_spt = attrs.split(',')

    # Either an explicit URL is given, or a previous URL must exist.
    if not (len(attrs_spt) == 2 or (len(attrs_spt) == 1 and perv_url)):
        raise ValueError("STR_ENTRY_EMPTY")

    # Both names: non-empty, Cyrillic letters/spaces only, exactly two words.
    if not name_1 \
        or not name_2 \
        or not regex.match(r"^["+_cryllic+r"\s]+$", name_1)\
        or not regex.match(r"^["+_cryllic+r"\s]+$", name_2)\
        or len(name_1.split(' ')) != 2\
        or len(name_2.split(' ')) != 2:
            raise ValueError("STR_NAME_FORMAT")

    if name_1 == name_2:
        raise ValueError("STR_SAME_NAMES")

    if len(attrs_spt) == 2 and perv_url:
        raise ValueError("STR_TAG_FORMAT")
    # Dot-separated tag list; no leading/trailing dot.
    if not regex.match(r"^(?!\.)["+_cryllic+r"\.]+(?<!\.)$", attrs_spt[0]):
        raise ValueError("STR_TAG_FORMAT")

    link_types = attrs_spt[0].split('.')

    # Reject empty tags produced by '..'.  The original used
    # `if filter(lambda x: not x, link_types)`, which only works on
    # Python 2; on Python 3 a filter object is always truthy.
    if any(not x for x in link_types):
        raise ValueError("STR_TAG_FORMAT")

    arr = collections.Counter(link_types)
    doubled_tags = set(i for i in arr if arr[i]>1)
    if len(doubled_tags) != 0:
        raise ValueError("STR_TAG_DOUBLED:" + ",".join(doubled_tags))

    url = attrs_spt[1] if len(attrs_spt) == 2 else perv_url

    if not regex.match(r"http://[\w\.]+/[\w]+$", url):
        raise ValueError("STR_LINK_FORMAT")
    """
    sim_names = list(es.get_similar_names([name_1, name_2]))
    if isinstance(sim_names[0], basestring):
        raise ValueError(u"STR_SIMILAR_NAME:{},{}".format(name_1,sim_names[0]))
    if isinstance(sim_names[1], basestring):
        raise ValueError(u"STR_SIMILAR_NAME:{},{}".format(name_2,sim_names[1]))

    tags = filter(lambda x: not x[1], zip(link_types, es.check_tags(link_types)))
    if len(tags) != 0:
        raise ValueError(u"STR_TAG_NOT_FOUND:{}".format(",".join(map(lambda x: x[0], tags))))
    """

    return (name_1, name_2, link_types, url)
开发者ID:baio,项目名称:knit-service,代码行数:59,代码来源:line2bucket.py

示例14: test_zero_or_one

 def test_zero_or_one(self):
     """'ba?' must accept 'ba' and 'b' but reject 'aa'."""
     pattern = regex.build_regex("ba?")
     for candidate, should_match in (("ba", True), ("b", True), ("aa", False)):
         outcome = regex.match(pattern, candidate)
         if should_match:
             self.assertTrue(outcome)
         else:
             self.assertFalse(outcome)
开发者ID:Felttrip,项目名称:PythonRegex,代码行数:8,代码来源:test_regex.py

示例15: process_marked_lines

def process_marked_lines(lines, markers, return_flags=[False, -1, -1]):
    """Run regexes against message's marked lines to strip quotations.

    Return all but the last quoted segment if it exists.
    >>> mark_message_lines(['Hello', 'From: [email protected]', '', '> Hi', 'tsem'])
    ['Hello']

    Also returns return_flags.
    return_flags = [were_lines_deleted, first_deleted_line,
                    last_deleted_line]

    Marker characters appear to be one per line: 't' text, 'm' marked
    quotation, 's' splitter, 'e' empty, 'f' forward -- confirm against
    mark_message_lines().
    NOTE(review): markers is used both as a str (.replace below) and as
    a list (.reverse further down); a plain str has no .reverse(), so
    confirm the actual type callers pass.
    NOTE(review): return_flags has a mutable default list that is shared
    across calls omitting the argument.
    """
    # Pre-process marker sequence

    # if there are no splitter there should be no markers. However, allow markers if more than 3!
    if 's' not in markers and not re.search('(me*){3}', markers):
        markers = markers.replace('m', 't')

    # Look for forwards (don't remove anything on a forward)

    # if there is an f before the first split, then it's a forward.
    if re.match('[te]*f', markers):
        return_flags[:] = [False, -1, -1]
        return lines

    # Remove last quoted segment

    # match from the end of the markers list
    markers.reverse()

    # match for unmarked quote following split
    quotation = re.match(r'e*(te*)+(se*)+', markers)
    if not quotation:

        # match for inline replies
        if re_orig.match(r'e*[mfts]*((te*)+(me*)+)+[mfts]*((se*)+|(me*){2,})', markers):
            return_flags[:] = [False, -1, -1]
            return lines 

        # match for normal reply with quote
        quotation = re_orig.match(r'e*(me*)+[mefts]*((se*)+|(me*){2,})', markers)

    if not quotation:
        # match for normal reply with quote and signature below quote
        if re.match(r'e*(te*)+(me*)+.*(s)+e*(te*)+', markers):
            quotation = re.match(r'e*(te*)+(me*)+.*(s)+', markers)

    markers.reverse()

    # If quotation, return it
    if quotation:
        # Translate match positions on the reversed sequence back into
        # indices on the original line order.
        start = len(markers) - quotation.end() + 1
        end = len(markers) - quotation.start() - 1
        return_flags[:] = True, start, end
        return lines[:start] + lines[end:]


    return_flags[:] = [False, -1, -1]
    return lines
开发者ID:mgontav,项目名称:talon,代码行数:58,代码来源:quotations.py


注:本文中的regex.match函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。