

Python string.punctuation Code Examples

This article collects typical code examples of string.punctuation in Python. If you are wondering how to use string.punctuation, or what it looks like in practice, the curated examples below may help. Strictly speaking, string.punctuation is not a method but a constant of the string module holding the ASCII punctuation characters; you can also explore further usage examples of the string module it belongs to.


The following presents 15 code examples of string.punctuation, sorted by popularity by default.

Example 1: clean_captions

# Required import: import string [as alias]
# Or: from string import punctuation [as alias]
def clean_captions(captions):
	# Prepare translation table for removing punctuation
	table = str.maketrans('', '', string.punctuation)
	for _, caption_list in captions.items():
		for i in range(len(caption_list)):
			caption = caption_list[i]
			# Tokenize i.e. split on white spaces
			caption = caption.split()
			# Convert to lowercase
			caption = [word.lower() for word in caption]
			# Remove punctuation from each token
			caption = [w.translate(table) for w in caption]
			# Remove hanging 's' and 'a'
			caption = [word for word in caption if len(word)>1]
			# Remove tokens with numbers in them
			caption = [word for word in caption if word.isalpha()]
			# Store as string
			caption_list[i] =  ' '.join(caption) 
Author: dabasajay, Project: Image-Caption-Generator, Lines: 20, Source: preprocessing.py
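A minimal usage sketch, assuming clean_captions is defined as above with import string in scope; the captions dict is a made-up stand-in for whatever the project loads from its dataset:

import string

captions = {'img1': ['A dog runs, fast!', 'Two dogs are playing.']}
clean_captions(captions)
print(captions)
# {'img1': ['dog runs fast', 'two dogs are playing']}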

Example 2: normalize_answer

# Required import: import string [as alias]
# Or: from string import punctuation [as alias]
def normalize_answer(s):
    """Lower text and remove extra whitespace."""
    def remove_articles(text):
        return re_art.sub(' ', text)

    def remove_punc(text):
        return re_punc.sub(' ', text)  # convert punctuation to spaces

    def white_space_fix(text):
        return ' '.join(text.split())

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s)))) 
Author: hugochan, Project: BAMnet, Lines: 17, Source: generic_utils.py
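This excerpt relies on two module-level regexes, re_art and re_punc, defined elsewhere in generic_utils.py. A plausible reconstruction, offered here only as an assumption, together with a quick call:

import re
import string

re_art = re.compile(r'\b(a|an|the)\b')                        # assumed: strips English articles
re_punc = re.compile('[%s]' % re.escape(string.punctuation))  # assumed: matches any ASCII punctuation

print(normalize_answer("The quick, brown fox!"))  # -> 'quick brown fox'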

Example 3: random_string

# Required import: import string [as alias]
# Or: from string import punctuation [as alias]
def random_string(n):
    if n == 0:
        return ""

    x = random.random()
    if x > 0.5:
        pad = " " * n
    elif x > 0.3:
        pad = "".join(random.choices(digits + " \t\n", k=n))
    elif x > 0.2:
        pad = "".join(random.choices(ascii_uppercase + " \t\n", k=n))
    elif x > 0.1:
        pad = "".join(random.choices(ascii_uppercase + digits + " \t\n", k=n))
    else:
        pad = "".join(
            random.choices(ascii_uppercase + digits + punctuation + " \t\n", k=n)
        )

    return pad 
Author: zzzDavid, Project: ICDAR-2019-SROIE, Lines: 21, Source: my_utils.py
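The function assumes random plus the string constants are already imported at module level; a minimal sketch to run it standalone:

import random
from string import ascii_uppercase, digits, punctuation

random.seed(0)                  # only to make the example repeatable
print(repr(random_string(6)))   # e.g. '      ' or a 6-character mix, depending on the draw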

Example 4: tokenize_sentence

# Required import: import string [as alias]
# Or: from string import punctuation [as alias]
def tokenize_sentence(sentence):
    """
    Splits a sentence into words, strips punctuation and turns it to lowercase.

    :param sentence           : the sentence to tokenize.
    :type sentence            : str

    :return                   : list of words
    """

    # Get rid of non-ascii characters to avoid errors with unrecognised characters
    sentence = "".join([c for c in sentence if 0 < ord(c) < 127])

    sentence = sentence.encode("ascii", errors="ignore").decode()

    # Only works in Python 3
    sentenceNoPunctuation = sentence.translate(str.maketrans("", "", string.punctuation))

    sentenceLower         = sentenceNoPunctuation.lower()
    sentenceWords         = sentenceLower.split()

    return sentenceWords 
Author: Wluper, Project: edm, Lines: 24, Source: data_structures.py
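A quick usage sketch, assuming tokenize_sentence is defined as above with import string in scope; note how the non-ASCII stripping also drops accented letters:

import string

print(tokenize_sentence("Héllo, World! It's 9 o'clock."))
# -> ['hllo', 'world', 'its', '9', 'oclock']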

Example 5: __init__

# Required import: import string [as alias]
# Or: from string import punctuation [as alias]
def __init__(
        self,
        parser,
        stop_words=spacy.lang.en.stop_words.STOP_WORDS,
        punctuations=string.punctuation,
    ):
        """Initialize the BOWTokenizer object.

        Arguments:
            parser {spacy.lang.en.English - by default} -- Any parser object
                that supports parser(sentence) call on it.

        Keyword Arguments:
            stop_words {iterable over str} -- Set of stop words to be removed.
            (default: {spacy.lang.en.stop_words.STOP_WORDS})
            punctuations {iterable over str} -- Set of punctuations to be
            removed. (default: {string.punctuation})
        """
        self.parser = parser
        # list of stop words and punctuation marks
        self.stop_words = stop_words
        self.punctuations = punctuations 
Author: interpretml, Project: interpret-text, Lines: 24, Source: utils_classical.py
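A usage sketch under the assumption that spaCy and its small English model are installed; BOWTokenizer is the surrounding class in utils_classical.py:

import spacy

nlp = spacy.load("en_core_web_sm")   # assumes the model has been downloaded
tokenizer = BOWTokenizer(nlp)        # defaults: spaCy STOP_WORDS and string.punctuation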

Example 6: gen_salt

# Required import: import string [as alias]
# Or: from string import punctuation [as alias]
def gen_salt(self, set_=True):
        """
            Generate a random salt
        """

        min_char = 8
        max_char = 12
        allchar = string.ascii_letters + string.punctuation + string.digits
        salt = "".join(choice(allchar)
                       for x in range(randint(min_char, max_char))).encode()

        # Set the salt in the same instance if required
        if set_:
            self.set_salt(salt)

        return salt 
Author: gabfl, Project: vault, Lines: 18, Source: Encryption.py
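gen_salt is a method, and it leans on choice and randint from the random module plus three string constants; the core expression can be tried on its own as a rough sketch:

import string
from random import choice, randint

allchar = string.ascii_letters + string.punctuation + string.digits
salt = "".join(choice(allchar) for _ in range(randint(8, 12))).encode()
print(salt)   # e.g. b'Q#f9}xLw&'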

Example 7: normalize_answer

# Required import: import string [as alias]
# Or: from string import punctuation [as alias]
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s)))) 
Author: HKUST-KnowComp, Project: MnemonicReader, Lines: 18, Source: evaluate-v1.1.py

Example 8: get_character_type

# Required import: import string [as alias]
# Or: from string import punctuation [as alias]
def get_character_type(character):
    """
    >>> get_character_type('a')
    'alpha'
    >>> get_character_type('1')
    'numeric'
    >>> get_character_type('.')
    'punctuation'
    >>> get_character_type(' ')
    """
    if character.isalpha():
        return 'alpha'
    elif character.isnumeric():
        return 'numeric'
    elif character in string.punctuation:
        return 'punctuation'
    return None 
Author: alvinwan, Project: timefhuman, Lines: 19, Source: tokenize.py
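A small sketch that applies the classifier to a mixed string (assuming import string is in scope where get_character_type is defined):

import string

for ch in "a1.? ":
    print(repr(ch), get_character_type(ch))
# 'a' alpha
# '1' numeric
# '.' punctuation
# '?' punctuation
# ' ' None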

Example 9: getRandomStr

# Required import: import string [as alias]
# Or: from string import punctuation [as alias]
def getRandomStr(types='letter', length=8):
        """ 隨機產生length長度的字符串

        :param types: 隨機字符串的類型
        types in ['letter', 'ascii'] 返回包含字母的字符串
        types in ['digit', 'num']: 返回包含數字的字符串
        其他:返回混合字母和數字的字符串

        :param length: 返回字符串的長度
        :return: 長度為length,類型為types的字符串

        todo string.punctuation

        """
        import random
        import string
        if types in ['letter', 'ascii']:
            return ''.join(random.sample(string.ascii_letters, length))
        if types in ['digit', 'num']:
            return ''.join(random.sample(string.digits, length))
        else:
            return ''.join(random.sample(string.ascii_letters + string.digits, length)) 
Author: pchaos, Project: wanggeService, Lines: 24, Source: base.py
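The docstring's todo hints at a punctuation variant; a purely hypothetical sketch of what that branch might look like (not part of the original project):

import random
import string

def random_with_punctuation(length=8):
    # Hypothetical extension covering the 'todo string.punctuation' note above
    pool = string.ascii_letters + string.digits + string.punctuation
    return ''.join(random.sample(pool, length))

print(random_with_punctuation(12))   # e.g. 'k$T2@w!qZ{8m'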

Example 10: normalize_answer

# Required import: import string [as alias]
# Or: from string import punctuation [as alias]
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation + zh.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s)))) 
Author: eva-n27, Project: BERT-for-Chinese-Question-Answering, Lines: 18, Source: eval.py
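Here zh is a module-level name from eval.py whose punctuation attribute supplies Chinese punctuation marks; one plausible candidate, assumed for this sketch, is the zhon package's hanzi module:

import re
import string
from zhon import hanzi as zh   # assumption about what eval.py binds to the name zh

print(normalize_answer('北京,中国的首都。'))
# -> '北京中国的首都'  (fullwidth comma and ideographic full stop stripped)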

Example 11: _normalize_answer

# Required import: import string [as alias]
# Or: from string import punctuation [as alias]
def _normalize_answer(s):
  """Lower text and remove punctuation, articles and extra whitespace.

  Directly copied from official SQuAD eval script, SHOULD NOT BE MODIFIED.

  Args:
    s: Input text.
  Returns:
    Normalized text.
  """

  def remove_articles(text):
    return re.sub(r'\b(a|an|the)\b', ' ', text)

  def white_space_fix(text):
    return ' '.join(text.split())

  def remove_punc(text):
    exclude = set(string.punctuation)
    return ''.join(ch for ch in text if ch not in exclude)

  def lower(text):
    return text.lower()

  return white_space_fix(remove_articles(remove_punc(lower(s)))) 
Author: google, Project: mipsqa, Lines: 27, Source: squad_data.py

Example 12: trivial_tokenize_urdu

# Required import: import string [as alias]
# Or: from string import punctuation [as alias]
def trivial_tokenize_urdu(text): 
    """tokenize Urdu string 

    A trivial tokenizer which just tokenizes on punctuation boundaries.
    This also covers punctuation marks used in the Urdu script.
    These punctuation characters were identified from the Unicode database
    for the Arabic script by looking for punctuation symbols.

    Args:
        text (str): text to tokenize

    Returns:
        list: list of tokens
    """
    tok_str=triv_tokenizer_urdu_pat.sub(r' \1 ',text.replace('\t',' '))
    return re.sub(r'[ ]+',' ',tok_str).strip(' ').split(' ') 
Author: anoopkunchukuttan, Project: indic_nlp_library, Lines: 18, Source: indic_tokenize.py
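The excerpt depends on a precompiled module-level pattern, triv_tokenizer_urdu_pat, and on import re. A simplified reconstruction, assumed here for illustration (the library's actual pattern covers more Arabic-script code points):

import re
import string

# Assumed pattern: ASCII punctuation plus a few common Urdu/Arabic marks
# (Arabic comma, semicolon, question mark and full stop)
triv_tokenizer_urdu_pat = re.compile(
    '([' + re.escape(string.punctuation) + '\u060C\u061B\u061F\u06D4' + '])')

print(trivial_tokenize_urdu('یہ ایک جملہ ہے۔'))
# -> ['یہ', 'ایک', 'جملہ', 'ہے', '۔']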

Example 13: normalize_answer

# Required import: import string [as alias]
# Or: from string import punctuation [as alias]
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""

    def remove_articles(text):
        return re.sub(r"\b(a|an|the)\b", " ", text)

    def white_space_fix(text):
        return " ".join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return "".join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s)))) 
Author: cdqa-suite, Project: cdQA, Lines: 19, Source: evaluation.py

Example 14: get_preceeding_text

# Required import: import string [as alias]
# Or: from string import punctuation [as alias]
def get_preceeding_text(self, prefix_arr):
		intermediate = ""
		consumed = 0

		# print("Get preceeding text:", prefix_arr)
		for idx in range(len(prefix_arr)-1, 0-1, -1):
			if isinstance(prefix_arr[idx], TokenBase):
				# print("Get preceeding text returning:", (prefix_arr[:idx+1], None, intermediate))
				return prefix_arr[:idx+1], None, intermediate
			if all([char in string.punctuation+string.whitespace for char in prefix_arr[idx]]):
				intermediate = prefix_arr[idx] + intermediate
				consumed += 1
			else:
				# print("Get preceeding text returning:", (prefix_arr[:idx], prefix_arr[idx], intermediate))
				return prefix_arr[:idx], prefix_arr[idx], intermediate

		# print("get_preceeding_text", ([], None, intermediate))
		return [], None, intermediate 
Author: fake-name, Project: ReadableWebProxy, Lines: 20, Source: titleParseNew.py
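The string.punctuation use here is the membership test that decides whether a fragment is pure separator text; in isolation it looks like this:

import string

SEPARATOR_CHARS = string.punctuation + string.whitespace

print(all(c in SEPARATOR_CHARS for c in ' -- '))     # True: only punctuation/whitespace
print(all(c in SEPARATOR_CHARS for c in 'Ch. 12'))   # False: contains letters and digits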

Example 15: test_parse_configuration_passes_through_quoted_punctuation

# Required import: import string [as alias]
# Or: from string import punctuation [as alias]
def test_parse_configuration_passes_through_quoted_punctuation():
    escaped_punctuation = string.punctuation.replace('\\', r'\\').replace('"', r'\"')

    mock_config_and_schema(
        '''
        location:
            source_directories:
                - /home

            repositories:
                - "{}.borg"
        '''.format(
            escaped_punctuation
        )
    )

    result = module.parse_configuration('config.yaml', 'schema.yaml')

    assert result == {
        'location': {
            'source_directories': ['/home'],
            'repositories': ['{}.borg'.format(string.punctuation)],
        }
    } 
Author: witten, Project: borgmatic, Lines: 26, Source: test_validate.py
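For reference, the escaping step on its own; this is a sketch of the string the test embeds in the YAML document:

import string

escaped_punctuation = string.punctuation.replace('\\', r'\\').replace('"', r'\"')
print(string.punctuation)      # !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
print(escaped_punctuation)     # !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~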


Note: The string.punctuation examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are taken from open-source projects contributed by various developers; copyright of the source code remains with the original authors. Please consult each project's license before redistributing or using the code, and do not reproduce this article without permission.