本文整理汇总了Python中string.punctuation方法的典型用法代码示例。如果您正苦于以下问题:Python string.punctuation方法的具体用法?Python string.punctuation怎么用?Python string.punctuation使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类string
的用法示例。
在下文中一共展示了string.punctuation方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: clean_captions
# 需要导入模块: import string [as 别名]
# 或者: from string import punctuation [as 别名]
def clean_captions(captions):
    """Normalize every caption string in *captions*, in place.

    Each caption is lowercased, stripped of punctuation, and filtered so
    that one-character tokens (hanging 's'/'a') and tokens containing
    non-alphabetic characters are dropped.

    Args:
        captions: dict mapping any key to a list of caption strings.
            The inner lists are mutated in place.

    Returns:
        None. All changes happen through mutation.
    """
    # Build one translation table shared by all captions: deletes every
    # punctuation character in a single C-level pass per token.
    table = str.maketrans('', '', string.punctuation)
    for caption_list in captions.values():
        for i, caption in enumerate(caption_list):
            words = caption.split()                      # tokenize on whitespace
            words = [w.lower() for w in words]           # normalize case
            words = [w.translate(table) for w in words]  # strip punctuation
            words = [w for w in words if len(w) > 1]     # drop hanging 's'/'a'
            words = [w for w in words if w.isalpha()]    # drop tokens with digits
            caption_list[i] = ' '.join(words)
示例2: normalize_answer
# 需要导入模块: import string [as 别名]
# 或者: from string import punctuation [as 别名]
def normalize_answer(s):
    """Lower text, map punctuation to spaces, drop articles, and collapse
    whitespace.

    Relies on module-level compiled patterns ``re_art`` (article words)
    and ``re_punc`` (punctuation characters) — assumed defined elsewhere
    in this module.

    :param s: raw answer text.
    :return: normalized text.
    """
    def remove_articles(text):
        return re_art.sub(' ', text)

    def remove_punc(text):
        # Convert punctuation to spaces (not deletion) so hyphenated or
        # slashed words split into separate tokens.
        return re_punc.sub(' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))
示例3: random_string
# 需要导入模块: import string [as 别名]
# 或者: from string import punctuation [as 别名]
def random_string(n):
    """Return a random padding string of exactly *n* characters.

    The character pool is itself chosen at random: half the time the pad
    is all spaces; otherwise digits, uppercase letters and punctuation
    are mixed in with decreasing probability. Always whitespace-heavy,
    so the result reads as filler.

    :param n: desired length; ``n == 0`` yields the empty string.
    """
    if n == 0:
        return ""
    ws = " \t\n"
    roll = random.random()
    if roll > 0.5:
        return " " * n
    if roll > 0.3:
        pool = digits + ws
    elif roll > 0.2:
        pool = ascii_uppercase + ws
    elif roll > 0.1:
        pool = ascii_uppercase + digits + ws
    else:
        pool = ascii_uppercase + digits + punctuation + ws
    return "".join(random.choices(pool, k=n))
示例4: tokenize_sentence
# 需要导入模块: import string [as 别名]
# 或者: from string import punctuation [as 别名]
def tokenize_sentence(sentence):
    """
    Splits a sentence into words, strips punctuation and turns it to lowercase.

    :param sentence : the sentence to tokenize.
    :type sentence : str
    :return : list of words
    """
    # Keep only printable ASCII (drops NUL and DEL too) to avoid errors
    # with unrecognised characters downstream. This already guarantees
    # pure ASCII, so no separate encode/decode round-trip is needed.
    sentence = "".join(c for c in sentence if 0 < ord(c) < 127)
    # Remove punctuation in one translate() pass (Python 3 semantics).
    sentence = sentence.translate(str.maketrans("", "", string.punctuation))
    return sentence.lower().split()
示例5: __init__
# 需要导入模块: import string [as 别名]
# 或者: from string import punctuation [as 别名]
def __init__(
    self,
    parser,
    stop_words=spacy.lang.en.stop_words.STOP_WORDS,
    punctuations=string.punctuation,
):
    """Initialize the BOWTokenizer object.

    Arguments:
        parser {spacy.lang.en.English - by default} -- Any parser object
            that supports parser(sentence) call on it.

    Keyword Arguments:
        stop_words {iterable over str} -- Set of stop words to be removed.
            (default: {spacy.lang.en.stop_words.STOP_WORDS})
        punctuations {iterable over str} -- Set of punctuations to be
            removed. (default: {string.punctuation})
    """
    self.parser = parser
    # Keep the stop-word and punctuation collections for use at
    # tokenization time; no copies are taken, so callers share them.
    self.stop_words = stop_words
    self.punctuations = punctuations
示例6: gen_salt
# 需要导入模块: import string [as 别名]
# 或者: from string import punctuation [as 别名]
def gen_salt(self, set_=True):
    """
    Generate a random salt of 8 to 12 characters.

    Characters are drawn from letters, punctuation and digits.
    NOTE(review): this uses ``random`` (not ``secrets``); fine for
    non-cryptographic salting, otherwise consider ``secrets.choice``.

    :param set_: when True (default), also store the salt on this
        instance via ``self.set_salt``.
    :return: the salt as ``bytes``.
    """
    min_char = 8
    max_char = 12
    allchar = string.ascii_letters + string.punctuation + string.digits
    salt = "".join(
        choice(allchar) for _ in range(randint(min_char, max_char))
    ).encode()
    # Set the salt on the same instance if required
    if set_:
        self.set_salt(salt)
    return salt
示例7: normalize_answer
# 需要导入模块: import string [as 别名]
# 或者: from string import punctuation [as 别名]
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace.

    :param s: raw answer text.
    :return: normalized text suitable for exact-match comparison.
    """
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        # Deletion (not replacement) — "cat's" becomes "cats".
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))
示例8: get_character_type
# 需要导入模块: import string [as 别名]
# 或者: from string import punctuation [as 别名]
def get_character_type(character):
    """Classify a single character as alpha, numeric or punctuation.

    Returns ``None`` for anything else (e.g. whitespace).

    >>> get_character_type('a')
    'alpha'
    >>> get_character_type('1')
    'numeric'
    >>> get_character_type('.')
    'punctuation'
    >>> get_character_type(' ')
    """
    if character.isalpha():
        return 'alpha'
    if character.isnumeric():
        return 'numeric'
    if character in string.punctuation:
        return 'punctuation'
    return None
示例9: getRandomStr
# 需要导入模块: import string [as 别名]
# 或者: from string import punctuation [as 别名]
def getRandomStr(types='letter', length=8):
    """Return a random string of *length* characters.

    :param types: kind of characters to draw from:
        'letter' / 'ascii' -> letters only;
        'digit' / 'num'    -> digits only;
        anything else      -> letters and digits mixed.
    :param length: length of the returned string. Note that
        ``random.sample`` draws WITHOUT replacement, so *length* must not
        exceed the pool size (e.g. at most 10 for digits).
    :return: random string of the requested type and length.

    todo: optionally include string.punctuation in the pool.
    """
    import random
    import string
    if types in ['letter', 'ascii']:
        return ''.join(random.sample(string.ascii_letters, length))
    if types in ['digit', 'num']:
        return ''.join(random.sample(string.digits, length))
    return ''.join(random.sample(string.ascii_letters + string.digits, length))
示例10: normalize_answer
# 需要导入模块: import string [as 别名]
# 或者: from string import punctuation [as 别名]
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace.

    Removes both ASCII punctuation and Chinese punctuation
    (``zh.punctuation`` — assumes the ``zh``/zhon punctuation constants
    are imported elsewhere in this module; verify against callers).

    :param s: raw answer text.
    :return: normalized text.
    """
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation + zh.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))
示例11: _normalize_answer
# 需要导入模块: import string [as 别名]
# 或者: from string import punctuation [as 别名]
def _normalize_answer(s):
"""Lower text and remove punctuation, articles and extra whitespace.
Directly copied from official SQuAD eval script, SHOULD NOT BE MODIFIED.
Args:
s: Input text.
Returns:
Normalized text.
"""
def remove_articles(text):
return re.sub(r'\b(a|an|the)\b', ' ', text)
def white_space_fix(text):
return ' '.join(text.split())
def remove_punc(text):
exclude = set(string.punctuation)
return ''.join(ch for ch in text if ch not in exclude)
def lower(text):
return text.lower()
return white_space_fix(remove_articles(remove_punc(lower(s))))
示例12: trivial_tokenize_urdu
# 需要导入模块: import string [as 别名]
# 或者: from string import punctuation [as 别名]
def trivial_tokenize_urdu(text):
    """tokenize Urdu string

    A trivial tokenizer which just tokenizes on the punctuation boundaries.
    This also includes punctuations for the Urdu script.
    These punctuations characters were identified from the Unicode database
    for Arabic script by looking for punctuation symbols.

    Uses the module-level compiled pattern ``triv_tokenizer_urdu_pat``
    (assumed defined elsewhere in this module).

    Args:
        text (str): text to tokenize

    Returns:
        list: list of tokens
    """
    # Pad each punctuation match with spaces, then split on runs of spaces.
    # Tabs are normalized to spaces first so they act as separators too.
    tok_str = triv_tokenizer_urdu_pat.sub(r' \1 ', text.replace('\t', ' '))
    return re.sub(r'[ ]+', ' ', tok_str).strip(' ').split(' ')
示例13: normalize_answer
# 需要导入模块: import string [as 别名]
# 或者: from string import punctuation [as 别名]
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace.

    :param s: raw answer text.
    :return: normalized text suitable for exact-match comparison.
    """
    def remove_articles(text):
        return re.sub(r"\b(a|an|the)\b", " ", text)

    def white_space_fix(text):
        return " ".join(text.split())

    def remove_punc(text):
        # Deletion (not replacement) — "cat's" becomes "cats".
        exclude = set(string.punctuation)
        return "".join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))
示例14: get_preceeding_text
# 需要导入模块: import string [as 别名]
# 或者: from string import punctuation [as 别名]
def get_preceeding_text(self, prefix_arr):
    """Walk *prefix_arr* backwards, collecting trailing punctuation/whitespace.

    Scans from the end until it hits either a ``TokenBase`` instance or a
    string containing at least one non-punctuation, non-whitespace
    character.

    :param prefix_arr: list of strings and ``TokenBase`` tokens.
    :return: tuple ``(remaining_prefix, word_or_None, intermediate)``:
        - if a ``TokenBase`` is hit first: (prefix up to AND including the
          token, None, collected separators);
        - if a word is hit first: (prefix up to but excluding the word,
          the word, collected separators);
        - if the array is exhausted: ([], None, collected separators).
    """
    intermediate = ""
    separators = string.punctuation + string.whitespace
    for idx in range(len(prefix_arr) - 1, -1, -1):
        entry = prefix_arr[idx]
        if isinstance(entry, TokenBase):
            # Tokens terminate the scan; keep the token in the prefix.
            return prefix_arr[:idx + 1], None, intermediate
        if all(char in separators for char in entry):
            # Pure separator string — accumulate it (preserving order).
            intermediate = entry + intermediate
        else:
            return prefix_arr[:idx], entry, intermediate
    return [], None, intermediate
示例15: test_parse_configuration_passes_through_quoted_punctuation
# 需要导入模块: import string [as 别名]
# 或者: from string import punctuation [as 别名]
def test_parse_configuration_passes_through_quoted_punctuation():
    """Punctuation inside a quoted YAML scalar must survive parsing intact."""
    # Escape backslash and double-quote so the YAML double-quoted string
    # round-trips to the raw string.punctuation value.
    escaped_punctuation = string.punctuation.replace('\\', r'\\').replace('"', r'\"')
    mock_config_and_schema(
        '''
        location:
            source_directories:
                - /home

            repositories:
                - "{}.borg"
        '''.format(
            escaped_punctuation
        )
    )
    result = module.parse_configuration('config.yaml', 'schema.yaml')
    assert result == {
        'location': {
            'source_directories': ['/home'],
            'repositories': ['{}.borg'.format(string.punctuation)],
        }
    }