本文整理匯總了Python中bs4.UnicodeDammit.startswith方法的典型用法代碼示例。如果您正苦於以下問題:Python UnicodeDammit.startswith方法的具體用法?Python UnicodeDammit.startswith怎麼用?Python UnicodeDammit.startswith使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類bs4.UnicodeDammit的用法示例。
在下文中一共展示了UnicodeDammit.startswith方法的3個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: _sub_read
# 需要導入模塊: from bs4 import UnicodeDammit [as 別名]
# 或者: from bs4.UnicodeDammit import startswith [as 別名]
def _sub_read(self, f):
    """
    Parse the lines of ``f`` and yield one example per data line.

    Lines starting with ``#`` set the ID used for the next example. Blank
    lines and the ``TRAIN``/``TEST``/``DEV`` marker lines are skipped.
    Every other line is split on whitespace and interpreted by its number
    of columns:

    * odd (including a single column): a class label, optionally followed
      by feature-value pairs;
    * even: feature-value pairs only, with no class label.

    :param f: Iterable of lines. Items that are not ``text_type`` are
              decoded with ``UnicodeDammit`` (trying utf-8, then
              windows-1252).
    :returns: Generator of ``(example_id, class_name, features)`` tuples,
              where ``class_name`` is ``None`` for unlabeled lines and
              ``features`` maps feature names to ``safe_float``-converted
              values.
    :raises ValueError: If a feature name occurs more than once on a line.
    """
    marker_lines = ('TRAIN', 'TEST', 'DEV')
    example_num = 0
    curr_id = 'EXAMPLE_0'

    for line in f:
        # Process encoding
        if not isinstance(line, text_type):
            line = UnicodeDammit(line, ['utf-8',
                                        'windows-1252']).unicode_markup
        line = line.strip()

        # A comment line carries the ID of the example that follows it
        if line.startswith('#'):
            curr_id = line[1:].strip()
            continue

        # Skip blank lines and data-set partition markers
        if not line or line in marker_lines:
            continue

        split_line = line.split()

        if len(split_line) % 2 == 1:
            # Odd column count: the first column is the class label and
            # whatever follows (possibly nothing) is feature-value pairs
            class_name = safe_float(split_line[0],
                                    replace_dict=self.class_map)
            field_pairs = split_line[1:]
        else:
            # Even column count: the line only has feature-value pairs
            class_name = None
            field_pairs = split_line

        curr_info_dict = {}
        if field_pairs:
            # Names sit at the even positions, values at the odd ones
            field_names = islice(field_pairs, 0, None, 2)
            # Convert values to floats, because otherwise the features
            # would be treated as categorical
            field_values = (safe_float(val) for val in
                            islice(field_pairs, 1, None, 2))
            curr_info_dict.update(zip(field_names, field_values))

            # A repeated name collapses into a single dictionary key, so
            # a shorter dictionary than expected means duplicates
            if len(curr_info_dict) != len(field_pairs) // 2:
                raise ValueError(('There are duplicate feature ' +
                                  'names in {} for example ' +
                                  '{}.').format(self.path_or_list,
                                                curr_id))

        yield curr_id, class_name, curr_info_dict

        # Set default example ID for next instance, in case we see a
        # line without an ID.
        example_num += 1
        curr_id = 'EXAMPLE_{}'.format(example_num)
示例2: convert_to_libsvm
# 需要導入模塊: from bs4 import UnicodeDammit [as 別名]
# 或者: from bs4.UnicodeDammit import startswith [as 別名]
def convert_to_libsvm(lines):
    '''
    Converts a sequence of lines (e.g., a file or list of strings) in MegaM
    format to LibSVM format.

    Comment lines (starting with ``#``), blank lines, and the
    ``TRAIN``/``TEST``/``DEV`` marker lines are skipped. On every other line
    the first column is the class label and the remaining columns are
    feature-value pairs; a value of ``N/A`` is read as 0.0, and features
    whose value is zero are left out of the output.

    :param lines: The sequence of lines to convert.
    :type lines: L{file} or L{list} of L{str}
    :return: A tuple of the newly formatted data, the mappings from class names
             to numbers, and the mappings from feature names to numbers.
    :rtype: 3-L{tuple} of (L{list} of L{unicode}, L{dict}, and L{dict})
    :raise AssertionError: If a feature occurs more than once on one line.
    '''
    # Initialize variables
    field_num_dict = UniqueNumberDict()
    class_num_dict = UniqueNumberDict()
    result_list = []
    marker_lines = ('TRAIN', 'TEST', 'DEV')

    # Iterate through MegaM file
    for line in lines:
        # Process encoding
        line = UnicodeDammit(line, ['utf-8', 'windows-1252']).unicode_markup.strip()

        # Ignore blank lines, comments and partition markers; a blank line
        # has no class column to read
        if not line or line.startswith('#') or line in marker_lines:
            continue

        split_line = line.split()
        result_parts = ['{0}'.format(class_num_dict[split_line[0]])]

        # Everything after the class label is alternating name/value tokens
        feature_tokens = split_line[1:]
        field_nums = (field_num_dict[field_name]
                      for field_name in islice(feature_tokens, 0, None, 2))
        values = (0.0 if raw_value == 'N/A' else float(raw_value)
                  for raw_value in islice(feature_tokens, 1, None, 2))

        # Emit features in ascending feature-number order
        line_fields = set()
        for field_num, value in sorted(zip(field_nums, values)):
            # Check for duplicates
            if field_num in line_fields:
                # Reverse lookup of the feature name for the error message
                field_name = next(name
                                  for name, f_num in field_num_dict.items()
                                  if f_num == field_num)
                raise AssertionError("Field {} occurs on same line twice.".format(field_name))
            # Record every feature seen, so zero-valued repeats are caught
            line_fields.add(field_num)
            # Only output non-empty (non-zero) features
            if value:
                result_parts.append('{}:{}'.format(field_num, value))

        result_list.append(' '.join(result_parts))

    return result_list, class_num_dict, field_num_dict
示例3: on_pubmsg
# 需要導入模塊: from bs4 import UnicodeDammit [as 別名]
# 或者: from bs4.UnicodeDammit import startswith [as 別名]
def on_pubmsg(self, c, e):
    """
    Handle a public message: log it, then run the first matching trigger.

    The message text is taken from ``e.arguments[0]`` and decoded with
    ``UnicodeDammit``. A message of the form ``<bot nick>: <command>`` is
    forwarded to ``self.do_command``. Otherwise the text is checked against
    the triggers below in order; most triggers return after answering.

    :param c: Connection object supplied by the caller; not used here.
    :param e: Event object; ``e.source.nick``, ``e.target`` and
              ``e.arguments[0]`` are read.
    """
    nick = e.source.nick
    # Answer where the message came from: the channel, or else the sender
    target = e.target if is_channel(e.target) else nick

    def reply(msg):
        self.send(target, msg)

    def dm(msg):
        self.send(nick, msg)

    line = UnicodeDammit(e.arguments[0]).unicode_markup
    log(' \033[37m{}→{}\033[0m'.format(nick, line))

    # "<bot nick>: command" addresses the bot directly. Both sides are
    # lowercased so a nick containing capitals can still be addressed.
    a = line.split(":", 1)
    if len(a) > 1 and a[0].lower() == self.nick.lower():
        self.do_command(e, a[1].strip().lower(), nick, target, reply, dm)
        return

    # zeltofilter
    if 'zeltoph' in nick:
        return

    # settings.VIPS maps a nick to the probability of being kicked
    foo = settings.VIPS.get(nick, 0)
    if random() < foo:
        self.kick(nick)

    match = re.match('.*┻━┻.*', line)
    if match:
        reply('┬─┬ノ(ಠ_ಠノ)')
        return

    # The nick is escaped because it may contain regex metacharacters
    match = re.match(
        '^({} *:)? *chaos-?([☆★☼☀*]|sternchen) *: ?(.*)$'.format(
            re.escape(self.nick)),
        line)
    if match:
        newcs = match.group(3)
        self.chaossternchen.append(newcs)
        self.sendchan('Chaos-☆ Nr. {} notiert: {}'.format(len(self.chaossternchen), newcs))
        return

    if line.startswith('.wiki '):
        wikipage = line[len('.wiki '):].strip()
        # Only plain page names are accepted before building the URL
        if re.match(r'^[-_+\w]+$', wikipage):
            wikiurl = 'http://afra-berlin.de/dokuwiki/doku.php?id={}'.format(wikipage)
            # The wiki answers with this German text for a missing page
            if 'Dieses Thema existiert noch nicht' in requests.get(wikiurl).text:
                reply("I'm sorry, I can't find a wiki page with that name.")
            else:
                reply(wikiurl)
        else:
            reply('Try to troll somebot else.')
        return

    if line == 'wat?':
        reply("I don't have a clue.")
        return

    # NOTE(review): the pattern requires a trailing space after the
    # greeting -- confirm this is intended.
    if re.match('^hail eris[.!]* ', line.lower()):
        reply("All Hail Discordia!")
        return

    # Kick for any Gandhi-like word that is not spelled exactly "gandhi"
    m = re.findall(r'(^|\s)?(gh?ah?nh?dh?ih?)(\s|$)?', line, re.IGNORECASE)
    for _1, match, _2 in m:
        if not re.match(r'(^|\s)?gandhi(\s|$)?', match, re.IGNORECASE):
            self.kick(nick, "It's spelled Gandhi")
            return

    if re.search(r'https?://[-a-z0-9.]*facebook\.com', line.lower()):
        reply('A facebook link? srsly? Get some self-respect!')
        return

    match = re.search(r'https?://pr0gramm\.com/#(newest/\*/[0-9/]*)', line.lower())
    if match:
        reply('Fixed that pr0gramm link for you: http://pr0gramm.com/static/'+match.group(1))
        return

    # Answer only the fifth consecutive "moin"; anything else resets the
    # streak
    if line == 'moin':
        self.moincount += 1
        if self.moincount == 5:
            reply('moin')
        return
    else:
        self.moincount = 0

    # "eta <text>", optionally prefixed by any number of '.', '!' or '#'.
    # The text is cut from the stripped line, so the result does not depend
    # on how many prefix characters were present.
    eta_line = line.lstrip('.!#')
    if eta_line.startswith('eta '):
        eta = eta_line[len('eta '):].strip()
        with self.db as db:
            # Replace any ETA this nick registered earlier
            db.execute("DELETE FROM etas WHERE nick=?", (nick,))
            if eta:
                db.execute("INSERT INTO etas VALUES (DATETIME('now'), ?, ?)", (nick, eta))
        dm('ETA registered. Thanks!')
        return

    # Announce the page title of every URL found in the message
    m = re.findall(URL_REGEX, line.lower())
    for url, *_ in m:
        res = requests.get(url)
        if res.status_code == requests.codes.ok:
            soup = BeautifulSoup(res.text)
            # Pages without a <title>, or with an empty one, have nothing
            # to announce
            title = soup.title.string if soup.title is not None else None
            if title:
                reply(title)

    # Occasionally correct a capitalisation other than "AfRA" or "afra"
    m = re.findall(r'(^|\s)(afra)(\s|$)', line, re.IGNORECASE)
    for _1, match, _2 in m:
        if match != 'AfRA' and match != 'afra' and random() < 0.1:
            reply("I'm sure you meant AfRA, not "+match)
            return