本文整理汇总了Python中codecs.BOM_UTF8.decode方法的典型用法代码示例。如果您正苦于以下问题:Python BOM_UTF8.decode方法的具体用法?Python BOM_UTF8.decode怎么用?Python BOM_UTF8.decode使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类codecs.BOM_UTF8的用法示例。
在下文中一共展示了BOM_UTF8.decode方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_import_stops_txt_bom
# 需要导入模块: from codecs import BOM_UTF8 [as 别名]
# 或者: from codecs.BOM_UTF8 import decode [as 别名]
def test_import_stops_txt_bom(self):
    """Import a BOM-prefixed stops.txt and check both stops and their link.

    The payload is BOM + text on Python 3 and BOM + bytes otherwise; after
    the import two stops must exist and the resort's parent station must be
    the station row.
    """
    if PY3:  # pragma: no cover
        payload = (BOM_UTF8.decode('utf-8') + """\
stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,\
location_type,parent_station,stop_timezone
FUR_CREEK_RES,FC,Furnace Creek Resort,,36.425288,-117.133162,A,\
http://example.com/fcr,0,FUR_CREEK_STA,
FUR_CREEK_STA,,Furnace Creek Station,"Our Station",36.425288,-117.133162,A,\
http://example.com,1,,America/Los_Angeles
""")
    else:
        payload = (BOM_UTF8 + b"""\
stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,\
location_type,parent_station,stop_timezone
FUR_CREEK_RES,FC,Furnace Creek Resort,,36.425288,-117.133162,A,\
http://example.com/fcr,0,FUR_CREEK_STA,
FUR_CREEK_STA,,Furnace Creek Station,"Our Station",36.425288,-117.133162,A,\
http://example.com,1,,America/Los_Angeles
""")

    # Feed the in-memory file straight to the importer.
    Stop.import_txt(StringIO(payload), self.feed)

    self.assertEqual(Stop.objects.count(), 2)
    parent = Stop.objects.get(stop_id='FUR_CREEK_STA')
    child = Stop.objects.get(stop_id='FUR_CREEK_RES')
    self.assertEqual(child.parent_station, parent)
示例2: test_utf8_bom
# 需要导入模块: from codecs import BOM_UTF8 [as 别名]
# 或者: from codecs.BOM_UTF8 import decode [as 别名]
def test_utf8_bom():
    """Check that a leading UTF-8 BOM ends up in the first leaf's prefix."""
    bom = BOM_UTF8.decode('utf-8')

    # Source made of nothing but the BOM: the first child is the endmarker.
    first_child = parso.parse(bom).children[0]
    assert first_child.type == 'endmarker'
    assert bom == first_child.prefix

    # BOM followed by a statement: the BOM is the prefix of the first leaf.
    first_child = parso.parse(bom + 'foo = 1').children[0]
    assert first_child.type == 'expr_stmt'
    assert bom == first_child.get_first_leaf().prefix
示例3: bom_prefix_csv
# 需要导入模块: from codecs import BOM_UTF8 [as 别名]
# 或者: from codecs.BOM_UTF8 import decode [as 别名]
def bom_prefix_csv(text):
    """
    Return CSV text with a UTF-8 Byte-order Marker (BOM) in front of it.

    The result type depends on the interpreter so that the CSV reader
    handles the BOM correctly:
    - Python 2: a UTF-8 encoded bytestring (BOM bytes + encoded text)
    - Python 3: unicode text (decoded BOM + text)
    """
    if not PY3:
        return BOM_UTF8 + text.encode('utf-8')
    return BOM_UTF8.decode('utf-8') + text
示例4: read_unicode
# 需要导入模块: from codecs import BOM_UTF8 [as 别名]
# 或者: from codecs.BOM_UTF8 import decode [as 别名]
def read_unicode(fn):
    """Read a text file encoded as UTF-16 LE, UTF-16 BE, or UTF-8.

    The encoding is selected from the file's leading byte-order mark (BOM).
    A file without a recognised BOM is decoded as UTF-8.  Exactly one
    leading BOM is removed; any further U+FEFF characters are part of the
    content (ZERO WIDTH NO-BREAK SPACE) and are preserved.

    Args:
        fn: Path of the file to read.

    Returns:
        The decoded text, without the BOM.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the bytes are not valid in the chosen encoding.
    """
    from codecs import BOM_UTF16_LE, BOM_UTF16_BE, BOM_UTF8

    with open(fn, "rb") as in_file:
        bs = in_file.read()

    # Same probe order as before: UTF-16 LE, UTF-16 BE, then UTF-8.
    for bom, encoding in (
        (BOM_UTF16_LE, "utf_16_le"),
        (BOM_UTF16_BE, "utf_16_be"),
        (BOM_UTF8, "utf_8"),
    ):
        if bs.startswith(bom):
            # Drop the BOM at byte level so only that single marker goes;
            # str.lstrip() would also swallow genuine leading U+FEFF text.
            return bs[len(bom):].decode(encoding)

    # No BOM at all: plain UTF-8.
    return bs.decode("utf_8")
示例5: test_import_bom
# 需要导入模块: from codecs import BOM_UTF8 [as 别名]
# 或者: from codecs.BOM_UTF8 import decode [as 别名]
def test_import_bom(self):
    """Import a BOM-prefixed agency.txt and verify every imported field.

    The payload is BOM + text on Python 3 and BOM + bytes otherwise.
    """
    if PY3:  # pragma: no cover
        payload = (BOM_UTF8.decode('utf-8') + """\
agency_name,agency_url,agency_timezone
Demo Transit Authority,http://google.com,America/Los_Angeles
""")
    else:
        payload = (BOM_UTF8 + b"""\
agency_name,agency_url,agency_timezone
Demo Transit Authority,http://google.com,America/Los_Angeles
""")

    Agency.import_txt(StringIO(payload), self.feed)

    # Exactly one agency is expected; get() raises otherwise.
    imported = Agency.objects.get()
    self.assertEqual(imported.agency_id, '')
    self.assertEqual(imported.name, 'Demo Transit Authority')
    self.assertEqual(imported.url, 'http://google.com')
    self.assertEqual(imported.timezone, 'America/Los_Angeles')
    self.assertEqual(imported.lang, '')
    self.assertEqual(imported.phone, '')
    self.assertEqual(imported.fare_url, '')
示例6: str
# 需要导入模块: from codecs import BOM_UTF8 [as 别名]
# 或者: from codecs.BOM_UTF8 import decode [as 别名]
# Python 3
from urllib.request import urlopen
from .__init__ import Graph, Node, Edge, bfs
from .__init__ import WEIGHT, CENTRALITY, EIGENVECTOR, BETWEENNESS
import os
import sys
# Directory that contains this module, or "" when it cannot be determined.
try:
    MODULE = os.path.dirname(os.path.realpath(__file__))
except Exception:
    # Best effort only (presumably guards against __file__ being undefined);
    # unlike a bare except, this lets SystemExit/KeyboardInterrupt propagate.
    MODULE = ""

# Rebind BOM_UTF8 from its bytes form to text. Decoding yields text on both
# Python 2 and Python 3, so no interpreter-version branch is required.
BOM_UTF8 = BOM_UTF8.decode("utf-8")
#### COMMONSENSE SEMANTIC NETWORK ##################################################################
#--- CONCEPT ---------------------------------------------------------------------------------------
class Concept(Node):
def __init__(self, *args, **kwargs):
""" A concept in the sematic network.
"""
Node.__init__(self, *args, **kwargs)
self._properties = None
示例7: import_txt
# 需要导入模块: from codecs import BOM_UTF8 [as 别名]
# 或者: from codecs.BOM_UTF8 import decode [as 别名]
#.........这里部分代码省略.........
# Pick a conversion function for the field
if point_match:
converter = point_convert
elif isinstance(field, models.DateField):
converter = date_convert
elif isinstance(field, models.BooleanField):
converter = bool_convert
elif isinstance(field, models.CharField):
converter = char_convert
elif field.rel:
converter = instance_convert(field, feed, rel_name)
assert not isinstance(field, models.ManyToManyField)
elif field.null:
converter = null_convert
elif field.has_default():
converter = default_convert(field)
else:
converter = no_convert
if point_match:
index = int(point_match.group('index'))
point_map[csv_name] = (index, converter)
else:
val_map[csv_name] = converter
# Read and convert the source txt
csv_reader = reader(txt_file)
unique_line = dict()
count = 0
first = True
extra_counts = defaultdict(int)
if PY3: # pragma: no cover
bom = BOM_UTF8.decode('utf-8')
else: # pragma: no cover
bom = BOM_UTF8
new_objects = []
for row in csv_reader:
if first:
# Read the columns
columns = row
if columns[0].startswith(bom):
columns[0] = columns[0][len(bom):]
first = False
continue
if filter_func and not filter_func(zip(columns, row)):
continue
# Read a data row
fields = dict()
point_coords = [None, None]
ukey_values = {}
if cls._rel_to_feed == 'feed':
fields['feed'] = feed
for column_name, value in zip(columns, row):
if column_name not in name_map:
val = null_convert(value)
if val is not None:
fields.setdefault('extra_data', {})[column_name] = val
extra_counts[column_name] += 1
elif column_name in val_map:
fields[name_map[column_name]] = val_map[column_name](value)
else:
assert column_name in point_map
pos, converter = point_map[column_name]
示例8: quit
# 需要导入模块: from codecs import BOM_UTF8 [as 别名]
# 或者: from codecs.BOM_UTF8 import decode [as 别名]
# Fixed XML when is not decoded
import oerplib
import argparse
import base64
from lxml import objectify
from codecs import BOM_UTF8
# Text (unicode) form of the UTF-8 byte-order mark.
BOM_UTF8U = BOM_UTF8.decode('UTF-8')

# Command-line options: connection settings for the OpenERP server.
PARSER = argparse.ArgumentParser()
PARSER.add_argument("-d", "--db", help="DataBase Name", required=True)
PARSER.add_argument("-r", "--user", help="OpenERP User", required=True)
PARSER.add_argument("-w", "--passwd", help="OpenERP Password", required=True)
PARSER.add_argument("-p", "--port",
                    type=int,
                    help="Port, 8069 for default", default=8069)
PARSER.add_argument("-s", "--server",
                    help="Server IP, 127.0.0.1 for default",
                    default="127.0.0.1")
ARGS = PARSER.parse_args()

# Defensive guard: argparse already rejects missing required options.
# PARSER.error() works on Python 2 and 3 (the old print statement was a
# SyntaxError on Python 3) and exits with a non-zero status.
if ARGS.db is None or ARGS.user is None or ARGS.passwd is None:
    PARSER.error("Must be specified DataBase, User and Password")

DB_NAME = ARGS.db
USER = ARGS.user
PASSW = ARGS.passwd
SERVER = ARGS.server
PORT = ARGS.port
OERP_CONNECT = oerplib.OERP(SERVER,
示例9: test_eval_bom
# 需要导入模块: from codecs import BOM_UTF8 [as 别名]
# 或者: from codecs.BOM_UTF8 import decode [as 别名]
def test_eval_bom(self):
    """eval() accepts a BOM-prefixed literal but not a Latin-1 decoded BOM."""
    literal = '"foo"'
    self.assertEqual(eval(BOM_UTF8 + literal), 'foo')
    # Actual BOM ignored, so causes a SyntaxError
    latin1_bom = BOM_UTF8.decode('iso-8859-1')
    self.assertRaises(SyntaxError, eval, latin1_bom + literal)
示例10: detect_encoding
# 需要导入模块: from codecs import BOM_UTF8 [as 别名]
# 或者: from codecs.BOM_UTF8 import decode [as 别名]
dirname = os.readlink(dirname)
fullname = os.path.join(dirname, filename)
if os.path.exists(fullname):
return fullname
return None
# }}}
# {{{ file encoding detection
# stolen from Python 3.1's tokenize.py, by Ka-Ping Yee
import re
# Matches an encoding declaration comment ("# ... coding[:=] <name>");
# group 1 captures the encoding name. Written as a raw string so that \s
# and \w reach the regex engine without invalid-escape warnings.
cookie_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)")
from codecs import lookup, BOM_UTF8
# On Python 3 rebind the BOM to its decoded text form; otherwise keep it as is.
BOM_UTF8 = BOM_UTF8.decode() if PY3 else BOM_UTF8
def detect_encoding(readline):
"""
The detect_encoding() function is used to detect the encoding that should
be used to decode a Python source file. It requires one argment, readline,
in the same way as the tokenize() generator.
It will call readline a maximum of twice, and return the encoding used
(as a string) and a list of any lines (left as bytes) it has read
in.
It detects the encoding from the presence of a utf-8 bom or an encoding
cookie as specified in pep-0263. If both a bom and a cookie are present,
but disagree, a SyntaxError will be raised. If the encoding cookie is an
示例11: import
# 需要导入模块: from codecs import BOM_UTF8 [as 别名]
# 或者: from codecs.BOM_UTF8 import decode [as 别名]
from parso.python.token import (tok_name, ENDMARKER, STRING, NUMBER, opmap,
NAME, ERRORTOKEN, NEWLINE, INDENT, DEDENT,
ERROR_DEDENT, FSTRING_STRING, FSTRING_START,
FSTRING_END)
from parso._compatibility import py_version
from parso.utils import split_lines
# Record type with seven named fields describing a tokenizer configuration.
TokenCollection = namedtuple(
    'TokenCollection',
    [
        'pseudo_token',
        'single_quoted',
        'triple_quoted',
        'endpats',
        'whitespace',
        'fstring_pattern_map',
        'always_break_tokens',
    ],
)

# Text form of the UTF-8 byte-order mark.
BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')

_token_collection_cache = {}

if py_version < 30:
    # No str.isidentifier() here: fall back to ASCII letters and underscore.
    namechars = string.ascii_letters + '_'
    is_identifier = lambda s: s in namechars
else:
    # Python 3 has str.isidentifier() to check if a char is a valid identifier
    is_identifier = str.isidentifier
def group(*choices, **kwargs):
capture = kwargs.pop('capture', False) # Python 2, arrghhhhh :(
assert not kwargs
示例12: __enter__
# 需要导入模块: from codecs import BOM_UTF8 [as 别名]
# 或者: from codecs.BOM_UTF8 import decode [as 别名]
def __enter__(self):
    """Return the UTF-8 text of ``self.filename`` without a leading BOM.

    Returns:
        The file content as text; one leading byte-order mark, if present,
        is removed. An empty file yields an empty string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(self.filename, encoding='utf-8') as infile:
        content = infile.read()
    bom = BOM_UTF8.decode('utf8')
    # startswith() is safe on empty content, where content[0] would raise
    # IndexError.
    if content.startswith(bom):
        content = content[len(bom):]
    return content
示例13: getLogger
# 需要导入模块: from codecs import BOM_UTF8 [as 别名]
# 或者: from codecs.BOM_UTF8 import decode [as 别名]
from datetime import datetime, date
from logging import getLogger
import re
from django.contrib.gis.db import models
from django.contrib.gis.db.models.query import GeoQuerySet
from django.db.models.fields.related import ManyToManyField
from django.utils.six import StringIO, text_type, PY3
from multigtfs.compat import get_blank_value, write_text_rows
logger = getLogger(__name__)

# Matches a "point[<digit>]" field pattern, capturing the name and the index.
re_point = re.compile(r'(?P<name>point)\[(?P<index>\d)\]')

batch_size = 1000
large_queryset_size = 100000

# BOM in text form on Python 3, raw bytes otherwise.
if PY3:
    CSV_BOM = BOM_UTF8.decode('utf-8')
else:
    CSV_BOM = BOM_UTF8
class BaseQuerySet(GeoQuerySet):
def populated_column_map(self):
'''Return the _column_map without unused optional fields'''
column_map = []
cls = self.model
for csv_name, field_pattern in cls._column_map:
# Separate the local field name from foreign columns
if '__' in field_pattern:
field_name = field_pattern.split('__', 1)[0]
else:
field_name = field_pattern
# Handle point fields
示例14: __enter__
# 需要导入模块: from codecs import BOM_UTF8 [as 别名]
# 或者: from codecs.BOM_UTF8 import decode [as 别名]
def __enter__(self):
    """Return the UTF-8 text of ``self.filename`` without a leading BOM.

    Returns:
        The file content as text; one leading byte-order mark, if present,
        is removed. An empty file yields an empty string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # A context manager closes the file handle deterministically instead of
    # leaving it to the garbage collector.
    with open(self.filename, encoding="utf-8") as infile:
        content = infile.read()
    bom = BOM_UTF8.decode("utf8")
    # startswith() is safe on empty content, where content[0] would raise
    # IndexError.
    if content.startswith(bom):
        content = content[len(bom):]
    return content
示例15: test_simple_prefix_splitting
# 需要导入模块: from codecs import BOM_UTF8 [as 别名]
# 或者: from codecs.BOM_UTF8 import decode [as 别名]
try:
from itertools import zip_longest
except ImportError:
# Python 2
from itertools import izip_longest as zip_longest
from codecs import BOM_UTF8
import pytest
import parso
unicode_bom = BOM_UTF8.decode('utf-8')
@pytest.mark.parametrize(('string', 'tokens'), [
('', ['']),
('#', ['#', '']),
(' # ', ['# ', '']),
(' # \n', ['# ', '\n', '']),
(' # \f\n', ['# ', '\f', '\n', '']),
(' \n', ['\n', '']),
(' \n ', ['\n', ' ']),
(' \f ', ['\f', ' ']),
(' \f ', ['\f', ' ']),
(' \r\n', ['\r\n', '']),
('\\\n', ['\\\n', '']),
('\\\r\n', ['\\\r\n', '']),
('\t\t\n\t', ['\n', '\t']),
])
def test_simple_prefix_splitting(string, tokens):