本文整理汇总了Python中backports.lzma.open方法的典型用法代码示例。如果您正苦于以下问题:Python lzma.open方法的具体用法?Python lzma.open怎么用?Python lzma.open使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块backports.lzma的用法示例。
在下文中一共展示了lzma.open方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _compress_image_stream
# 需要导入模块: from backports import lzma [as 别名]
# 或者: from backports.lzma import open [as 别名]
def _compress_image_stream(self, stream):
    """Compress ``stream`` into a file in the source workdir.

    The output path is ``EXPORTED_COMPRESSED_IMAGE_NAME_TEMPLATE`` joined onto
    ``self.workflow.source.workdir`` and formatted with ``'gz'`` or ``'xz'``
    according to ``self.method``. After copying, ``self.uncompressed_size``
    is set to ``stream.tell()``.

    :param stream: binary file-like object providing ``read(size)`` and
                   ``tell()``
    :return: path of the compressed file
    :raises RuntimeError: if ``self.method`` is neither ``'gzip'`` nor
                          ``'lzma'``
    """
    outfile = os.path.join(self.workflow.source.workdir,
                           EXPORTED_COMPRESSED_IMAGE_NAME_TEMPLATE)
    if self.method == 'gzip':
        outfile = outfile.format('gz')
        fp = gzip.open(outfile, 'wb', compresslevel=6)
    elif self.method == 'lzma':
        outfile = outfile.format('xz')
        fp = lzma.open(outfile, 'wb')
    else:
        raise RuntimeError('Unsupported compression format {0}'.format(self.method))

    _chunk_size = 1024**2  # 1 MB chunk size for reading/writing
    try:
        self.log.info('compressing image %s to %s using %s method',
                      self.workflow.image, outfile, self.method)
        data = stream.read(_chunk_size)
        while data != b'':
            fp.write(data)
            data = stream.read(_chunk_size)
    finally:
        # Closing flushes the compressor; without it the archive on disk can
        # be truncated and the file descriptor leaks.
        fp.close()

    self.uncompressed_size = stream.tell()
    return outfile
示例2: load
# 需要导入模块: from backports import lzma [as 别名]
# 或者: from backports.lzma import open [as 别名]
def load(self):
    """Restore the vocabulary from the pickle at ``self.vocab_loadname``.

    Returns ``False`` (and sets ``self._loaded`` to ``False``) when no load
    path is set or the file does not exist. Otherwise the pickled
    ``(tokens, embeddings)`` pair is read, every token is mapped to an index
    starting at ``len(self.special_tokens)``, the embedding width is recorded
    in ``self._embedding_size`` and ``True`` is returned.
    """
    pickle_path = self.vocab_loadname
    if not (pickle_path and os.path.exists(pickle_path)):
        self._loaded = False
        return False

    with open(pickle_path, 'rb') as handle:
        # NOTE(review): unpickling can execute arbitrary code; only load
        # files from a trusted source.
        self._tokens, self._embeddings = pkl.load(handle, encoding='utf-8', errors='ignore')

    # Regular tokens are numbered right after the special tokens.
    first_index = len(self.special_tokens)
    for position, token in enumerate(self._tokens, first_index):
        self[token] = position

    self._embedding_size = self._embeddings.shape[1]
    self._loaded = True
    return True
#=============================================================
示例3: load_next
# 需要导入模块: from backports import lzma [as 别名]
# 或者: from backports.lzma import open [as 别名]
def load_next(self, file_idx=None):
    """Load the sentences of one CoNLL-U file into this object.

    ``self.reset()`` is called first when nothing has been loaded yet
    (``self._cur_file_idx == -1``) or when there is more than one file.
    With ``file_idx`` omitted, ``self._cur_file_idx`` advances cyclically and
    that file is used; an explicit ``file_idx`` leaves the cursor untouched.
    """
    needs_reset = self._cur_file_idx == -1 or len(self.conllu_files) > 1
    if needs_reset:
        self.reset()

    if file_idx is None:
        # Wrap around to the first file after the last one.
        self._cur_file_idx = (self._cur_file_idx + 1) % len(self.conllu_files)
        file_idx = self._cur_file_idx

    with self.open():
        chosen_file = self.conllu_files[file_idx]
        for sentence in self.itersents(chosen_file):
            self.add(sentence)
#=============================================================
示例4: extract_7zip
# 需要导入模块: from backports import lzma [as 别名]
# 或者: from backports.lzma import open [as 别名]
def extract_7zip(fname):
    """Extract an LZMA/xz-compressed tar archive into the current directory.

    A progress line is printed each time at least another 20 % of the
    archive members has been extracted.

    :param fname: path of the archive; it is passed through ``str()``
    :return: ``None``
    """
    try:
        import lzma  # standard library on Python 3.3+
    except ImportError:
        import backports.lzma as lzma  # Python 2 fallback
    import tarfile

    def progress_generator(tar):
        # Yield every member of ``tar`` while reporting progress in 20 % steps.
        total = len(tar.getmembers())
        last = 0.0
        for so_far, ti in enumerate(tar, 1):
            percent = int((float(so_far) / float(total)) * 100.0)
            if percent - last >= (100.0 / 5.0):
                last = percent
                print(' %3d%% extracted' % percent)
            yield ti

    print('Extracting "%s"...' % fname)
    print(' decompressing...')
    # Both handles are context managers; closing them releases the file
    # descriptor even when extraction fails part-way.
    with lzma.open(str(fname)) as lz:
        with tarfile.open(fileobj=lz) as tar:
            # NOTE(review): extractall() trusts member paths; only use this
            # on archives from a trusted source.
            tar.extractall(members=progress_generator(tar))
    return None
示例5: __init__
# 需要导入模块: from backports import lzma [as 别名]
# 或者: from backports.lzma import open [as 别名]
def __init__(self, name, mode='r', compression=None):
    """
    Set up an empty object and immediately open ``name``.

    ``name``, ``mode`` and ``compression`` are forwarded unchanged to
    :meth:`open`.
    """
    # fp: file pointer handed out to users; ctype: compression type;
    # fp_extra: additional file pointer that is needed in some cases.
    self.fp = self.ctype = self.fp_extra = None

    self.open(name, mode=mode, compression=compression)
示例6: open
# 需要导入模块: from backports import lzma [as 别名]
# 或者: from backports.lzma import open [as 别名]
def open(self, name, mode='r', compression=None):
    """
    Open a file pointer and store it in ``self.fp``.

    Compressed files are opened in text mode (``'t'`` is appended to
    ``mode``); an uncompressed file is opened with ``mode`` exactly as given.
    The method inherits its input parameters from the constructor of
    :class:`FileObject`.

    :param name: path of the file to open
    :param mode: ``'r'`` or ``'w'``
    :param compression: ``None`` / falsy for no compression, ``'gzip'``,
        ``'bzip2'``, ``'use_ext'`` to defer to
        ``self.get_compression_type(name)``; any other truthy value is
        handled as LZMA
    :raises AssertionError: if LZMA is requested but unavailable
    """
    if compression == 'use_ext':
        # presumably sets self.ctype from the file extension -- TODO confirm
        self.get_compression_type(name)
    else:
        self.ctype = compression

    if not self.ctype:
        self.fp = open(name, mode)
    elif self.ctype == 'gzip':
        self.fp = gzip.open(name, mode + 't')
    elif self.ctype == 'bzip2':
        try:
            # Python 3 supports opening bzip2 files in text mode
            # therefore, we prefer to open them this way
            self.fp = bz2.open(name, mode + 't')
        except AttributeError:
            # Only the "bz2.open does not exist" case (Python 2) falls back;
            # genuine I/O errors are no longer swallowed and retried.
            # BZ2File opens a file in binary mode
            # thus, we have to use codecs.getreader()
            # to be able to use it in text mode
            self.fp_extra = bz2.BZ2File(name, mode)

            if mode == 'r':
                self.fp = codecs.getreader('ascii')(self.fp_extra)
            else:  # mode == 'w'
                self.fp = codecs.getwriter('ascii')(self.fp_extra)
    else:  # self.ctype == 'lzma'
        # LZMA is available in Python 2 only if backports.lzma is installed
        # Python 3 supports it by default
        if not lzma_present:
            # Raised explicitly (same exception type as before) so the check
            # is not stripped when Python runs with -O.
            raise AssertionError('LZMA compression is unavailable.')
        self.fp = lzma.open(name, mode=mode + 't')
示例7: run
# 需要导入模块: from backports import lzma [as 别名]
# 或者: from backports.lzma import open [as 别名]
def run(self):
    """Compress the built image and record the archive in the workflow.

    Nothing is done for scratch builds. When ``self.load_exported_image`` is
    set and an exported image exists, the most recent one is compressed from
    disk; otherwise (unless this is a source build, which is skipped) the
    image is fetched through ``self.tasker``. Metadata for the compressed
    file is appended to ``self.workflow.exported_image_sequence``.
    """
    if is_scratch_build(self.workflow):
        # required only to make an archive for Koji
        self.log.info('scratch build, skipping plugin')
        return

    reuse_exported = (self.load_exported_image and
                      len(self.workflow.exported_image_sequence) > 0)
    if reuse_exported:
        latest = self.workflow.exported_image_sequence[-1]
        image = latest.get('path')
        image_type = latest.get('type')
        self.log.info('preparing to compress image %s', image)
        with open(image, 'rb') as source:
            outfile = self._compress_image_stream(source)
    elif self.source_build:
        self.log.info('skipping, no exported source image to compress')
        return
    else:
        image = self.workflow.image
        image_type = IMAGE_TYPE_DOCKER_ARCHIVE
        self.log.info('fetching image %s from docker', image)
        with self.tasker.get_image(image) as source:
            outfile = self._compress_image_stream(source)

    metadata = get_exported_image_metadata(outfile, image_type)

    if self.uncompressed_size != 0:
        # The non-zero guard above also protects the division below.
        metadata['uncompressed_size'] = self.uncompressed_size
        savings = 1 - metadata['size'] / metadata['uncompressed_size']
        self.log.debug('uncompressed: %s, compressed: %s, ratio: %.2f %% saved',
                       human_size(metadata['uncompressed_size']),
                       human_size(metadata['size']),
                       100*savings)

    self.workflow.exported_image_sequence.append(metadata)
    self.log.info('compressed image is available as %s', outfile)
示例8: dump
# 需要导入模块: from backports import lzma [as 别名]
# 或者: from backports.lzma import open [as 别名]
def dump(self):
    """Pickle ``(self._tokens, self._embeddings)`` to ``self.vocab_loadname``.

    Nothing is written unless ``self.save_as_pickle`` is truthy and the
    target file does not exist yet. Missing parent directories are created.
    """
    if self.save_as_pickle and not os.path.exists(self.vocab_loadname):
        target_dir = os.path.dirname(self.vocab_loadname)
        # A bare file name has an empty directory part, and os.makedirs('')
        # raises FileNotFoundError, so only create a directory when one is named.
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        with open(self.vocab_loadname, 'wb') as f:
            pkl.dump((self._tokens, self._embeddings), f, protocol=pkl.HIGHEST_PROTOCOL)
    return
#=============================================================
示例9: open
# 需要导入模块: from backports import lzma [as 别名]
# 或者: from backports.lzma import open [as 别名]
def open(self):
    """Open the multibucket and every vocab in this collection.

    Sets ``self._is_open`` and returns ``self`` so the call can be chained
    or used where a return value is expected.
    """
    self._multibucket.open()
    for member in self:
        member.open()

    self._is_open = True
    return self
#=============================================================
示例10: itersents
# 需要导入模块: from backports import lzma [as 别名]
# 或者: from backports.lzma import open [as 别名]
def itersents(conllu_file):
    """Yield the sentences of a CoNLL-U file as lists of tab-split rows.

    The opener is chosen from the file extension: ``.zip`` (first archive
    member), ``.gz``, ``.xz``, anything else is read as a plain file.
    Blank lines and lines starting with ``#`` end the current sentence;
    lines starting with ``<digits>-<digits>`` or ``<digits>.<digits>`` are
    skipped (presumably multiword-token ranges and empty nodes -- confirm).

    :param conllu_file: path of the file to read
    :raises ValueError: if a ``.zip`` archive contains no members
    """
    skipped_id = re.compile('[0-9]+[-.][0-9]+')

    def sentences(stream, **kwargs):
        # Decode ``stream`` as UTF-8 and group token rows into sentences.
        reader = codecs.getreader('utf-8')(stream, **kwargs)
        buff = []
        for line in reader:
            line = line.strip()
            if line and not line.startswith('#'):
                if not skipped_id.match(line):
                    buff.append(line.split('\t'))
            elif buff:
                yield buff
                buff = []
        # NOTE(review): this also yields an empty list when the input ends
        # with a blank line; kept because callers may rely on it.
        yield buff

    if conllu_file.endswith('.zip'):
        # The previous ``zipfile.Zipfile(path, 'rb')`` could never work: the
        # class is spelled ZipFile, it rejects mode 'rb', and the archive
        # object itself is not a byte stream. Read the first member instead.
        with zipfile.ZipFile(conllu_file) as archive:
            members = archive.namelist()
            if not members:
                raise ValueError('Empty zip archive: %s' % conllu_file)
            with archive.open(members[0]) as f:
                for sent in sentences(f):
                    yield sent
        return

    if conllu_file.endswith('.gz'):
        open_func = gzip.open
        kwargs = {}
    elif conllu_file.endswith('.xz'):
        open_func = lzma.open
        kwargs = {'errors': 'ignore'}
    else:
        open_func = codecs.open
        kwargs = {'errors': 'ignore'}

    with open_func(conllu_file, 'rb') as f:
        for sent in sentences(f, **kwargs):
            yield sent
#=============================================================
示例11: extract
# 需要导入模块: from backports import lzma [as 别名]
# 或者: from backports.lzma import open [as 别名]
def extract(filename):
    """Unpack every member of the tar archive ``filename`` into the current
    working directory."""
    # NOTE(review): extractall() trusts member paths; only use on trusted archives.
    with tarfile.open(filename) as archive:
        archive.extractall()
示例12: get_decompresser
# 需要导入模块: from backports import lzma [as 别名]
# 或者: from backports.lzma import open [as 别名]
def get_decompresser(fn):
    """Return the callable that opens the compressed file ``fn``.

    The choice is made by substring match on the file name: ``'.bz2'`` gives
    ``bz2.BZ2File`` and ``'.xz'`` gives ``lzma.open``.

    :param fn: file name to inspect
    :raises SystemExit: for ``'.zst'`` files, after printing a notice
    :raises ValueError: if no known extension is found
    """
    if '.bz2' in fn:
        return bz2.BZ2File
    if '.xz' in fn:
        return lzma.open
    if '.zst' in fn:
        print('zst not implemented yet!')
        # Same effect as the interactive-only exit() helper, without
        # depending on the site module having installed it.
        raise SystemExit
    # Previously this path fell through to ``return decompress`` and failed
    # with an UnboundLocalError that did not mention the file.
    raise ValueError('Unsupported compression format: %r' % (fn,))
# the below is adapted from:
# https://github.com/eukaryote31/openwebtext/blob/master/filter.py
示例13: count
# 需要导入模块: from backports import lzma [as 别名]
# 或者: from backports.lzma import open [as 别名]
def count(self, *args):
    """Read pretrained embeddings from ``self.pretrained_file``.

    The file is read twice: once to determine the matrix shape (from a
    two-field header line, or by counting lines) and once to fill the
    matrix. Tokens are indexed starting at ``len(self.special_tokens)`` and
    the row count is capped at ``self.max_embed_count`` extra rows when that
    value is truthy. Results are stored in ``self._embed_size``,
    ``self._tokens`` and ``self._embeddings``, then ``self.dump()`` is called.

    :param args: ignored
    :return: ``True``
    """
    import contextlib

    @contextlib.contextmanager
    def open_zip_member(path, mode='rb'):
        # ZipFile has no gzip.open-style API (and rejects mode 'rb'), so
        # expose the first archive member as a binary stream instead.
        with zipfile.ZipFile(path) as archive:
            with archive.open(archive.namelist()[0]) as member:
                yield member

    max_embed_count = self.max_embed_count
    if self.pretrained_file.endswith('.zip'):
        # Was ``zipfile.Zipfile``, which raised AttributeError.
        open_func = open_zip_member
        kwargs = {}
    elif self.pretrained_file.endswith('.gz'):
        open_func = gzip.open
        kwargs = {}
    elif self.pretrained_file.endswith('.xz'):
        open_func = lzma.open
        kwargs = {'errors': 'ignore'}
    else:
        open_func = codecs.open
        kwargs = {'errors': 'ignore'}

    cur_idx = len(self.special_tokens)
    tokens = []
    # Determine the dimensions of the embedding matrix
    with open_func(self.pretrained_file, 'rb') as f:
        reader = codecs.getreader('utf-8')(f, **kwargs)
        first_line = reader.readline().rstrip().split(' ')
        if len(first_line) == 2:  # It has a header that gives the dimensions
            shape = [int(first_line[0])+cur_idx, int(first_line[1])]
        else:  # We have to compute the dimensions ourself
            # NOTE(review): a header-less file with a single line leaves
            # ``line_num`` unbound here -- confirm whether that can occur.
            for line_num, line in enumerate(reader):
                pass
            shape = [cur_idx+line_num+1, len(line.split())-1]
    shape[0] = min(shape[0], max_embed_count+cur_idx) if max_embed_count else shape[0]
    embeddings = np.zeros(shape, dtype=np.float32)

    # Fill in the embedding matrix
    with open_func(self.pretrained_file, 'rb') as f:
        for line_num, line in enumerate(f):
            # NOTE(review): the first line is skipped even when it is not a
            # header; the shape computed above is consistent with that.
            if line_num:
                if cur_idx < shape[0]:
                    line = line.rstrip()
                    if line:
                        line = line.decode('utf-8', errors='ignore').split(' ')
                        embeddings[cur_idx] = line[1:]
                        tokens.append(line[0])
                        self[line[0]] = cur_idx
                        cur_idx += 1
                else:
                    break

    self._embed_size = shape[1]
    self._tokens = tokens
    self._embeddings = embeddings
    self.dump()
    return True
#=============================================================
示例14: verify_hostname
# 需要导入模块: from backports import lzma [as 别名]
# 或者: from backports.lzma import open [as 别名]
def verify_hostname(self, connection, cert, errno, depth, preverifyOK):
    """Check a presented certificate's public key against the pinned chain.

    Returns ``False`` when ``preverifyOK`` is falsy, when ``depth`` is beyond
    ``self.chain``, or when the public key of ``cert`` does not match the
    key of ``self.chain[depth]``; in the last case the certificate is
    appended to ``failed.pem``. Returns ``True`` otherwise.

    :param connection: unused
    :param cert: certificate being verified
    :param errno: unused
    :param depth: index into ``self.chain`` to compare against
    :param preverifyOK: result of the preceding verification step
    """
    if DEBUG:
        print('Verify: pre=%d depth=%d cert=%s issuer=%s' % (preverifyOK,
                                                            depth,
                                                            cert.get_subject(),
                                                            cert.get_issuer()))
    if not preverifyOK:
        # FIXME if below is true about constant-time, then this
        # shouldn't bail out early either...
        return False

    if depth >= len(self.chain):
        print("depth is %d, but we have a chain %d entries long." % (depth, len(self.chain)))
        return False

    # FIXME TODO requires thinking
    # need to ensure this is constant-time? e.g. compare hashes?
    # or are we already screwed on that since we'll bail out of
    # this whole "verify" callback stack early when one cert
    # fails?
    # correct way is to compare hashes (then we only have to store
    # the hash of the public cert, not the actual thing)
    verify_pubkey = PublicKey(cert.get_pubkey())
    golden_pubkey = self.chain[depth].getPublicKey()
    if DEBUG:
        print('incoming="%s", golden="%s"' % (verify_pubkey.keyHash(), golden_pubkey.keyHash()))

    if not golden_pubkey.matches(verify_pubkey):
        # getting out the CN (common name) for a nicer output,
        # but maybe we don't want that -- let "experts" examine
        # "failed.pem" and everyone else just needs to know "it
        # didn't work"?
        # Name components may be bytes (Python 3) or str (Python 2), so
        # accept both spellings of the key and decode bytes values.
        common_name = ''.join(
            value.decode('utf-8', 'replace')
            if isinstance(value, bytes) and not isinstance(value, str)
            else str(value)
            for key, value in cert.get_subject().get_components()
            if key in (b'CN', 'CN')
        )
        print('Certificate chain verification failed for "%s".' % common_name)
        print('Public key md5 hash is "%s" but wanted "%s".' % (verify_pubkey.keyHash(), golden_pubkey.keyHash()))
        print('Dumping failing certificate to "failed.pem".')
        # dump_certificate() returns bytes, so the file is opened in binary
        # append mode; text mode raises TypeError on Python 3.
        with open('failed.pem', 'ab') as f:
            f.write(OpenSSL.crypto.dump_certificate(OpenSSL.SSL.FILETYPE_PEM, cert))
        return False

    # already checked preverifyOK
    return True
示例15: verify_hostname
# 需要导入模块: from backports import lzma [as 别名]
# 或者: from backports.lzma import open [as 别名]
def verify_hostname(self, connection, cert, errno, depth, preverifyOK):
    """Check a presented certificate's public key against the pinned chain.

    Returns ``False`` when ``preverifyOK`` is falsy, when ``depth`` is beyond
    ``self.chain``, or when the public key of ``cert`` does not match the
    key of ``self.chain[depth]``; in the last case the certificate is
    appended to ``failed.pem``. Returns ``True`` otherwise.

    :param connection: unused
    :param cert: certificate being verified
    :param errno: unused
    :param depth: index into ``self.chain`` to compare against
    :param preverifyOK: result of the preceding verification step
    """
    if DEBUG:
        print('Verify: pre=%d depth=%d cert=%s issuer=%s' % (preverifyOK,
                                                            depth,
                                                            cert.get_subject(),
                                                            cert.get_issuer()))
    if not preverifyOK:
        # FIXME if below is true about constant-time, then this
        # shouldn't bail out early either...
        return False

    if depth >= len(self.chain):
        print("depth is %d, but we have a chain %d entries long." % (depth, len(self.chain)))
        return False

    # FIXME TODO requires thinking
    # need to ensure this is constant-time? e.g. compare hashes?
    # or are we already screwed on that since we'll bail out of
    # this whole "verify" callback stack early when one cert
    # fails?
    # correct way is to compare hashes (then we only have to store
    # the hash of the public cert, not the actual thing)
    verify_pubkey = PublicKey(cert.get_pubkey())
    golden_pubkey = self.chain[depth].getPublicKey()
    if DEBUG:
        print('incoming="%s", golden="%s"' % (verify_pubkey.keyHash(), golden_pubkey.keyHash()))

    if golden_pubkey.matches(verify_pubkey):
        # already checked preverifyOK
        return True

    # getting out the CN (common name) for a nicer output,
    # but maybe we don't want that -- let "experts" examine
    # "failed.pem" and everyone else just needs to know "it
    # didn't work"?
    # A generator expression replaces the map/filter/lambda chain. Name
    # components may be bytes (Python 3) or str (Python 2), so both
    # spellings of the key are accepted and bytes values are decoded.
    common_name = ''.join(
        value.decode('utf-8', 'replace')
        if isinstance(value, bytes) and not isinstance(value, str)
        else str(value)
        for key, value in cert.get_subject().get_components()
        if key in (b'CN', 'CN')
    )
    print('Certificate chain verification failed for "%s".' % common_name)
    print('Public key md5 hash is "%s" but wanted "%s".' % (verify_pubkey.keyHash(), golden_pubkey.keyHash()))
    print('Dumping failing certificate to "failed.pem".')
    # dump_certificate() returns bytes, so the file is opened in binary
    # append mode; text mode raises TypeError on Python 3.
    with open('failed.pem', 'ab') as f:
        f.write(OpenSSL.crypto.dump_certificate(OpenSSL.SSL.FILETYPE_PEM, cert))
    return False