This article collects typical usage examples of the Python method gzip.open. If you have been wondering what gzip.open does, how to call it, and what it looks like in real code, the curated examples below may help. You can also explore further uses of the gzip module it belongs to.
The following presents 15 code examples of gzip.open, sorted by popularity by default.
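Before the examples, here is a minimal sketch of the two basic ways to call gzip.open; the file names are placeholders. Binary mode ('rb'/'wb') is the default and works on bytes, while text mode ('rt'/'wt') decodes to str:
import gzip

# binary mode (the default): bytes in, bytes out
with gzip.open("data.bin.gz", "wb") as f:
    f.write(b"hello")

# text mode: 'rt'/'wt' plus an optional encoding
with gzip.open("notes.txt.gz", "wt", encoding="utf8") as f:
    f.write("hello\n")
with gzip.open("notes.txt.gz", "rt", encoding="utf8") as f:
    print(f.read())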
Example 1: saveState
# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def saveState(dataHub):
    import pickle
    import gzip
    # gzip.open gives a writable binary stream that pickle can serialize into
    pickle.dump(dataHub, gzip.open(dataHub.args.save_state, "wb"))
    logging.warning("^"*20 + " saving state to pickle and exiting " + "^"*20)
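A state saved this way can be restored with the mirror-image call. A minimal sketch, assuming path points at the file written to dataHub.args.save_state above:
import gzip
import pickle

def loadState(path):
    # gzip.open returns a readable binary stream that pickle.load consumes directly
    with gzip.open(path, "rb") as f:
        return pickle.load(f)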
Example 2: main
# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def main(_):
    """Runs `text_utils.simplify_nq_example` over all shards of a split.

    Prints simplified examples to a single gzipped file in the same directory
    as the input shards.
    """
    split = os.path.basename(FLAGS.data_dir)
    outpath = os.path.join(FLAGS.data_dir,
                           "simplified-nq-{}.jsonl.gz".format(split))
    with gzip.open(outpath, "wb") as fout:
        num_processed = 0
        start = time.time()
        for inpath in glob.glob(os.path.join(FLAGS.data_dir, "nq-*-??.jsonl.gz")):
            print("Processing {}".format(inpath))
            with gzip.open(inpath, "rb") as fin:
                for l in fin:
                    utf8_in = l.decode("utf8", "strict")
                    utf8_out = json.dumps(
                        text_utils.simplify_nq_example(json.loads(utf8_in))) + u"\n"
                    fout.write(utf8_out.encode("utf8"))
                    num_processed += 1
                    if not num_processed % 100:
                        print("Processed {} examples in {}.".format(
                            num_processed, time.time() - start))
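The gzipped JSONL file written above can be read back one record at a time. A minimal sketch, assuming the simplified-nq-{split}.jsonl.gz naming used by main:
import gzip
import json

def read_simplified(path):
    # 'rt' makes gzip.open decode each line to str, so json.loads applies directly
    with gzip.open(path, "rt", encoding="utf8") as f:
        for line in f:
            yield json.loads(line)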
Example 3: loadW2V
# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def loadW2V(self, emb_path, type="bin"):
    print("Loading W2V data...")
    num_keys = 0
    if type == "textgz":
        # this seems faster than gensim non-binary load
        # note: without 'rt', gzip.open yields bytes lines under Python 3
        for line in gzip.open(emb_path):
            l = line.strip().split()
            st = l[0].lower()
            self.pre_emb[st] = np.asarray(l[1:])
        num_keys = len(self.pre_emb)
    elif type == "text":
        # this seems faster than gensim non-binary load
        for line in open(emb_path):
            l = line.strip().split()
            st = l[0].lower()
            self.pre_emb[st] = np.asarray(l[1:])
        num_keys = len(self.pre_emb)
    else:
        # fall back to gensim's binary word2vec loader
        self.pre_emb = Word2Vec.load_word2vec_format(emb_path, binary=True)
        self.pre_emb.init_sims(replace=True)
        num_keys = len(self.pre_emb.vocab)
    print("loaded word2vec len ", num_keys)
    gc.collect()
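In Python 3 the 'textgz' branch iterates over bytes lines; opening with 'rt' yields str instead and avoids bytes keys. A minimal sketch of the same loop with explicit decoding, assuming pre_emb is a plain dict:
import gzip
import numpy as np

pre_emb = {}
with gzip.open("embeddings.txt.gz", "rt", encoding="utf8") as f:
    for line in f:
        parts = line.rstrip().split()
        # each line: a word followed by its vector components
        pre_emb[parts[0].lower()] = np.asarray(parts[1:], dtype=np.float32)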
Example 4: getTsvData
# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def getTsvData(self, filepath):
    print("Loading training data from " + filepath)
    x1 = []
    x2 = []
    y = []
    # positive samples from file
    for line in open(filepath):
        l = line.strip().split("\t")
        if len(l) < 3:  # need two sentences and a label
            continue
        # randomly swap the pair so neither side is always first
        if random() > 0.5:
            x1.append(l[0].lower())
            x2.append(l[1].lower())
        else:
            x1.append(l[1].lower())
            x2.append(l[0].lower())
        y.append(int(l[2]))
    return np.asarray(x1), np.asarray(x2), np.asarray(y)
Example 5: dumpValidation
# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def dumpValidation(self, x1_text, x2_text, y, shuffled_index, dev_idx, i):
    print("dumping validation " + str(i))
    x1_shuffled = x1_text[shuffled_index]
    x2_shuffled = x2_text[shuffled_index]
    y_shuffled = y[shuffled_index]
    x1_dev = x1_shuffled[dev_idx:]
    x2_dev = x2_shuffled[dev_idx:]
    y_dev = y_shuffled[dev_idx:]
    del x1_shuffled
    del y_shuffled
    # the with statement closes the file, so no explicit close() is needed
    with open('validation.txt' + str(i), 'w') as f:
        for text1, text2, label in zip(x1_dev, x2_dev, y_dev):
            f.write(str(label) + "\t" + text1 + "\t" + text2 + "\n")
    del x1_dev
    del y_dev
# Data Preparation
# ==================================================
Example 6: assemble_batch
# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def assemble_batch(story_fns, num_answer_words, format_spec):
    stories = []
    for sfn in story_fns:
        with gzip.open(sfn, 'rb') as f:
            cvtd_story, _, _, _ = pickle.load(f)
        stories.append(cvtd_story)
    sents, graphs, queries, answers = zip(*stories)
    cvtd_sents = np.array(sents, np.int32)
    cvtd_queries = np.array(queries, np.int32)
    max_ans_len = max(len(a) for a in answers)
    cvtd_answers = np.stack([convert_answer(answer, num_answer_words, format_spec, max_ans_len)
                             for answer in answers])
    num_new_nodes, new_node_strengths, new_node_ids, next_edges = zip(*graphs)
    num_new_nodes = np.stack(num_new_nodes)
    new_node_strengths = np.stack(new_node_strengths)
    new_node_ids = np.stack(new_node_ids)
    next_edges = np.stack(next_edges)
    return cvtd_sents, cvtd_queries, cvtd_answers, num_new_nodes, new_node_strengths, new_node_ids, next_edges
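The write side would gzip-pickle a 4-tuple per story file so that pickle.load above can unpack it. A minimal sketch with hypothetical names; the trailing elements are ignored by assemble_batch:
import gzip
import pickle

def save_story(sfn, cvtd_story, extra1, extra2, extra3):
    # matches the cvtd_story, _, _, _ unpacking in assemble_batch
    with gzip.open(sfn, 'wb') as f:
        pickle.dump((cvtd_story, extra1, extra2, extra3), f)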
Example 7: create_mnist
# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def create_mnist(tfrecord_dir, mnist_dir):
    print('Loading MNIST from "%s"' % mnist_dir)
    import gzip
    with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file:
        images = np.frombuffer(file.read(), np.uint8, offset=16)
    with gzip.open(os.path.join(mnist_dir, 'train-labels-idx1-ubyte.gz'), 'rb') as file:
        labels = np.frombuffer(file.read(), np.uint8, offset=8)
    images = images.reshape(-1, 1, 28, 28)
    images = np.pad(images, [(0,0), (0,0), (2,2), (2,2)], 'constant', constant_values=0)
    assert images.shape == (60000, 1, 32, 32) and images.dtype == np.uint8
    assert labels.shape == (60000,) and labels.dtype == np.uint8
    assert np.min(images) == 0 and np.max(images) == 255
    assert np.min(labels) == 0 and np.max(labels) == 9
    onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32)
    onehot[np.arange(labels.size), labels] = 1.0

    with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr:
        order = tfr.choose_shuffled_order()
        for idx in range(order.size):
            tfr.add_image(images[order[idx]])
        tfr.add_labels(onehot[order])
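The offset=16 and offset=8 arguments skip the IDX file headers (magic number and counts, plus row/column sizes for images). A minimal sketch that reads the image header explicitly instead of hard-coding the offset:
import gzip
import struct
import numpy as np

with gzip.open('train-images-idx3-ubyte.gz', 'rb') as f:
    magic, num, rows, cols = struct.unpack('>IIII', f.read(16))
    images = np.frombuffer(f.read(), np.uint8).reshape(num, rows, cols)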
#----------------------------------------------------------------------------
Example 8: create_mnistrgb
# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def create_mnistrgb(tfrecord_dir, mnist_dir, num_images=1000000, random_seed=123):
    print('Loading MNIST from "%s"' % mnist_dir)
    import gzip
    with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file:
        images = np.frombuffer(file.read(), np.uint8, offset=16)
    images = images.reshape(-1, 28, 28)
    images = np.pad(images, [(0,0), (2,2), (2,2)], 'constant', constant_values=0)
    assert images.shape == (60000, 32, 32) and images.dtype == np.uint8
    assert np.min(images) == 0 and np.max(images) == 255

    with TFRecordExporter(tfrecord_dir, num_images) as tfr:
        rnd = np.random.RandomState(random_seed)
        for idx in range(num_images):
            tfr.add_image(images[rnd.randint(images.shape[0], size=3)])
#----------------------------------------------------------------------------
Example 9: create_cifar100
# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def create_cifar100(tfrecord_dir, cifar100_dir):
    print('Loading CIFAR-100 from "%s"' % cifar100_dir)
    import pickle
    with open(os.path.join(cifar100_dir, 'train'), 'rb') as file:
        data = pickle.load(file, encoding='latin1')
    images = data['data'].reshape(-1, 3, 32, 32)
    labels = np.array(data['fine_labels'])
    assert images.shape == (50000, 3, 32, 32) and images.dtype == np.uint8
    assert labels.shape == (50000,) and labels.dtype == np.int32
    assert np.min(images) == 0 and np.max(images) == 255
    assert np.min(labels) == 0 and np.max(labels) == 99
    onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32)
    onehot[np.arange(labels.size), labels] = 1.0

    with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr:
        order = tfr.choose_shuffled_order()
        for idx in range(order.size):
            tfr.add_image(images[order[idx]])
        tfr.add_labels(onehot[order])
#----------------------------------------------------------------------------
Example 10: create_celeba
# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def create_celeba(tfrecord_dir, celeba_dir, cx=89, cy=121):
    print('Loading CelebA from "%s"' % celeba_dir)
    glob_pattern = os.path.join(celeba_dir, 'img_align_celeba_png', '*.png')
    image_filenames = sorted(glob.glob(glob_pattern))
    expected_images = 202599
    if len(image_filenames) != expected_images:
        error('Expected to find %d images' % expected_images)

    with TFRecordExporter(tfrecord_dir, len(image_filenames)) as tfr:
        order = tfr.choose_shuffled_order()
        for idx in range(order.size):
            img = np.asarray(PIL.Image.open(image_filenames[order[idx]]))
            assert img.shape == (218, 178, 3)
            img = img[cy - 64 : cy + 64, cx - 64 : cx + 64]
            img = img.transpose(2, 0, 1)  # HWC => CHW
            tfr.add_image(img)
#----------------------------------------------------------------------------
Example 11: create_gsa_mapping
# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def create_gsa_mapping(path, metadata, sample_name, shuffle):
    """
    Creates the binning gold standard/gsa mapping
    """
    to_genome = name_to_genome(metadata)
    gsa_path = os.path.join(path, "anonymous_gsa.fasta")
    count = 0
    if not os.path.exists(gsa_path):
        gsa_path = os.path.join(path, "anonymous_gsa.fasta.gz")  # if zipped
        # 'rt' so the gzipped lines arrive as str, matching the plain-text branch
        with gzip.open(gsa_path, 'rt') as gsa:
            for line in gsa:
                if line.startswith('>'):  # count FASTA headers
                    count += 1
        with gzip.open(gsa_path, 'rt') as gsa:
            gsa_temp = shuffle_anonymize(gsa, path, to_genome, metadata, sample_name, count, shuffle)
    else:
        with open(gsa_path, 'r') as gsa:
            for line in gsa:
                if line.startswith('>'):
                    count += 1
        with open(gsa_path, 'r') as gsa:
            gsa_temp = shuffle_anonymize(gsa, path, to_genome, metadata, sample_name, count, shuffle)
    os.rename(gsa_temp, gsa_path)
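The compressed-or-plain branching above is a common pattern that can be factored into a small helper which picks the opener by extension. A minimal sketch:
import gzip

def open_maybe_gzipped(path, mode='rt'):
    # gzip.open and open share enough interface for line-by-line iteration
    if path.endswith('.gz'):
        return gzip.open(path, mode)
    return open(path, mode)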
Example 12: read_genomes_list
# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def read_genomes_list(genomes_path, additional_file=None):
    genomes_map = {}
    total_genomes = 0
    if additional_file is not None:
        with open(additional_file, 'r') as add:
            for line in add:
                ncbi_id, sci_name, path, novelty = line.strip().split('\t')
                if ncbi_id in genomes_map:
                    genomes_map[ncbi_id][1].append(path)
                else:
                    genomes_map[ncbi_id] = (sci_name, [path], novelty)  # this might not be a http path
                total_genomes += 1
    with open(genomes_path, 'r') as genomes:
        for line in genomes:
            ncbi_id, sci_name, ftp = line.strip().split('\t')
            http = ftp.replace("ftp://", "http://")  # not using ftp address but http (proxies)
            if ncbi_id in genomes_map:
                genomes_map[ncbi_id][1].append(http)
            else:
                genomes_map[ncbi_id] = (sci_name, [http], 'known_strain')  # sci_name is always the same for same taxid (?)
            total_genomes += 1
    return genomes_map, total_genomes
Example 13: download_genome
# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def download_genome(genome, out_path):
    genome_path = os.path.join(out_path, "genomes")
    out_name = genome.rstrip().split('/')[-1]
    http_address = os.path.join(genome, out_name + "_genomic.fna.gz")
    opened = urllib2.urlopen(http_address)  # urllib2 is Python 2; use urllib.request under Python 3
    out = os.path.join(genome_path, out_name + ".fa")
    tmp_out = os.path.join(genome_path, out_name + "tmp.fa")
    out_gz = out + ".gz"
    # save the compressed download to disk...
    with open(out_gz, 'wb') as outF:
        outF.write(opened.read())
    # ...then decompress it into the temporary FASTA file
    gf = gzip.open(out_gz)
    new_out = open(tmp_out, 'wb')
    new_out.write(gf.read())
    gf.close()
    os.remove(out_gz)
    new_out.close()
    split_by_N(tmp_out, out)
    return out
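gf.read() pulls the whole decompressed genome into memory at once; gzip.open also combines with shutil.copyfileobj for chunked streaming decompression. A minimal sketch:
import gzip
import shutil

def gunzip(src_gz, dst):
    # stream decompressed bytes in chunks instead of one big read()
    with gzip.open(src_gz, 'rb') as fin, open(dst, 'wb') as fout:
        shutil.copyfileobj(fin, fout)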
Example 14: parse_data
# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def parse_data(path, dataset, flatten):
    if dataset != 'train' and dataset != 't10k':
        raise NameError('dataset must be train or t10k')

    # labels: 8-byte IDX header (magic, count), then one byte per label
    label_file = os.path.join(path, dataset + '-labels-idx1-ubyte')
    with open(label_file, 'rb') as file:
        _, num = struct.unpack(">II", file.read(8))
        labels = np.fromfile(file, dtype=np.int8)  # int8
    new_labels = np.zeros((num, 10))
    new_labels[np.arange(num), labels] = 1

    # images: 16-byte IDX header (magic, count, rows, cols), then uint8 pixels
    img_file = os.path.join(path, dataset + '-images-idx3-ubyte')
    with open(img_file, 'rb') as file:
        _, num, rows, cols = struct.unpack(">IIII", file.read(16))
        imgs = np.fromfile(file, dtype=np.uint8).reshape(num, rows, cols)  # uint8
    imgs = imgs.astype(np.float32) / 255.0
    if flatten:
        imgs = imgs.reshape([num, -1])
    return imgs, new_labels
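If the MNIST files are kept gzipped, the same parsing works by swapping open for gzip.open, with one caveat: np.fromfile needs a real file descriptor, so the gzip stream has to be read() into np.frombuffer instead. A minimal sketch for the label file:
import gzip
import struct
import numpy as np

with gzip.open('train-labels-idx1-ubyte.gz', 'rb') as f:
    _, num = struct.unpack('>II', f.read(8))
    # frombuffer works on the decompressed bytes where fromfile cannot
    labels = np.frombuffer(f.read(), dtype=np.uint8)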
Example 15: load_json
# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def load_json(filename, to='auto'):
    '''
    load_json(filename) yields the object represented by the json file or stream object
    filename. The optional argument to may be set to None to indicate that the JSON data
    should be returned verbatim rather than parsed by neuropythy's denormalize system.
    '''
    from neuropythy.util import denormalize as denorm
    if pimms.is_str(filename):
        # try the file as gzip first, then fall back to plain text
        try:
            with gzip.open(filename, 'rt') as fl: dat = json.load(fl)
        except Exception:
            with open(filename, 'rt') as fl: dat = json.load(fl)
    else:
        dat = json.load(filename)
        filename = '<stream>'
    if to is None: return dat
    elif to == 'auto': return denorm(dat)
    else: raise ValueError('unrecognized to option: %s' % to)
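The try/except works because reading through gzip.open raises an OSError (BadGzipFile on Python 3.8+) when the file is not actually gzip-compressed, at which point the plain open fallback takes over. A minimal usage sketch with hypothetical file names:
obj = load_json('subject.json.gz')  # parsed from the gzipped file
obj = load_json('subject.json')     # same call; falls back to plain open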