

Python gzip.open Method Code Examples

This article collects typical usage examples of Python's gzip.open method. If you are wondering what gzip.open does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also explore further usage examples from the gzip module.


The sections below present 15 code examples of the gzip.open method, ordered by popularity by default.
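
Before diving into the examples, here is a minimal sketch of the basic gzip.open call patterns (the file name is hypothetical):

import gzip

# Write text: "wt" compresses the stream and encodes str to bytes.
with gzip.open("example.txt.gz", "wt", encoding="utf-8") as f:
    f.write("hello gzip\n")

# Read it back as text: "rt" decompresses and decodes bytes to str.
with gzip.open("example.txt.gz", "rt", encoding="utf-8") as f:
    print(f.read())

# The default mode is "rb", which yields raw decompressed bytes.
with gzip.open("example.txt.gz", "rb") as f:
    raw = f.read()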

Example 1: saveState

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def saveState(dataHub):
    import pickle
    import gzip
    import logging

    pickle.dump(dataHub, gzip.open(dataHub.args.save_state, "wb"))
    logging.warning("^"*20 + " saving state to pickle and exiting " + "^"*20)
Author: svviz, Project: svviz, Lines: 8, Source: app.py
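
The saved state can be restored with the mirror-image call; a sketch, not part of the original app (the file name is hypothetical):

import gzip
import pickle

# Unpickle the object back out of the gzip stream.
with gzip.open("state.pickle.gz", "rb") as f:
    dataHub = pickle.load(f)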

Example 2: main

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def main(_):
  """Runs `text_utils.simplify_nq_example` over all shards of a split.

  Prints simplified examples to a single gzipped file in the same directory
  as the input shards.
  """
  split = os.path.basename(FLAGS.data_dir)
  outpath = os.path.join(FLAGS.data_dir,
                         "simplified-nq-{}.jsonl.gz".format(split))
  with gzip.open(outpath, "wb") as fout:
    num_processed = 0
    start = time.time()
    for inpath in glob.glob(os.path.join(FLAGS.data_dir, "nq-*-??.jsonl.gz")):
      print("Processing {}".format(inpath))
      with gzip.open(inpath, "rb") as fin:
        for l in fin:
          utf8_in = l.decode("utf8", "strict")
          utf8_out = json.dumps(
              text_utils.simplify_nq_example(json.loads(utf8_in))) + u"\n"
          fout.write(utf8_out.encode("utf8"))
          num_processed += 1
          if not num_processed % 100:
            print("Processed {} examples in {}.".format(num_processed,
                                                        time.time() - start)) 
Author: google-research-datasets, Project: natural-questions, Lines: 26, Source: simplify_nq_data.py
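
The example above opens both files in binary mode and handles UTF-8 by hand. An equivalent, shorter pattern (a sketch with hypothetical file names) opens both ends in text mode and lets gzip.open do the encoding work:

import gzip
import json

# Text-mode equivalent of the decode/encode dance above.
with gzip.open("out.jsonl.gz", "wt", encoding="utf-8") as fout, \
        gzip.open("in.jsonl.gz", "rt", encoding="utf-8") as fin:
    for line in fin:
        fout.write(json.dumps(json.loads(line)) + "\n")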

Example 3: loadW2V

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def loadW2V(self, emb_path, type="bin"):
    print("Loading W2V data...")
    num_keys = 0
    if type == "textgz":
        # this seems faster than gensim non-binary load
        for line in gzip.open(emb_path):
            l = line.strip().split()
            st = l[0].lower()
            self.pre_emb[st] = np.asarray(l[1:])
        num_keys = len(self.pre_emb)
    elif type == "text":  # elif, so the binary fallback below only runs for type=="bin"
        # this seems faster than gensim non-binary load
        for line in open(emb_path):
            l = line.strip().split()
            st = l[0].lower()
            self.pre_emb[st] = np.asarray(l[1:])
        num_keys = len(self.pre_emb)
    else:
        self.pre_emb = Word2Vec.load_word2vec_format(emb_path, binary=True)
        self.pre_emb.init_sims(replace=True)
        num_keys = len(self.pre_emb.vocab)
    print("loaded word2vec len ", num_keys)
    gc.collect()
Author: dhwajraj, Project: deep-siamese-text-similarity, Lines: 25, Source: input_helpers.py

Example 4: getTsvData

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def getTsvData(self, filepath):
    print("Loading training data from " + filepath)
    x1 = []
    x2 = []
    y = []
    # positive samples from file
    for line in open(filepath):
        l = line.strip().split("\t")
        if len(l) < 3:  # need two text columns plus the label in l[2]
            continue
        if random() > 0.5:
            x1.append(l[0].lower())
            x2.append(l[1].lower())
        else:
            x1.append(l[1].lower())
            x2.append(l[0].lower())
        y.append(int(l[2]))
    return np.asarray(x1), np.asarray(x2), np.asarray(y)
Author: dhwajraj, Project: deep-siamese-text-similarity, Lines: 20, Source: input_helpers.py

Example 5: dumpValidation

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def dumpValidation(self, x1_text, x2_text, y, shuffled_index, dev_idx, i):
    print("dumping validation " + str(i))
    x1_shuffled = x1_text[shuffled_index]
    x2_shuffled = x2_text[shuffled_index]
    y_shuffled = y[shuffled_index]
    x1_dev = x1_shuffled[dev_idx:]
    x2_dev = x2_shuffled[dev_idx:]
    y_dev = y_shuffled[dev_idx:]
    del x1_shuffled
    del y_shuffled
    with open('validation.txt' + str(i), 'w') as f:
        # the with block closes the file automatically; no explicit f.close() needed
        for text1, text2, label in zip(x1_dev, x2_dev, y_dev):
            f.write(str(label) + "\t" + text1 + "\t" + text2 + "\n")
    del x1_dev
    del y_dev

# Data Preparation
# ==================================================
Author: dhwajraj, Project: deep-siamese-text-similarity, Lines: 21, Source: input_helpers.py

Example 6: assemble_batch

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def assemble_batch(story_fns, num_answer_words, format_spec):
    stories = []
    for sfn in story_fns:
        with gzip.open(sfn,'rb') as f:
            cvtd_story, _, _, _ = pickle.load(f)
        stories.append(cvtd_story)
    sents, graphs, queries, answers = zip(*stories)
    cvtd_sents = np.array(sents, np.int32)
    cvtd_queries = np.array(queries, np.int32)
    max_ans_len = max(len(a) for a in answers)
    cvtd_answers = np.stack([convert_answer(answer, num_answer_words, format_spec, max_ans_len) for answer in answers])
    num_new_nodes, new_node_strengths, new_node_ids, next_edges = zip(*graphs)
    num_new_nodes = np.stack(num_new_nodes)
    new_node_strengths = np.stack(new_node_strengths)
    new_node_ids = np.stack(new_node_ids)
    next_edges = np.stack(next_edges)
    return cvtd_sents, cvtd_queries, cvtd_answers, num_new_nodes, new_node_strengths, new_node_ids, next_edges 
Author: hexahedria, Project: gated-graph-transformer-network, Lines: 19, Source: ggtnn_train.py

Example 7: create_mnist

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def create_mnist(tfrecord_dir, mnist_dir):
    print('Loading MNIST from "%s"' % mnist_dir)
    import gzip
    with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file:
        images = np.frombuffer(file.read(), np.uint8, offset=16)
    with gzip.open(os.path.join(mnist_dir, 'train-labels-idx1-ubyte.gz'), 'rb') as file:
        labels = np.frombuffer(file.read(), np.uint8, offset=8)
    images = images.reshape(-1, 1, 28, 28)
    images = np.pad(images, [(0,0), (0,0), (2,2), (2,2)], 'constant', constant_values=0)
    assert images.shape == (60000, 1, 32, 32) and images.dtype == np.uint8
    assert labels.shape == (60000,) and labels.dtype == np.uint8
    assert np.min(images) == 0 and np.max(images) == 255
    assert np.min(labels) == 0 and np.max(labels) == 9
    onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32)
    onehot[np.arange(labels.size), labels] = 1.0
    
    with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr:
        order = tfr.choose_shuffled_order()
        for idx in range(order.size):
            tfr.add_image(images[order[idx]])
        tfr.add_labels(onehot[order])

#---------------------------------------------------------------------------- 
Author: zalandoresearch, Project: disentangling_conditional_gans, Lines: 25, Source: dataset_tool.py

Example 8: create_mnistrgb

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def create_mnistrgb(tfrecord_dir, mnist_dir, num_images=1000000, random_seed=123):
    print('Loading MNIST from "%s"' % mnist_dir)
    import gzip
    with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file:
        images = np.frombuffer(file.read(), np.uint8, offset=16)
    images = images.reshape(-1, 28, 28)
    images = np.pad(images, [(0,0), (2,2), (2,2)], 'constant', constant_values=0)
    assert images.shape == (60000, 32, 32) and images.dtype == np.uint8
    assert np.min(images) == 0 and np.max(images) == 255
    
    with TFRecordExporter(tfrecord_dir, num_images) as tfr:
        rnd = np.random.RandomState(random_seed)
        for idx in range(num_images):
            tfr.add_image(images[rnd.randint(images.shape[0], size=3)])

#---------------------------------------------------------------------------- 
Author: zalandoresearch, Project: disentangling_conditional_gans, Lines: 18, Source: dataset_tool.py

Example 9: create_cifar100

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def create_cifar100(tfrecord_dir, cifar100_dir):
    print('Loading CIFAR-100 from "%s"' % cifar100_dir)
    import pickle
    with open(os.path.join(cifar100_dir, 'train'), 'rb') as file:
        data = pickle.load(file, encoding='latin1')
    images = data['data'].reshape(-1, 3, 32, 32)
    labels = np.array(data['fine_labels'])
    assert images.shape == (50000, 3, 32, 32) and images.dtype == np.uint8
    assert labels.shape == (50000,) and labels.dtype == np.int32
    assert np.min(images) == 0 and np.max(images) == 255
    assert np.min(labels) == 0 and np.max(labels) == 99
    onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32)
    onehot[np.arange(labels.size), labels] = 1.0

    with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr:
        order = tfr.choose_shuffled_order()
        for idx in range(order.size):
            tfr.add_image(images[order[idx]])
        tfr.add_labels(onehot[order])

#---------------------------------------------------------------------------- 
Author: zalandoresearch, Project: disentangling_conditional_gans, Lines: 23, Source: dataset_tool.py

Example 10: create_celeba

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def create_celeba(tfrecord_dir, celeba_dir, cx=89, cy=121):
    print('Loading CelebA from "%s"' % celeba_dir)
    glob_pattern = os.path.join(celeba_dir, 'img_align_celeba_png', '*.png')
    image_filenames = sorted(glob.glob(glob_pattern))
    expected_images = 202599
    if len(image_filenames) != expected_images:
        error('Expected to find %d images' % expected_images)
    
    with TFRecordExporter(tfrecord_dir, len(image_filenames)) as tfr:
        order = tfr.choose_shuffled_order()
        for idx in range(order.size):
            img = np.asarray(PIL.Image.open(image_filenames[order[idx]]))
            assert img.shape == (218, 178, 3)
            img = img[cy - 64 : cy + 64, cx - 64 : cx + 64]
            img = img.transpose(2, 0, 1) # HWC => CHW
            tfr.add_image(img)

#---------------------------------------------------------------------------- 
Author: zalandoresearch, Project: disentangling_conditional_gans, Lines: 20, Source: dataset_tool.py

Example 11: create_gsa_mapping

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def create_gsa_mapping(path, metadata, sample_name, shuffle):
    """
    Creates the binning gold standard/gsa mapping
    """
    to_genome = name_to_genome(metadata)
    gsa_path = os.path.join(path, "anonymous_gsa.fasta")
    count = 0
    if not os.path.exists(gsa_path):
        gsa_path = os.path.join(path, "anonymous_gsa.fasta.gz")  # if zipped
        with gzip.open(gsa_path, 'rt') as gsa:  # text mode, so each line is str for startswith('>')
            for line in gsa:
                if line.startswith('>'):
                    count += 1
        with gzip.open(gsa_path, 'rt') as gsa:
            gsa_temp = shuffle_anonymize(gsa, path, to_genome, metadata, sample_name, count, shuffle)
    else:
        with open(gsa_path, 'r') as gsa:
            for line in gsa:
                if line.startswith('>'):
                    count += 1
        with open(gsa_path, 'r') as gsa:
            gsa_temp = shuffle_anonymize(gsa, path, to_genome, metadata, sample_name, count, shuffle)
    os.rename(gsa_temp, gsa_path)
Author: CAMI-challenge, Project: CAMISIM, Lines: 25, Source: create_joint_gs.py

Example 12: read_genomes_list

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def read_genomes_list(genomes_path, additional_file = None):
    genomes_map = {}
    total_genomes = 0
    if additional_file is not None:
        with open(additional_file,'r') as add:
            for line in add:
                ncbi_id, sci_name, path, novelty = line.strip().split('\t')
                if ncbi_id in genomes_map:
                    genomes_map[ncbi_id][1].append(path)
                else:
                    genomes_map[ncbi_id] = (sci_name, [path], novelty) # this might not be a http path
                total_genomes += 1
    with open(genomes_path,'r') as genomes:
        for line in genomes:
            ncbi_id, sci_name, ftp = line.strip().split('\t')
            http = ftp.replace("ftp://","http://") # not using ftp address but http (proxies)
            if ncbi_id in genomes_map:
                genomes_map[ncbi_id][1].append(http)
            else:
                genomes_map[ncbi_id] = (sci_name, [http], 'known_strain') # sci_name is always the same for same taxid (?)
            total_genomes += 1
    return genomes_map, total_genomes 
Author: CAMI-challenge, Project: CAMISIM, Lines: 24, Source: get_genomes.py

Example 13: download_genome

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def download_genome(genome, out_path):
    genome_path = os.path.join(out_path, "genomes")
    out_name = genome.rstrip().split('/')[-1]
    http_address = os.path.join(genome, out_name + "_genomic.fna.gz")
    opened = urllib2.urlopen(http_address)  # Python 2 API; urllib.request.urlopen on Python 3
    out = os.path.join(genome_path, out_name + ".fa")
    tmp_out = os.path.join(genome_path, out_name + "tmp.fa")
    out_gz = out + ".gz"
    with open(out_gz, 'wb') as outF:
        outF.write(opened.read())
    # decompress the downloaded archive into a temporary file, then delete the archive
    gf = gzip.open(out_gz)
    new_out = open(tmp_out, 'wb')
    new_out.write(gf.read())
    gf.close()
    os.remove(out_gz)
    new_out.close()
    split_by_N(tmp_out, out)
    return out
Author: CAMI-challenge, Project: CAMISIM, Lines: 20, Source: get_genomes.py

Example 14: parse_data

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def parse_data(path, dataset, flatten):
    if dataset != 'train' and dataset != 't10k':
        raise NameError('dataset must be train or t10k')

    label_file = os.path.join(path, dataset + '-labels-idx1-ubyte')
    with open(label_file, 'rb') as file:
        _, num = struct.unpack(">II", file.read(8))
        labels = np.fromfile(file, dtype=np.int8)  # int8
        new_labels = np.zeros((num, 10))
        new_labels[np.arange(num), labels] = 1

    img_file = os.path.join(path, dataset + '-images-idx3-ubyte')
    with open(img_file, 'rb') as file:
        _, num, rows, cols = struct.unpack(">IIII", file.read(16))
        imgs = np.fromfile(file, dtype=np.uint8).reshape(num, rows, cols)  # uint8
        imgs = imgs.astype(np.float32) / 255.0
        if flatten:
            imgs = imgs.reshape([num, -1])

    return imgs, new_labels 
Author: wdxtub, Project: deep-learning-note, Lines: 22, Source: utils.py
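
A hypothetical call, assuming the standard MNIST IDX files sit uncompressed in ./mnist:

imgs, labels = parse_data("./mnist", "train", flatten=True)
# imgs: float32 array of shape (60000, 784); labels: one-hot array of shape (60000, 10)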

Example 15: load_json

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def load_json(filename, to='auto'):
    '''
    load_json(filename) yields the object represented by the json file or stream object filename.
    
    The optional argument to may be set to None to indicate that the JSON data should be returned
    verbatim rather than parsed by neuropythy's denormalize system.
    '''
    from neuropythy.util import denormalize as denorm
    if pimms.is_str(filename):
        try:
            with gzip.open(filename, 'rt') as fl: dat = json.load(fl)
        except Exception:
            with open(filename, 'rt') as fl: dat = json.load(fl)
    else:
        dat = json.load(filename)
        filename = '<stream>'
    if to is None: return dat
    elif to == 'auto': return denorm(dat)
    else: raise ValueError('unrecognized to option: %s' % to) 
Author: noahbenson, Project: neuropythy, Lines: 21, Source: core.py
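
Because of the gzip-then-plain fallback above, the same call works whether or not the file is compressed (file names here are hypothetical):

data = load_json("subject01.json.gz")        # gzip-compressed JSON
raw = load_json("subject01.json", to=None)   # plain JSON, returned verbatim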


Note: The gzip.open examples in this article were compiled by 纯净天空 from open-source projects and documentation platforms such as GitHub and MSDocs. Copyright of each snippet remains with its original authors; consult the corresponding project's license before using or redistributing the code. Do not reproduce without permission.