

Python gzip.open Method Code Examples

This article collects typical usage examples of the gzip.open method in Python, drawn from open-source projects. If you are asking yourself what gzip.open does, how to call it, or how it is used in practice, the curated examples below should help. You may also want to explore other usage examples from the gzip module.


The following presents 15 code examples of gzip.open, sorted by popularity by default.
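
As a quick orientation before the examples: like the built-in open, gzip.open accepts binary modes ('rb', 'wb') and, on Python 3, text modes ('rt', 'wt') that handle decoding and newlines transparently. A minimal sketch (file names are placeholders):

import gzip

# Write text through on-the-fly gzip compression.
with gzip.open("example.txt.gz", "wt", encoding="utf-8") as f:
    f.write("hello gzip\n")

# Read it back in text mode; lines arrive as str, not bytes.
with gzip.open("example.txt.gz", "rt", encoding="utf-8") as f:
    for line in f:
        print(line.rstrip())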

Example 1: saveState

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def saveState(dataHub):
    import gzip
    import logging  # used below; assumed to be configured by the caller
    import pickle

    # Serialize the run state to a gzip-compressed pickle file, then exit.
    pickle.dump(dataHub, gzip.open(dataHub.args.save_state, "wb"))
    logging.warn("^"*20 + " saving state to pickle and exiting " + "^"*20) 
Developer: svviz, Project: svviz, Lines: 8, Source: app.py
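
Restoring the saved state simply reverses the call: pickle.load over a gzip.open handle. A minimal counterpart sketch (loadState is a hypothetical helper, not shown in the svviz source above):

import gzip
import pickle

def loadState(path):
    # Inverse of saveState: unpickle a gzip-compressed state file.
    with gzip.open(path, "rb") as f:
        return pickle.load(f)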

Example 2: main

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def main(_):
  """Runs `text_utils.simplify_nq_example` over all shards of a split.

  Prints simplified examples to a single gzipped file in the same directory
  as the input shards.
  """
  split = os.path.basename(FLAGS.data_dir)
  outpath = os.path.join(FLAGS.data_dir,
                         "simplified-nq-{}.jsonl.gz".format(split))
  with gzip.open(outpath, "wb") as fout:
    num_processed = 0
    start = time.time()
    for inpath in glob.glob(os.path.join(FLAGS.data_dir, "nq-*-??.jsonl.gz")):
      print("Processing {}".format(inpath))
      with gzip.open(inpath, "rb") as fin:
        for l in fin:
          utf8_in = l.decode("utf8", "strict")
          utf8_out = json.dumps(
              text_utils.simplify_nq_example(json.loads(utf8_in))) + u"\n"
          fout.write(utf8_out.encode("utf8"))
          num_processed += 1
          if not num_processed % 100:
            print("Processed {} examples in {}.".format(num_processed,
                                                        time.time() - start)) 
Developer: google-research-datasets, Project: natural-questions, Lines: 26, Source: simplify_nq_data.py
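
Consuming the output follows the same pattern in reverse: stream the gzipped JSONL and decode one record per line. A minimal sketch, assuming the split is "train" (the path mirrors the format string above):

import gzip
import json

with gzip.open("simplified-nq-train.jsonl.gz", "rt", encoding="utf8") as fin:
    for line in fin:
        example = json.loads(line)  # one simplified NQ example per line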

Example 3: loadW2V

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def loadW2V(self, emb_path, type="bin"):
        print("Loading W2V data...")
        num_keys = 0
        if type=="textgz":
            # this seems faster than gensim non-binary load
            for line in gzip.open(emb_path):
                l = line.strip().split()
                st=l[0].lower()
                self.pre_emb[st]=np.asarray(l[1:])
            num_keys=len(self.pre_emb)
        elif type=="text":  # elif, so the binary branch below cannot overwrite a text load
            # this seems faster than gensim non-binary load
            for line in open(emb_path):
                l = line.strip().split()
                st=l[0].lower()
                self.pre_emb[st]=np.asarray(l[1:])
            num_keys=len(self.pre_emb)
        else:
            self.pre_emb = Word2Vec.load_word2vec_format(emb_path,binary=True)
            self.pre_emb.init_sims(replace=True)
            num_keys=len(self.pre_emb.vocab)
        print("loaded word2vec len ", num_keys)
        gc.collect() 
Developer: dhwajraj, Project: deep-siamese-text-similarity, Lines: 25, Source: input_helpers.py
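
Note that on Python 3 the textgz branch iterates gzip.open(emb_path) in the default binary mode, so the dictionary keys end up as bytes rather than str. Opening in text mode sidesteps the mismatch; a minimal sketch under that assumption (the file name is a placeholder):

import gzip
import numpy as np

pre_emb = {}
# 'rt' decodes each line to str, so keys are str and values parse cleanly.
with gzip.open("embeddings.txt.gz", "rt", encoding="utf-8") as f:
    for line in f:
        parts = line.rstrip().split()
        pre_emb[parts[0].lower()] = np.asarray(parts[1:], dtype=np.float32)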

Example 4: getTsvData

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def getTsvData(self, filepath):
        print("Loading training data from "+filepath)
        x1=[]
        x2=[]
        y=[]
        # positive samples from file
        for line in open(filepath):
            l=line.strip().split("\t")
            if len(l)<3:  # need two texts plus a label column (l[2] is read below)
                continue
            # randomly swap the pair so neither ordering dominates
            if random() > 0.5:
                x1.append(l[0].lower())
                x2.append(l[1].lower())
            else:
                x1.append(l[1].lower())
                x2.append(l[0].lower())
            y.append(int(l[2]))
        return np.asarray(x1),np.asarray(x2),np.asarray(y) 
Developer: dhwajraj, Project: deep-siamese-text-similarity, Lines: 20, Source: input_helpers.py

Example 5: dumpValidation

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def dumpValidation(self,x1_text,x2_text,y,shuffled_index,dev_idx,i):
        print("dumping validation "+str(i))
        x1_shuffled=x1_text[shuffled_index]
        x2_shuffled=x2_text[shuffled_index]
        y_shuffled=y[shuffled_index]
        x1_dev=x1_shuffled[dev_idx:]
        x2_dev=x2_shuffled[dev_idx:]
        y_dev=y_shuffled[dev_idx:]
        del x1_shuffled
        del y_shuffled
        # the with block closes the file; an explicit f.close() is redundant
        with open('validation.txt'+str(i),'w') as f:
            for text1,text2,label in zip(x1_dev,x2_dev,y_dev):
                f.write(str(label)+"\t"+text1+"\t"+text2+"\n")
        del x1_dev
        del y_dev
    
    # Data Preparation
    # ================================================== 
Developer: dhwajraj, Project: deep-siamese-text-similarity, Lines: 21, Source: input_helpers.py

Example 6: assemble_batch

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def assemble_batch(story_fns, num_answer_words, format_spec):
    stories = []
    for sfn in story_fns:
        with gzip.open(sfn,'rb') as f:
            cvtd_story, _, _, _ = pickle.load(f)
        stories.append(cvtd_story)
    sents, graphs, queries, answers = zip(*stories)
    cvtd_sents = np.array(sents, np.int32)
    cvtd_queries = np.array(queries, np.int32)
    max_ans_len = max(len(a) for a in answers)
    cvtd_answers = np.stack([convert_answer(answer, num_answer_words, format_spec, max_ans_len) for answer in answers])
    num_new_nodes, new_node_strengths, new_node_ids, next_edges = zip(*graphs)
    num_new_nodes = np.stack(num_new_nodes)
    new_node_strengths = np.stack(new_node_strengths)
    new_node_ids = np.stack(new_node_ids)
    next_edges = np.stack(next_edges)
    return cvtd_sents, cvtd_queries, cvtd_answers, num_new_nodes, new_node_strengths, new_node_ids, next_edges 
Developer: hexahedria, Project: gated-graph-transformer-network, Lines: 19, Source: ggtnn_train.py

Example 7: create_mnist

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def create_mnist(tfrecord_dir, mnist_dir):
    print('Loading MNIST from "%s"' % mnist_dir)
    import gzip
    with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file:
        images = np.frombuffer(file.read(), np.uint8, offset=16)
    with gzip.open(os.path.join(mnist_dir, 'train-labels-idx1-ubyte.gz'), 'rb') as file:
        labels = np.frombuffer(file.read(), np.uint8, offset=8)
    images = images.reshape(-1, 1, 28, 28)
    images = np.pad(images, [(0,0), (0,0), (2,2), (2,2)], 'constant', constant_values=0)
    assert images.shape == (60000, 1, 32, 32) and images.dtype == np.uint8
    assert labels.shape == (60000,) and labels.dtype == np.uint8
    assert np.min(images) == 0 and np.max(images) == 255
    assert np.min(labels) == 0 and np.max(labels) == 9
    onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32)
    onehot[np.arange(labels.size), labels] = 1.0
    
    with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr:
        order = tfr.choose_shuffled_order()
        for idx in range(order.size):
            tfr.add_image(images[order[idx]])
        tfr.add_labels(onehot[order])

#---------------------------------------------------------------------------- 
Developer: zalandoresearch, Project: disentangling_conditional_gans, Lines: 25, Source: dataset_tool.py
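
The offsets 16 and 8 skip the IDX file headers: an image file starts with four big-endian uint32 fields (magic number, image count, rows, columns), a label file with two (magic number, label count). A sketch that reads the header explicitly instead of skipping it:

import gzip
import struct

with gzip.open("train-images-idx3-ubyte.gz", "rb") as f:
    magic, num, rows, cols = struct.unpack(">IIII", f.read(16))
    assert magic == 2051  # IDX magic number for unsigned-byte image data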

Example 8: create_mnistrgb

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def create_mnistrgb(tfrecord_dir, mnist_dir, num_images=1000000, random_seed=123):
    print('Loading MNIST from "%s"' % mnist_dir)
    import gzip
    with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file:
        images = np.frombuffer(file.read(), np.uint8, offset=16)
    images = images.reshape(-1, 28, 28)
    images = np.pad(images, [(0,0), (2,2), (2,2)], 'constant', constant_values=0)
    assert images.shape == (60000, 32, 32) and images.dtype == np.uint8
    assert np.min(images) == 0 and np.max(images) == 255
    
    with TFRecordExporter(tfrecord_dir, num_images) as tfr:
        rnd = np.random.RandomState(random_seed)
        for idx in range(num_images):
            tfr.add_image(images[rnd.randint(images.shape[0], size=3)])

#---------------------------------------------------------------------------- 
Developer: zalandoresearch, Project: disentangling_conditional_gans, Lines: 18, Source: dataset_tool.py

Example 9: create_cifar100

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def create_cifar100(tfrecord_dir, cifar100_dir):
    print('Loading CIFAR-100 from "%s"' % cifar100_dir)
    import pickle
    with open(os.path.join(cifar100_dir, 'train'), 'rb') as file:
        data = pickle.load(file, encoding='latin1')
    images = data['data'].reshape(-1, 3, 32, 32)
    labels = np.array(data['fine_labels'])
    assert images.shape == (50000, 3, 32, 32) and images.dtype == np.uint8
    assert labels.shape == (50000,) and labels.dtype == np.int32
    assert np.min(images) == 0 and np.max(images) == 255
    assert np.min(labels) == 0 and np.max(labels) == 99
    onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32)
    onehot[np.arange(labels.size), labels] = 1.0

    with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr:
        order = tfr.choose_shuffled_order()
        for idx in range(order.size):
            tfr.add_image(images[order[idx]])
        tfr.add_labels(onehot[order])

#---------------------------------------------------------------------------- 
Developer: zalandoresearch, Project: disentangling_conditional_gans, Lines: 23, Source: dataset_tool.py

Example 10: create_celeba

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def create_celeba(tfrecord_dir, celeba_dir, cx=89, cy=121):
    print('Loading CelebA from "%s"' % celeba_dir)
    glob_pattern = os.path.join(celeba_dir, 'img_align_celeba_png', '*.png')
    image_filenames = sorted(glob.glob(glob_pattern))
    expected_images = 202599
    if len(image_filenames) != expected_images:
        error('Expected to find %d images' % expected_images)
    
    with TFRecordExporter(tfrecord_dir, len(image_filenames)) as tfr:
        order = tfr.choose_shuffled_order()
        for idx in range(order.size):
            img = np.asarray(PIL.Image.open(image_filenames[order[idx]]))
            assert img.shape == (218, 178, 3)
            img = img[cy - 64 : cy + 64, cx - 64 : cx + 64]
            img = img.transpose(2, 0, 1) # HWC => CHW
            tfr.add_image(img)

#---------------------------------------------------------------------------- 
Developer: zalandoresearch, Project: disentangling_conditional_gans, Lines: 20, Source: dataset_tool.py

Example 11: create_gsa_mapping

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def create_gsa_mapping(path, metadata, sample_name, shuffle):
    """
    Creates the binning gold standard/gsa mapping
    """
    to_genome = name_to_genome(metadata)
    gsa_path = os.path.join(path, "anonymous_gsa.fasta")
    count = 0
    if not os.path.exists(gsa_path):
        gsa_path = os.path.join(path, "anonymous_gsa.fasta.gz") # fall back to the gzipped version
        with gzip.open(gsa_path,'rt') as gsa: # text mode, so lines are str rather than bytes
            for line in gsa:
                if line.startswith('>'):
                    count += 1
        with gzip.open(gsa_path,'rt') as gsa:
            gsa_temp = shuffle_anonymize(gsa, path, to_genome, metadata, sample_name, count, shuffle)
    else:
        with open(gsa_path,'r') as gsa:
            for line in gsa:
                if line.startswith('>'):
                    count += 1
        with open(gsa_path,'r') as gsa:
            gsa_temp = shuffle_anonymize(gsa, path, to_genome, metadata, sample_name, count, shuffle)
    os.rename(gsa_temp, gsa_path) 
Developer: CAMI-challenge, Project: CAMISIM, Lines: 25, Source: create_joint_gs.py
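
The branch above is duplicated only because the file may or may not be gzipped. A small helper that sniffs the two-byte gzip signature (0x1f 0x8b) can collapse such branches; a sketch (open_transparent is a hypothetical name, not part of CAMISIM):

import gzip

GZIP_MAGIC = b"\x1f\x8b"

def open_transparent(path, mode="rt"):
    # Peek at the first two bytes and pick the matching opener.
    with open(path, "rb") as f:
        is_gzip = f.read(2) == GZIP_MAGIC
    return gzip.open(path, mode) if is_gzip else open(path, mode)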

Example 12: read_genomes_list

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def read_genomes_list(genomes_path, additional_file = None):
    genomes_map = {}
    total_genomes = 0
    if additional_file is not None:
        with open(additional_file,'r') as add:
            for line in add:
                ncbi_id, sci_name, path, novelty = line.strip().split('\t')
                if ncbi_id in genomes_map:
                    genomes_map[ncbi_id][1].append(path)
                else:
                    genomes_map[ncbi_id] = (sci_name, [path], novelty) # this might not be a http path
                total_genomes += 1
    with open(genomes_path,'r') as genomes:
        for line in genomes:
            ncbi_id, sci_name, ftp = line.strip().split('\t')
            http = ftp.replace("ftp://","http://") # not using ftp address but http (proxies)
            if ncbi_id in genomes_map:
                genomes_map[ncbi_id][1].append(http)
            else:
                genomes_map[ncbi_id] = (sci_name, [http], 'known_strain') # sci_name is always the same for same taxid (?)
            total_genomes += 1
    return genomes_map, total_genomes 
Developer: CAMI-challenge, Project: CAMISIM, Lines: 24, Source: get_genomes.py

Example 13: download_genome

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def download_genome(genome, out_path):
    genome_path = os.path.join(out_path,"genomes")
    out_name = genome.rstrip().split('/')[-1]
    http_address = os.path.join(genome, out_name + "_genomic.fna.gz")
    opened = urllib2.urlopen(http_address) # Python 2 API; in Python 3 use urllib.request.urlopen
    out = os.path.join(genome_path, out_name + ".fa")
    tmp_out = os.path.join(genome_path, out_name + "tmp.fa")
    out_gz = out + ".gz"
    # save the compressed download to disk, ...
    with open(out_gz,'wb') as outF:
        outF.write(opened.read())
    # ... then decompress it into a temporary file and delete the archive
    gf = gzip.open(out_gz)
    new_out = open(tmp_out,'wb')
    new_out.write(gf.read())
    gf.close()
    os.remove(out_gz)
    new_out.close()
    split_by_N(tmp_out, out)
    return out 
Developer: CAMI-challenge, Project: CAMISIM, Lines: 20, Source: get_genomes.py
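
Reading the whole archive with gf.read() holds the entire decompressed genome in memory. For large files, shutil.copyfileobj streams the decompression in chunks; a minimal sketch of the same decompress-to-file step (file names are placeholders):

import gzip
import shutil

# Stream-decompress a .gz archive without loading it all into memory.
with gzip.open("genome.fa.gz", "rb") as src, open("genome.fa", "wb") as dst:
    shutil.copyfileobj(src, dst)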

Example 14: parse_data

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def parse_data(path, dataset, flatten):
    if dataset != 'train' and dataset != 't10k':
        raise NameError('dataset must be train or t10k')

    label_file = os.path.join(path, dataset + '-labels-idx1-ubyte')
    with open(label_file, 'rb') as file:
        _, num = struct.unpack(">II", file.read(8))
        labels = np.fromfile(file, dtype=np.int8)  # int8
        new_labels = np.zeros((num, 10))
        new_labels[np.arange(num), labels] = 1

    img_file = os.path.join(path, dataset + '-images-idx3-ubyte')
    with open(img_file, 'rb') as file:
        _, num, rows, cols = struct.unpack(">IIII", file.read(16))
        imgs = np.fromfile(file, dtype=np.uint8).reshape(num, rows, cols)  # uint8
        imgs = imgs.astype(np.float32) / 255.0
        if flatten:
            imgs = imgs.reshape([num, -1])

    return imgs, new_labels 
Developer: wdxtub, Project: deep-learning-note, Lines: 22, Source: utils.py

Example 15: load_json

# Required import: import gzip [as alias]
# Or: from gzip import open [as alias]
def load_json(filename, to='auto'):
    '''
    load_json(filename) yields the object represented by the json file or stream object filename.
    
    The optional argument 'to' may be set to None to indicate that the JSON data should be returned
    verbatim rather than parsed by neuropythy's denormalize system.
    '''
    from neuropythy.util import denormalize as denorm
    if pimms.is_str(filename):
        try:
            with gzip.open(filename, 'rt') as fl: dat = json.load(fl)
        except Exception:
            with open(filename, 'rt') as fl: dat = json.load(fl)
    else:
        dat = json.load(filename)
        filename = '<stream>'
    if to is None: return dat
    elif to == 'auto': return denorm(dat)
    else: raise ValueError('unrecognized to option: %s' % to) 
Developer: noahbenson, Project: neuropythy, Lines: 21, Source: core.py
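
One subtlety in this fallback pattern: gzip.open does not validate the file at open time, so a non-gzip input only fails once json.load starts reading. The try block must therefore wrap the read as well as the open, exactly as load_json does. A minimal sketch of the same pattern (read_text_maybe_gzipped is a hypothetical name):

import gzip

def read_text_maybe_gzipped(path):
    try:
        with gzip.open(path, "rt") as f:
            return f.read()  # fails here, not at open(), for non-gzip input
    except OSError:  # gzip.BadGzipFile is a subclass of OSError
        with open(path, "rt") as f:
            return f.read()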


Note: The gzip.open examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are taken from open-source projects contributed by their authors; copyright remains with the original authors, and redistribution and use are governed by each project's license. Please do not reproduce without permission.