当前位置: 首页>>代码示例>>Python>>正文


Python common.timesofar函数代码示例

本文整理汇总了Python中utils.common.timesofar函数的典型用法代码示例。如果您正苦于以下问题:Python timesofar函数的具体用法?Python timesofar怎么用?Python timesofar使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了timesofar函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_genome_in_bit

def get_genome_in_bit(chr_fa_folder):
    '''Encode each chromosome FASTA sequence as a bitarray.

    chr_fa_folder is the folder holding the gzipped FASTA files, named
    chr<i>.fa.gz (e.g. chr1.fa.gz) for chromosomes 1-22, X, Y and MT.
    The files can be downloaded from the NCBI FTP site:

        ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Homo_sapiens/GRCh37.p13/Primary_Assembly/assembled_chromosomes/FASTA/

    Returns a dictionary mapping the chromosome name (a string) to the
    bitarray built from that chromosome's sequence lines.
    '''
    chromosomes = [str(num) for num in range(1, 23)]
    chromosomes.extend(['X', 'Y', 'MT'])
    chr_bit_d = {}
    started_all = time.time()
    for chrom in chromosomes:
        started_one = time.time()
        base_name = 'chr{}.fa.gz'.format(chrom)
        print("Loading {}...".format(base_name), end='')
        with open_anyfile(os.path.join(chr_fa_folder, base_name)) as seq_f:
            # the first line of the file is the FASTA header, not sequence
            seq_f.readline()
            encoded = bitarray()
            for raw_line in seq_f:
                encoded += nuc_to_bit(raw_line.rstrip('\n'))
            chr_bit_d[chrom] = encoded
        print("done.[{}]".format(timesofar(started_one)))
    print('='*20)
    print("Finished. [{}]".format(timesofar(started_all)))

    return chr_bit_d
开发者ID:bainscou,项目名称:myvariant.info,代码行数:33,代码来源:validate.py

示例2: redo_parse_gbff

def redo_parse_gbff(path):
    '''Re-run only the parsing step and record the result in src_dump.

    Call this manually when main() broke during parsing and parsing has to
    be restarted after the fix. Nothing is downloaded here, so the recorded
    "download" time is simply what timesofar() reports immediately after
    the timer is started.
    '''
    src_dump = get_src_dump()

    total_started = time.time()
    t_download = timesofar(total_started)
    parsing_started = time.time()
    # flag the 'entrez' record as being parsed
    src_dump.update({'_id': 'entrez'}, {'$set': {'status': 'parsing'}})
    parse_gbff(path)
    t_parsing = timesofar(parsing_started)
    t_total = timesofar(total_started)

    # flag the run as successful and store the collected timings
    timings = {
        'download': t_download,
        'parsing': t_parsing,
        'total': t_total
    }
    _updates = {
        'status': 'success',
        'time': timings,
        'pending_to_upload': True    # a flag to trigger data uploading
    }
    src_dump.update({'_id': 'entrez'}, {'$set': _updates})
开发者ID:putmantime,项目名称:mygeneinfo_gh,代码行数:29,代码来源:dl_entrez.py

示例3: doc_feeder

    def doc_feeder(self, index_type=None, index_name=None, step=10000, verbose=True, query=None, scroll='10m', **kwargs):
        '''Yield every document returned by a scan over the index.

        index_type / index_name fall back to self.ES_INDEX_TYPE /
        self.ES_INDEX_NAME when not given. step is passed to helpers.scan
        as the page size and is also the progress-reporting interval.
        query, scroll and any extra keyword arguments are forwarded to
        helpers.scan. When verbose is true, progress is printed to stdout.
        '''
        conn = self.conn
        index_name = index_name or self.ES_INDEX_NAME
        doc_type = index_type or self.ES_INDEX_TYPE

        n = self.count(query=query)['count']
        cnt = 0
        t0 = time.time()
        if verbose:
            print('\ttotal docs: {}'.format(n))

        def _percent(done):
            # With no matching docs (n == 0) the final report used to raise
            # ZeroDivisionError; an empty scan is reported as complete.
            return done * 100. / n if n else 100.

        _kwargs = kwargs.copy()
        _kwargs.update(dict(size=step, index=index_name, doc_type=doc_type))
        res = helpers.scan(conn, query=query, scroll=scroll, **_kwargs)
        t1 = time.time()
        for doc in res:
            if verbose and cnt % step == 0:
                if cnt != 0:
                    # close the progress line of the batch that just ended
                    print('done.[%.1f%%,%s]' % (_percent(cnt), timesofar(t1)))
                print('\t{}-{}...'.format(cnt+1, min(cnt+step, n)), end='')
                t1 = time.time()
            yield doc
            cnt += 1
        if verbose:
            print('done.[%.1f%%,%s]' % (_percent(cnt), timesofar(t1)))
            print("Finished! [{}]".format(timesofar(t0)))
开发者ID:putmantime,项目名称:mygeneinfo_gh,代码行数:26,代码来源:es.py

示例4: load_contig

def load_contig(contig):
    '''Save the CADD docs of one contig into the "cadd" mongodb collection.

    Docs produced by fetch_generator(tabix, contig) are inserted in batches
    of 100; progress is printed every 100000 docs, followed by a summary.
    '''
    batch_size = 100
    report_every = 100000

    tabix = pysam.Tabixfile(whole_genome)
    target_coll = get_src_db()["cadd"]
    t0 = time.time()
    cnt = 0    # stays 0 when the contig yields no docs
    pending = []
    for cnt, doc in enumerate(fetch_generator(tabix, contig), 1):
        pending.append(doc)
        if len(pending) == batch_size:
            target_coll.insert(pending, manipulate=False, check_keys=False, w=0)
            pending = []
        if not cnt % report_every:
            print(cnt, timesofar(t0))
    # flush the last, partially filled batch
    if pending:
        target_coll.insert(pending, manipulate=False, check_keys=False, w=0)
    print("successfully loaded cadd chromosome %s into mongodb" % contig)
    print("total docs: {}; total time: {}".format(cnt, timesofar(t0)))
开发者ID:mjuchler,项目名称:myvariant.info,代码行数:25,代码来源:cadd_parser.py

示例5: doc_feeder

def doc_feeder(collection, step=1000, s=None, e=None, inbatch=False, query=None, batch_callback=None, fields=None):
    '''An iterator returning docs in a collection, fetched in batches.

    collection      a collection object, or a collection name (str) that is
                    looked up in the db returned by get_src_db().
    step            cursor batch size and progress-reporting interval.
    s, e            optional start (skipped docs) and end positions.
    inbatch         if true, yield lists of up to `step` docs instead of
                    single docs.
    query           optional filter passed to find, e.g.
                    doc_feeder(collection, query={'taxid': {'$in': [9606, 10090, 10116]}})
    batch_callback  optional callback fn(cnt, t), called after every batch.
    fields          optional parameter passed to find to restrict the
                    fields to return.

    The cursor is always closed, including when the generator is closed
    before it is exhausted.
    '''
    if isinstance(collection, str):
        # only a collection *name* needs a db lookup
        coll = get_src_db()[collection]
    else:
        coll = collection
    # query/fields used to be silently ignored. They are passed positionally
    # so both the (spec, fields) and (filter, projection) find() signatures
    # accept them.
    cur = coll.find(query, fields)
    n = cur.count()
    s = s or 0
    e = e or n
    print('Retrieving {} documents from database "{}".'.format(n, collection))
    t0 = time.time()
    if inbatch:
        doc_li = []
    cnt = 0
    t1 = time.time()

    def _percent(done):
        # an empty result (n == 0) used to raise ZeroDivisionError
        return done * 100. / n if n else 100.

    try:
        if s:
            cur.skip(s)
            cnt = s
            print("Skipping {} documents.".format(s))
        if e:
            cur.limit(e - s)
        cur.batch_size(step)
        print("Processing {}-{} documents...".format(cnt + 1, min(cnt + step, e)), end='')
        for doc in cur:
            if inbatch:
                doc_li.append(doc)
            else:
                yield doc
            cnt += 1
            if cnt % step == 0:
                if inbatch:
                    yield doc_li
                    doc_li = []
                print('Done.[%.1f%%,%s]' % (_percent(cnt), timesofar(t1)))
                if batch_callback:
                    batch_callback(cnt, time.time()-t1)
                if cnt < e:
                    t1 = time.time()
                    print("Processing {}-{} documents...".format(cnt + 1, min(cnt + step, e)), end='')
        if inbatch and doc_li:
            # Important: need to yield the last batch here
            yield doc_li

        print('Done.[%.1f%%,%s]' % (_percent(cnt), timesofar(t1)))
        print("=" * 20)
        print('Finished.[total time: {}]'.format(timesofar(t0)))
    finally:
        cur.close()
开发者ID:bainscou,项目名称:myvariant.info,代码行数:56,代码来源:mongo.py

示例6: two_docs_iterator

def two_docs_iterator(b1, b2, id_list, step=10000):
    t0 = time.time()
    n = len(id_list)
    for i in range(0, n, step):
        t1 = time.time()
        print "Processing %d-%d documents..." % (i + 1, min(i + step, n)),
        _ids = id_list[i:i+step]
        iter1 = b1.mget_from_ids(_ids, asiter=True)
        iter2 = b2.mget_from_ids(_ids, asiter=True)
        for doc1, doc2 in zip(iter1, iter2):
            yield doc1, doc2
        print 'Done.[%.1f%%,%s]' % (i*100./n, timesofar(t1))
    print "="*20
    print 'Finished.[total time: %s]' % timesofar(t0)
开发者ID:gkarthik,项目名称:mygene.hub,代码行数:14,代码来源:diff.py

示例7: main

    def main(self, index, collection, diff_filepath, validate=False, wait=60):
        '''Apply a stored diff to the ES index and optionally validate it.

        The diff object is loaded from diff_filepath; its 'add', 'delete'
        and 'update' entries are turned into bulk actions and sent to ES,
        with the time spent on each step printed to stdout.

        When validate is true, wait `wait` seconds, copy the ES docs that
        have the `collection` field into a temporary source collection,
        diff it against the diff's 'source' collection, drop the temporary
        collection and return the diff result. Otherwise return None.
        '''
        self._index = index
        self._esi._index = index
        diff = loadobj(diff_filepath)
        source_collection = diff['source']
        add_list = self.add(source_collection, diff['add'])
        delete_list = self.delete(collection, diff['delete'])
        update_list = self.update(diff['update'])
        t00 = time()
        print('Adding new {} docs...'.format(len(diff['add'])))
        t0 = time()
        bulk(self._es, add_list)
        print("Done. [{}]".format(timesofar(t0)))
        print('Deleting {} docs'.format(len(diff['delete'])))
        t0 = time()
        bulk(self._es, delete_list)
        print("Done. [{}]".format(timesofar(t0)))
        print('Updating {} docs'.format(len(diff['update'])))
        t0 = time()
        bulk(self._es, update_list)
        print("Done. [{}]".format(timesofar(t0)))
        print("="*20)
        print("Finished! [{}]".format(timesofar(t00)))
        if validate:
            # this line used to be indented with a tab mixed with spaces,
            # which Python 3 rejects with a TabError
            print('Waiting {}s to let ES to finish...'.format(wait), end="")
            sleep(wait)
            print("Done.")
            print("Validating...")
            t0 = time()
            # match every doc that has the `collection` field
            q = {
                "query": {
                    "constant_score": {
                        "filter": {
                            "exists": {
                                "field": collection
                            }
                        }
                    }
                }
            }
            data = self._esi.doc_feeder(query=q, _source=collection)
            temp_collection = collection + '_temp_' + get_random_string()
            self._src[temp_collection].drop()
            load_source(temp_collection, src_data=data)
            c1 = get_backend(source_collection, 'mongodb')
            c2 = get_backend(temp_collection, 'mongodb')
            diff_result = diff_collections(c1, c2, use_parallel=False)
            self._src[temp_collection].drop()
            # report the elapsed time, not the raw start timestamp
            print("Done. [{}]".format(timesofar(t0)))
            return diff_result
开发者ID:dmcgoldrick,项目名称:myvariant.info,代码行数:50,代码来源:es_sync.py

示例8: apply_changes

    def apply_changes(self, changes, verify=True, noconfirm=False):
        '''Apply the 'add', 'delete' and 'update' entries of `changes`.

        When verify is true, self.pre_verify_changes(changes) runs first.
        Unless noconfirm is true the user is asked to confirm; anything
        other than 'Y' prints "Aborted." and returns -1. Docs to add or
        update are read from the changes['source'] collection of the target
        db, stamped with changes['timestamp'] and inserted into the ES
        backend in chunks of self.step ids.
        '''
        if verify:
            self.pre_verify_changes(changes)

        confirmed = noconfirm or ask('\nContinue to apply changes?') == 'Y'
        if not confirmed:
            print("Aborted.")
            return -1

        step = self.step
        _db = get_target_db()
        src = GeneDocMongoDBBackend(_db[changes['source']])
        target = GeneDocESBackend(self)
        _timestamp = changes['timestamp']

        def _index_in_chunks(ids):
            # copy the docs for `ids` from the source backend to the target,
            # printing the running doc count and the time of every chunk
            copied = 0
            for id_chunk in iter_n(ids, step):
                chunk_started = time.time()
                chunk_docs = src.mget_from_ids(id_chunk)
                for one_doc in chunk_docs:
                    one_doc['_timestamp'] = _timestamp
                    copied += 1
                target.insert(chunk_docs)
                print('\t{}\t{}'.format(copied, timesofar(chunk_started)))

        overall_started = time.time()

        if changes['add']:
            print("Adding {} new docs...".format(len(changes['add'])))
            section_started = time.time()
            _index_in_chunks(changes['add'])
            print("done. [{}]".format(timesofar(section_started)))

        if changes['delete']:
            print("Deleting {} discontinued docs...".format(len(changes['delete'])), end='')
            section_started = time.time()
            target.remove_from_ids(changes['delete'], step=step)
            print("done. [{}]".format(timesofar(section_started)))

        if changes['update']:
            print("Updating {} existing docs...".format(len(changes['update'])))
            section_started = time.time()
            # updated docs are re-inserted from the source by their _id
            _index_in_chunks([entry['_id'] for entry in changes['update']])
            print("done. [{}]".format(timesofar(section_started)))

        target.finalize()

        print("\n")
        print("Finished.", timesofar(overall_started))
开发者ID:gkarthik,项目名称:mygene.hub,代码行数:48,代码来源:es_sync.py

示例9: run_jobs_on_ipythoncluster

def run_jobs_on_ipythoncluster(worker, task_list, shutdown_ipengines_after_done=False):

    t0 = time.time()
    rc = Client(CLUSTER_CLIENT_JSON)
    lview = rc.load_balanced_view()
    print "\t# nodes in use: {}".format(len(lview.targets or rc.ids))
    lview.block = False

    print "\t# of tasks: {}".format(len(task_list))
    print "\tsubmitting...",
    job = lview.map_async(worker, task_list)
    print "done."
    try:
        job.wait_interactive()
    except KeyboardInterrupt:
        #handle "Ctrl-C"
        if ask("\nAbort all submitted jobs?") == 'Y':
            lview.abort()
            print "Aborted, all submitted jobs are cancelled."
        else:
            print "Aborted, but your jobs are still running on the cluster."
        return

    if len(job.result) != len(task_list):
        print "WARNING:\t# of results returned ({}) != # of tasks ({}).".format(len(job.result), len(task_list))
    print "\ttotal time: {}".format(timesofar(t0))

    if shutdown_ipengines_after_done:
        print "\tshuting down all ipengine nodes...",
        lview.shutdown()
        print 'Done.'
    return job.result
开发者ID:gkarthik,项目名称:mygene.hub,代码行数:32,代码来源:parallel.py

示例10: merge

    def merge(self, step=100000, restart_at=0):
        t0 = time.time()
        self.validate_src_collections()
        self.log_building_start()
        try:
            if self.using_ipython_cluster:
                self._merge_ipython_cluster(step=step)
            else:
                self._merge_local(step=step, restart_at=restart_at)

            if self.target.name == 'es':
                print "Updating metadata...",
                self.update_mapping_meta()

            t1 = round(time.time() - t0, 0)
            t = timesofar(t0)
            self.log_src_build({'status': 'success',
                                'time': t,
                                'time_in_s': t1,
                                'timestamp': datetime.now()})

        finally:
            #do a simple validation here
            if getattr(self, '_stats', None):
                print "Validating..."
                target_cnt = self.target.count()
                if target_cnt == self._stats['total_genes']:
                    print "OK [total count={}]".format(target_cnt)
                else:
                    print "Warning: total count of gene documents does not match [{}, should be {}]".format(target_cnt, self._stats['total_genes'])

            if self.merge_logging:
                sys.stdout.close()
开发者ID:gkarthik,项目名称:mygene.hub,代码行数:33,代码来源:builder.py

示例11: load_x

def load_x(idx, fieldname, cvt_fn=None):
    '''Load one column of idmapping_selected.tab.gz as a gene-id mapping.

    idx        index of the column holding the target value.
    fieldname  key under which each gene's value(s) are stored.
    cvt_fn     optional converter applied to every non-empty target value.

    Columns 2 and 19 are read as GeneID and Ensembl(Gene). Rows without a
    GeneID are mapped through their Ensembl id when a GeneID is known for
    it, and are keyed by the Ensembl id itself otherwise.

    Returns the result of value_convert: the id -> value mapping with each
    value wrapped as {fieldname: value}, list values being sorted.
    '''
    print('DATA_FOLDER: ' + DATA_FOLDER)
    DATAFILE = os.path.join(DATA_FOLDER, 'idmapping_selected.tab.gz')
    load_start(DATAFILE)
    t0 = time.time()
    xli = []
    for ld in tabfile_feeder(DATAFILE, header=1):
        ld = listitems(ld, 2, 19, idx)    # GeneID Ensembl(Gene) target_value
        xli.extend(dupline_seperator(dupline=ld, dup_sep='; '))

    # Ensembl id -> list of GeneIDs, from rows where both ids are present
    ensembl2geneid = list2dict(list_nondup([(x[1], x[0]) for x in xli if x[0] != '' and x[1] != '']), 0, alwayslist=True)
    xli2 = []
    for entrez_id, ensembl_id, x_value in xli:
        if not x_value:
            continue
        if cvt_fn:
            x_value = cvt_fn(x_value)
        if entrez_id:
            xli2.append((entrez_id, x_value))
        elif ensembl_id:
            mapped_ids = ensembl2geneid.get(ensembl_id)
            if mapped_ids:
                for _eid in mapped_ids:
                    xli2.append((_eid, x_value))
            else:
                xli2.append((ensembl_id, x_value))

    gene2x = list2dict(list_nondup(xli2), 0)

    def _wrap(value):
        # isinstance(value, list) replaces the Python-2-only
        # `type(value) is types.ListType` check
        return {fieldname: sorted(value) if isinstance(value, list) else value}

    gene2x = value_convert(gene2x, _wrap, traverse_list=False)
    load_done('[%d, %s]' % (len(gene2x), timesofar(t0)))

    return gene2x
开发者ID:gkarthik,项目名称:mygene.hub,代码行数:34,代码来源:uniprot_base.py

示例12: run2

def run2():
    '''Build the docs from `sources` and index them with ESIndexerBase.

    Only the build_index call is timed; the elapsed time is printed.
    '''
    from databuild.esbuilder import ESIndexerBase
    indexer = ESIndexerBase()
    built_docs = build(sources)
    started = time.time()
    indexer.build_index(built_docs)
    print('Done[%s]' % timesofar(started))
开发者ID:gkarthik,项目名称:mygene.hub,代码行数:7,代码来源:__init__.py

示例13: update_index

def update_index(changes, sync_src, sync_target, noconfirm=False):
    '''Apply `changes` ('add', 'delete', 'update') to the target ES index.

    Prints a summary of the pending changes and of the source and target,
    then (unless noconfirm is true) asks for confirmation; anything other
    than 'Y' returns without changing anything. Added and updated docs are
    read from sync_src.target_collection in batches of 1000 and passed to
    the target indexer's add_docs; deleted ids go to delete_docs.
    '''
    print("\t{}\trecords will be added.".format(len(changes['add'])))
    print("\t{}\trecords will be deleted.".format(len(changes['delete'])))
    print("\t{}\trecords will be updated.".format(len(changes['update'])))

    print('')
    print('\tsync_src:\t{:<45}{}'.format(sync_src.target_collection.name,
                                         sync_src.name))
    print('\tsync_target\t{:<45}{}'.format(sync_target.target_esidxer.ES_INDEX_NAME,
                                           sync_target.name))

    if not (noconfirm or ask("Continue?") == 'Y'):
        return

    overall_started = time.time()
    es_idxer = sync_target.target_esidxer

    def _index_ids(id_list):
        # (re)index the source docs whose _id is in id_list
        _q = {'_id': {'$in': id_list}}
        for docs in doc_feeder(sync_src.target_collection, step=1000, inbatch=True, query=_q):
            es_idxer.add_docs(docs)

    if len(changes['add']) > 0:
        print("Adding {} new records...".format(len(changes['add'])))
        section_started = time.time()
        _index_ids(changes['add'])
        print("Done. [{}]".format(timesofar(section_started)))

    if len(changes['delete']) > 0:
        print("Deleting {} old records...".format(len(changes['delete'])))
        section_started = time.time()
        es_idxer.delete_docs(changes['delete'])
        print("Done. [{}]".format(timesofar(section_started)))

    if len(changes['update']) > 0:
        print("Updating {} existing records...".format(len(changes['update'])))
        section_started = time.time()
        _index_ids([d['_id'] for d in changes['update']])
        print("Done. [{}]".format(timesofar(section_started)))

    print('='*20)
    print('Finished. [{}]'.format(timesofar(overall_started)))
开发者ID:gkarthik,项目名称:mygene.hub,代码行数:46,代码来源:indexer.py

示例14: main

def main():
    '''Download and parse the entrez data, tracking progress in src_dump.

    Creates DATA_FOLDER when missing, redirects stdout/stderr to
    entrez_dump.log inside it, saves a 'downloading' record, then runs the
    download and the parsing. sys.stdout is closed afterwards whether or
    not those steps succeeded; on success the record is updated with the
    timings and flagged as pending upload.
    '''
    no_confirm = True   # set it to True for running this script automatically without intervention.

    if os.path.exists(DATA_FOLDER):
        proceed = (no_confirm
                   or len(os.listdir(DATA_FOLDER)) == 0
                   or ask('DATA_FOLDER (%s) is not empty. Continue?' % DATA_FOLDER) == 'Y')
        if not proceed:
            sys.exit()
    else:
        os.makedirs(DATA_FOLDER)

    log_path = os.path.join(DATA_FOLDER, 'entrez_dump.log')
    log_f, logfile = safewfile(log_path, prompt=(not no_confirm), default='O')
    sys.stdout = LogPrint(log_f, timestamp=True)
    sys.stderr = sys.stdout

    # record that the download is starting
    src_dump = get_src_dump()
    src_dump.save({'_id': 'entrez',
                   'timestamp': timestamp,
                   'data_folder': DATA_FOLDER,
                   'logfile': logfile,
                   'status': 'downloading'})
    total_started = time.time()
    try:
        download(DATA_FOLDER, no_confirm=no_confirm)
        t_download = timesofar(total_started)
        parsing_started = time.time()
        # record that parsing is starting
        src_dump.update({'_id': 'entrez'}, {'$set': {'status': 'parsing'}})
        parse_gbff(DATA_FOLDER)
        t_parsing = timesofar(parsing_started)
        t_total = timesofar(total_started)
    finally:
        sys.stdout.close()

    # only reached when download and parsing both succeeded
    timings = {
        'download': t_download,
        'parsing': t_parsing,
        'total': t_total
    }
    _updates = {
        'status': 'success',
        'time': timings,
        'pending_to_upload': True    # a flag to trigger data uploading
    }

    src_dump.update({'_id': 'entrez'}, {'$set': _updates})
开发者ID:putmantime,项目名称:mygeneinfo_gh,代码行数:46,代码来源:dl_entrez.py

示例15: load

    def load(self, genedoc_d=None, update_data=True, update_master=True, test=False, step=10000):
        '''Upload the source data and/or register it in the master collection.

        The temp collection is created when missing and always emptied.
        With update_data, the docs (genedoc_d, or self.load_genedoc() when
        not given) are inserted in batches of `step`, the collection is
        switched, and the geneid / mapping-to-entrez objects are dumped to
        GridFS when the matching *_GENEDOC_ROOT attribute is set. With
        update_master, a GeneDocSourceMaster record is saved. When test is
        true, neither the batch inserts nor the master save happen.
        '''
        if not self.temp_collection:
            self.make_temp_collection()

        self.temp_collection.drop()       # drop all existing records just in case.

        if update_data:
            genedoc_d = genedoc_d or self.load_genedoc()

            print("Uploading to the DB...", end='')
            started = time.time()
            for doc_batch in self.doc_iterator(genedoc_d, batch=True, step=step):
                if test:
                    continue
                self.temp_collection.insert(doc_batch, manipulate=False, check_keys=False)
            print('Done[%s]' % timesofar(started))
            self.switch_collection()

            if getattr(self, 'ENTREZ_GENEDOC_ROOT', False):
                print('Uploading "geneid_d" to GridFS...', end='')
                started = time.time()
                geneid_d = self.get_geneid_d()
                dump2gridfs(geneid_d, self.__collection__ + '__geneid_d.pyobj', self.db)
                print('Done[%s]' % timesofar(started))
            if getattr(self, 'ENSEMBL_GENEDOC_ROOT', False):
                print('Uploading "mapping2entrezgene" to GridFS...', end='')
                started = time.time()
                x2entrezgene_list = self.get_mapping_to_entrez()
                dump2gridfs(x2entrezgene_list, self.__collection__ + '__2entrezgene_list.pyobj', self.db)
                print('Done[%s]' % timesofar(started))

        # update src_master collection (skipped in test mode)
        if update_master and not test:
            _doc = {"_id": unicode(self.__collection__),
                    "name": unicode(self.__collection__),
                    "timestamp": datetime.datetime.now()}
            optional_attrs = ['ENTREZ_GENEDOC_ROOT', 'ENSEMBL_GENEDOC_ROOT', 'id_type']
            for attr_name in optional_attrs:
                if hasattr(self, attr_name):
                    _doc[attr_name] = getattr(self, attr_name)
            if hasattr(self, 'get_mapping'):
                _doc['mapping'] = self.get_mapping()

            conn.GeneDocSourceMaster(_doc).save()
开发者ID:putmantime,项目名称:mygeneinfo_gh,代码行数:46,代码来源:__init__.py


注:本文中的utils.common.timesofar函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。