

Python utils.grouper Function Code Examples

This article collects typical usage examples of the utils.grouper function in Python. If you are wondering how to call grouper, how it is used in practice, or what real-world examples look like, the curated code examples below should help.


The following shows 15 code examples of the grouper function, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Python code examples.
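
Before the individual examples, it helps to know what grouper typically does: it splits an iterable into fixed-size chunks, padding the last chunk when the input does not divide evenly. The sketch below is the standard itertools recipe built on zip_longest; it is an illustrative assumption rather than the exact utils.grouper used by the projects below, and note that the argument order differs between projects (some call grouper(iterable, n), others grouper(n, iterable)).

from itertools import zip_longest  # izip_longest on Python 2


def grouper(iterable, n, fillvalue=None):
    # Collect data into fixed-length chunks; the last chunk is padded with fillvalue.
    # grouper('ABCDEFG', 3) -> ('A', 'B', 'C') ('D', 'E', 'F') ('G', None, None)
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)


for chunk in grouper(range(7), 3):
    # Most callers filter out the None padding explicitly before using a chunk.
    print([x for x in chunk if x is not None])

This padding behavior is why several of the examples below guard each element with a check such as "if line:" or "if line is not None" before processing it.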

Example 1: compute_descriptors

def compute_descriptors(infile, descriptor_types):
    """Reads low-level descriptors from DenseTracks."""

    LEN_LINE = 436

    POS_IDXS = [1, 2, 0]        # Position coordinates (X, Y, T).
    NORM_POS_IDXS = [7, 8, 9]   # Normalized position coordinates (X, Y, T).

    dense_tracks = subprocess.Popen(
        [DENSE_TRACK, infile],
        stdout=subprocess.PIPE)

    for lines in grouper(dense_tracks.stdout, NR_DESCRIPTORS):
        all_descs = np.vstack([
            map(float, line.split())
            for line in lines
            if line is not None]
        ).astype(np.float32)

        assert all_descs.shape[0] <= NR_DESCRIPTORS
        assert all_descs.shape[1] == LEN_LINE

        positions = all_descs[:, POS_IDXS]
        normalized_positions = all_descs[:, NORM_POS_IDXS]
        descriptors = {
            desc_type: all_descs[:, DESC_IDXS[desc_type]]
            for desc_type in descriptor_types}

        yield positions, normalized_positions, descriptors
Developer: martin-xavier, Project: medpackage, Lines of code: 29, Source file: densetrack_to_fisher_shot_errorprotect.py

Example 2: main

def main():
    logger = configure_logging('parse_serverstatus')
    client = InfluxDBClient(host=args.influxdb_host, ssl=args.ssl, verify_ssl=False, port=8086, database=args.database)
    with open(args.input_file, 'r') as f:
        for line_number, chunk in enumerate(grouper(f, args.batch_size)):
            # print(line_number)
            json_points = []
            for line in chunk:
                # zip_longest will backfill any missing values with None, so we need to handle this, otherwise we'll miss the last batch
                if line:
                    try:
                        server_status_json = json.loads(line)
                        # print((line_number + 0) * _BATCH_SIZE)
                        # print((line_number + 1) * _BATCH_SIZE)
                        common_metric_data = get_metrics("serverstatus", server_status_json, common_metrics, line_number)
                        json_points.append(create_point(*common_metric_data))
                        wiredtiger_metric_data = get_metrics("serverstatus_wiredtiger", server_status_json, wiredtiger_metrics, line_number)
                        json_points.append(create_point(*wiredtiger_metric_data))
                        # for metric_data in get_metrics(server_status_json, common_metrics, line_number):
                        #     import ipdb; ipdb.set_trace()
                        #     print(json_points)
                        #     json_points.append(create_point(*metric_data))
                        # # for metric in get_metrics(server_status_json, wiredtiger_metrics, line_number):
                        #     json_points.append(create_point(*metric))
                        # for metric in get_metrics(server_status_json, mmapv1_metrics, line_number):
                        #     json_points.append(create_point(*metric))
                    except ValueError:
                        logger.error("Line {} does not appear to be valid JSON - \"{}\"".format(line_number, line.strip()))
            write_points(logger, client, json_points, line_number)
Developer: jimoleary, Project: mongo-insight, Lines of code: 29, Source file: parse_serverstatus.py

Example 3: main

def main(args):
    global DEBUG
    if len(args) == 1:
        # no args - repl
        while True:
            print 'que?>',
            try:
                print google_it(raw_input())
            except EOFError:
                break
            except:
                import traceback
                traceback.print_exc()
    else:
        # test mode
        DEBUG = False
        print 'Loading testfile...'
        tests = filter(bool, open(args[1]).read().split('\n'))

        print len(tests), 'tests'
        for clue, answer in utils.grouper(2, tests):
            clue = clue.split('~!clue')[1]
            answer = answer.split("~!answer")[1]
            try:
                print '----------------------------------------------------------------'
                print 'clue:', clue
                print 'correct:', answer
                print 'eubank:', google_it(clue)
            except KeyboardInterrupt:
                sys.exit(0)
            except:
                import traceback
                traceback.print_exc()
Developer: andrewgjohnson-forks, Project: Eubank, Lines of code: 33, Source file: jeopardy.py

Example 4: main

def main():
    description = 'Split a FASTA file into multiple subfiles.'
    parser = ArgumentParser(description=description,
                            parents=[get_default_argument_parser()])
    parser.add_argument('-f', '--in-format',
                        default=_DEFAULT_FMT,
                        help="A biopython file format string.")
    parser.add_argument('-n', '--num-files', type=int,
                        default=_DEFAULT_N,
                        help=("The number of splits. "
                              "DEFAULT=%d") % _DEFAULT_N)
    parser.add_argument('in_path', nargs='?', default=None,
                        help=("The path of the file to be read in. "
                              "If no argument given, reads from STDIN."))
    parser.add_argument('out_pattern', default=None,
                        help=("Output file names format string. "
                              "Must contain one '%%d' for the file number."))
    args = parser.parse_args()

    if args.in_path is None:
        record_parser = SeqIO.parse(sys.stdin, args.in_format)
    else:
        record_parser = SeqIO.parse(args.in_path, args.in_format)

    write_multithread(grouper(record_parser, 100),
                      lambda recs, handle:
                          SeqIO.write(recs, handle, args.in_format),
                      args.out_pattern, n=args.num_files)
Developer: bsmith89, Project: rrnum, Lines of code: 28, Source file: split_seqs.py

Example 5: train

    def train(self, sentences, total_words=None, word_count=0, chunksize=100):
        """
        Update the model's neural weights from a sequence of sentences (can be a once-only generator stream).
        Each sentence must be a list of utf8 strings.

        """
        logger.info("training model on %i vocabulary and %i features" % (len(self.vocab), self.layer1_size))
        if not self.vocab:
            raise RuntimeError("you must first build vocabulary before training the model")

        start, next_report = time.time(), 1.0
        if not total_words:
            total_words = sum(v.count for v in self.vocab.itervalues())
        # convert input string lists to Vocab objects (or None for OOV words)
        no_oov = ([self.vocab.get(word, None) for word in sentence] for sentence in sentences)
        # run in chunks of e.g. 100 sentences (= 1 job) 
        for job in utils.grouper(no_oov, chunksize):
            # update the learning rate before every job
            alpha = max(self.min_alpha, self.alpha * (1 - 1.0 * word_count / total_words))
            # how many words did we train on? out-of-vocabulary (unknown) words do not count
            job_words = sum(train_sentences(self, sentence, alpha) for sentence in job)
            word_count += job_words
            # report progress
            elapsed = time.time() - start
            if elapsed >= next_report:
                logger.info("PROGRESS: at %.2f%% words, alpha %.05f, %.0f words/s" %
                    (100.0 * word_count / total_words, alpha, word_count / elapsed if elapsed else 0.0))
                next_report = elapsed + 1.0  # don't flood the log, wait at least a second between progress reports
        elapsed = time.time() - start
        logger.info("training on %i words took %.1fs, %.0f words/s" %
            (word_count, elapsed, word_count / elapsed if elapsed else 0.0))
        return word_count
Developer: nudles, Project: word2vec, Lines of code: 32, Source file: word2vec.py

Example 6: __iter__

    def __iter__(self):
        if self.chunksize:
            for chunk in utils.grouper(self.corpus, self.chunksize):
                for transformed in self.obj.__getitem__(chunk, chunksize=None):
                    yield transformed
        else:
            for doc in self.corpus:
                yield self.obj[doc]
Developer: Anikacyp, Project: gensim, Lines of code: 8, Source file: interfaces.py

Example 7: train

    def train(self, sentences, total_words=None, word_count=0, chunksize=100):
        """
        Update the model's neural weights from a sequence of sentences (can be a once-only generator stream).
        Each sentence must be a list of utf8 strings.

        """
        logger.info("training model with %i workers on %i vocabulary and %i features" % (self.workers, len(self.vocab), self.layer1_size))

        if not self.vocab:
            raise RuntimeError("you must first build vocabulary before training the model")

        start, next_report = time.time(), [1.0]
        word_count, total_words = [word_count], total_words or sum(v.count for v in self.vocab.itervalues())
        jobs = Queue(maxsize=2 * self.workers)  # buffer ahead only a limited number of jobs.. this is the reason we can't simply use ThreadPool :(
        lock = threading.Lock()  # for shared state (=number of words trained so far, log reports...)

        def worker_train():
            """Train the model, lifting lists of sentences from the jobs queue."""
            work = matutils.zeros_aligned(self.layer1_size, dtype=REAL)  # each thread must have its own work memory

            while True:
                job = jobs.get()
                if job is None:  # data finished, exit
                    break
                # update the learning rate before every job
                alpha = max(self.min_alpha, self.alpha * (1 - 1.0 * word_count[0] / total_words))
                # how many words did we train on? out-of-vocabulary (unknown) words do not count
                job_words = sum(train_sentence(self, sentence, alpha, work) for sentence in job)
                with lock:
                    word_count[0] += job_words
                    elapsed = time.time() - start
                    if elapsed >= next_report[0]:
                        logger.info("PROGRESS: at %.2f%% words, alpha %.05f, %.0f words/s" %
                            (100.0 * word_count[0] / total_words, alpha, word_count[0] / elapsed if elapsed else 0.0))
                        next_report[0] = elapsed + 1.0  # don't flood the log, wait at least a second between progress reports

        workers = [threading.Thread(target=worker_train) for _ in xrange(self.workers)]
        for thread in workers:
            thread.daemon = True  # make interrupting the process with ctrl+c easier
            thread.start()

        # convert input strings to Vocab objects (or None for OOV words), and start filling the jobs queue
        no_oov = ([self.vocab.get(word, None) for word in sentence] for sentence in sentences)
        for job_no, job in enumerate(utils.grouper(no_oov, chunksize)):
            logger.debug("putting job #%i in the queue, qsize=%i" % (job_no, jobs.qsize()))
            jobs.put(job)
        logger.info("reached the end of input; waiting to finish %i outstanding jobs" % jobs.qsize())
        for _ in xrange(self.workers):
            jobs.put(None)  # give the workers heads up that they can finish -- no more work!

        for thread in workers:
            thread.join()

        elapsed = time.time() - start
        logger.info("training on %i words took %.1fs, %.0f words/s" %
            (word_count[0], elapsed, word_count[0] / elapsed if elapsed else 0.0))

        return word_count[0]
Developer: MorLong, Project: word2vec-1, Lines of code: 58, Source file: word2vec.py

Example 8: import_json

def import_json():
    for g in grouper(1000,sys.stdin):
        try:
            Model.database.bulk_save([json.loads(l) for l in g if l])
        except BulkSaveError as err:
            if any(d['error']!='conflict' for d in err.errors):
                raise
            else:
                logging.warn("conflicts for %r",[d['id'] for d in err.errors])
Developer: JeffAMcGee, Project: localcrawl, Lines of code: 9, Source file: admin.py

Example 9: read_slr

def read_slr(fh):
    stats = fh.readline()
    seqs = []

    for l in utils.grouper(fh, 2):
        name = l[0].rstrip()
        seq = l[1].rstrip()
        seqs.append(SeqRecord(id=name, seq=Seq(seq), description=""))
        
    return seqs
Developer: pomeranz, Project: tree_stats, Lines of code: 10, Source file: prepare_fubar.py

Example 10: __init__

    def __init__(self, horn_pointing=False, siamfile=None):
        self.horn_pointing = horn_pointing
        if siamfile is None:
            siamfile = private.siam
        f = open(siamfile)
        lines = f.readlines()
        self.siam = {}
        for line in grouper(4, lines[1:]):
            chtag = line[0].split()[0]
            m = np.array(np.matrix(';'.join(line[1:])))
            self.siam[chtag] = m
Developer: tskisner, Project: planck, Lines of code: 11, Source file: pointingtools.py

Example 11: import_old_json

def import_old_json():
    for g in grouper(1000,sys.stdin):
        docs = [json.loads(l) for l in g if l]
        for d in docs:
            del d['doc_type']
            for k,v in d.iteritems():
                if k[-2:]=='id' or k in ('rtt','rtu'):
                    d[k]=v[1:]
            for field in ['ats','fols','frs']:
                if field in d and isinstance(d[field],list):
                    d[field] = [u[1:] for u in d[field]]
        Model.database.bulk_save(docs)
Developer: JeffAMcGee, Project: localcrawl, Lines of code: 12, Source file: admin.py

Example 12: xfory

def xfory(price_info, units):
    """ function to discount per groups. if you pay Y you get X """
    total = 0
    x = price_info.get('x')
    y = price_info.get('y')
    price = price_info.get('unitPrice')

    for group in grouper(x, range(0, units)):
        has_discount = len(group) == x
        per_unit = price if not has_discount else y / x * price
        total = total + (per_unit * len(group))

    return total / units
Developer: abelgvidal, Project: exercises, Lines of code: 13, Source file: rules.py

Example 13: command_service

    def command_service(self, rawCommand):
        """
        Parse raw input and execute specified function with args

        :param rawCommand: csv string from Matlab/Simulink of the form:
                'command, namedArg1, arg1, namedArg2, arg2, ..., namedArgN, argN'
        :return: the command and arguments as a dictionary
        """
        pack = [x.strip() for x in split('[,()]*', rawCommand.strip())]
        raw_cmd = pack[0]
        argDict = {key: literal_eval(value) for key, value in utils.grouper(pack[1:], 2)}
        cmd = self.mapInterface.commands[raw_cmd]
        ret = cmd(**argDict)
        logger.info("Command '{}' run with args {}".format(raw_cmd, argDict))
        return raw_cmd, ret
Developer: friend0, Project: world_engine, Lines of code: 15, Source file: server.py

Example 14: train

    def train(self,triples, total_triples=None, triples_count = 0, chunksize=1000):
        if not self.vocab or not self.vocab_rel:
            raise RuntimeError("you must first build entity and relation vocabulary before training the model")
        start,next_report = time.time(),[1.0]
        triples_count = [triples_count]
        total_triples = total_triples or int(sum(1 for v in triples))
        jobs = Queue(maxsize=2*self.workers)
        lock = threading.Lock()

        def worker_train():
            work = zeros(self.layer1_size, dtype=REAL)
            detR = zeros((self.layer1_size,self.layer1_size),dtype=REAL)
            # neu1 = matutils.zeros_aligned(self.layer1_size, dtype=REAL)
            while True:
                job = jobs.get()
                if job is None:
                    break
                alpha = max(self.min_alpha, self.alpha * (1 - 1.0 * triples_count[0] / total_triples))
                job_triples = self._get_job_triples(alpha,job,work,detR)
                with lock:
                    triples_count[0] += job_triples
                    elapsed = time.time() - start
                    if elapsed >= next_report[0]:
                        logger.info("PROGRESS: at %.2f%% triples, alpha %.05f, %.0f triples/s" %
                            (100.0 * triples_count[0] / total_triples, alpha, triples_count[0] / elapsed if elapsed else 0.0))
                        next_report[0] = elapsed + 1.0

        workers = [threading.Thread(target=worker_train) for _ in xrange(self.workers)]
        for thread in workers:
            thread.daemon = True  # make interrupting the process with ctrl+c easier
            thread.start()

        # convert input strings to Vocab objects (eliding OOV/downsampled words), and start filling the jobs queue
        for job_no, job in enumerate(utils.grouper(self._prepare_triples(triples), chunksize)):
            logger.debug("putting job #%i in the queue, qsize=%i" % (job_no, jobs.qsize()))
            jobs.put(job)
        logger.info("reached the end of input; waiting to finish %i outstanding jobs" % jobs.qsize())
        for _ in xrange(self.workers):
            jobs.put(None)  # give the workers heads up that they can finish -- no more work!

        for thread in workers:
            thread.join()

        elapsed = time.time() - start
        logger.info("training on %i triples took %.1fs, %.0f triples/s" %
            (triples_count[0], elapsed, triples_count[0] / elapsed if elapsed else 0.0))
        self.syn0norm = None
        return triples_count[0]
Developer: v-shinc, Project: KB2Vec, Lines of code: 48, Source file: kb2vec.py

Example 15: fetch_edges

def fetch_edges():
    Edges.database = connect("houtx_edges")
    User.database = connect("away_user")
    old_edges = set(int(row['id']) for row in Edges.database.paged_view("_all_docs",endkey="_"))
    uids = set(_users_from_scores())-old_edges
    settings.pdb()
    for g in grouper(100,uids):
        for user in twitter.user_lookup(g):
            if user is None or user.protected: continue
            try:
                edges = twitter.get_edges(user._id)
            except restkit.errors.Unauthorized:
                logging.warn("unauthorized!")
                continue
            except restkit.errors.ResourceNotFound:
                logging.warn("resource not found!?")
                continue
            edges.save()
            user.save()
            sleep_if_needed()
Developer: JeffAMcGee, Project: localcrawl, Lines of code: 20, Source file: admin.py


Note: The utils.grouper examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are taken from open-source projects contributed by their respective authors; copyright remains with the original authors, and distribution and use should follow the corresponding project licenses. Please do not reproduce without permission.