This article collects typical usage examples of the Python utils.grouper function. If you have been wondering what grouper does, how to call it, or what real-world uses look like, the curated code examples below may help.
The following presents 15 code examples of the grouper function, ordered by popularity by default. Upvoting the examples you find useful helps the site recommend better Python code samples.
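Note that utils.grouper is not part of the standard library; each project below ships its own helper, and the examples even disagree on argument order (some call grouper(iterable, n), others grouper(n, iterable)). Most variants are built on the classic itertools zip_longest recipe, which collects an iterable into fixed-size chunks and pads the last chunk with a fill value. The sketch below shows that common recipe for reference only; the exact signature and padding behavior of any particular project's grouper is an assumption, not guaranteed to match.

from itertools import zip_longest  # izip_longest on Python 2

def grouper(iterable, n, fillvalue=None):
    """Collect data into fixed-length chunks: grouper('ABCDEFG', 3, 'x') -> ABC DEF Gxx."""
    # Repeating one shared iterator n times makes zip_longest pull n
    # consecutive items into each output tuple, padding the final tuple
    # with fillvalue when the input runs out.
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)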
Example 1: compute_descriptors
def compute_descriptors(infile, descriptor_types):
    """Reads low-level descriptors from DenseTracks."""
    LEN_LINE = 436
    POS_IDXS = [1, 2, 0]       # Position coordinates (X, Y, T).
    NORM_POS_IDXS = [7, 8, 9]  # Normalized position coordinates (X, Y, T).
    dense_tracks = subprocess.Popen(
        [DENSE_TRACK, infile],
        stdout=subprocess.PIPE)
    for lines in grouper(dense_tracks.stdout, NR_DESCRIPTORS):
        all_descs = np.vstack([
            map(float, line.split())
            for line in lines
            if line is not None]
        ).astype(np.float32)
        assert all_descs.shape[0] <= NR_DESCRIPTORS
        assert all_descs.shape[1] == LEN_LINE
        positions = all_descs[:, POS_IDXS]
        normalized_positions = all_descs[:, NORM_POS_IDXS]
        descriptors = {
            desc_type: all_descs[:, DESC_IDXS[desc_type]]
            for desc_type in descriptor_types}
        yield positions, normalized_positions, descriptors
Example 2: main
def main():
    logger = configure_logging('parse_serverstatus')
    client = InfluxDBClient(host=args.influxdb_host, ssl=args.ssl, verify_ssl=False, port=8086, database=args.database)
    with open(args.input_file, 'r') as f:
        for line_number, chunk in enumerate(grouper(f, args.batch_size)):
            json_points = []
            for line in chunk:
                # zip_longest backfills any missing values in the final batch with None,
                # so skip the padding entries instead of trying to parse them
                if line:
                    try:
                        server_status_json = json.loads(line)
                        common_metric_data = get_metrics("serverstatus", server_status_json, common_metrics, line_number)
                        json_points.append(create_point(*common_metric_data))
                        wiredtiger_metric_data = get_metrics("serverstatus_wiredtiger", server_status_json, wiredtiger_metrics, line_number)
                        json_points.append(create_point(*wiredtiger_metric_data))
                    except ValueError:
                        logger.error("Line {} does not appear to be valid JSON - \"{}\"".format(line_number, line.strip()))
            write_points(logger, client, json_points, line_number)
Example 3: main
def main(args):
    global DEBUG
    if len(args) == 1:
        # no args - repl
        while True:
            print 'que?>',
            try:
                print google_it(raw_input())
            except EOFError:
                break
            except:
                import traceback
                traceback.print_exc()
    else:
        # test mode
        DEBUG = False
        print 'Loading testfile...'
        tests = filter(bool, open(args[1]).read().split('\n'))
        print len(tests), 'tests'
        for clue, answer in utils.grouper(2, tests):
            clue = clue.split('~!clue')[1]
            answer = answer.split("~!answer")[1]
            try:
                print '----------------------------------------------------------------'
                print 'clue:', clue
                print 'correct:', answer
                print 'eubank:', google_it(clue)
            except KeyboardInterrupt:
                sys.exit(0)
            except:
                import traceback
                traceback.print_exc()
Example 4: main
def main():
    description = 'Split a FASTA file into multiple subfiles.'
    parser = ArgumentParser(description=description,
                            parents=[get_default_argument_parser()])
    parser.add_argument('-f', '--in-format',
                        default=_DEFAULT_FMT,
                        help="A biopython file format string.")
    parser.add_argument('-n', '--num-files', type=int,
                        default=_DEFAULT_N,
                        help=("The number of splits. "
                              "DEFAULT=%d") % _DEFAULT_N)
    parser.add_argument('in_path', nargs='?', default=None,
                        help=("The path of the file to be read in. "
                              "If no argument given, reads from STDIN."))
    parser.add_argument('out_pattern', default=None,
                        help=("Output file names format string. "
                              "Must contain one '%%d' for the file number."))
    args = parser.parse_args()
    if args.in_path is None:
        record_parser = SeqIO.parse(sys.stdin, args.in_format)
    else:
        record_parser = SeqIO.parse(args.in_path, args.in_format)
    write_multithread(grouper(record_parser, 100),
                      lambda recs, handle:
                          SeqIO.write(recs, handle, args.in_format),
                      args.out_pattern, n=args.num_files)
Example 5: train
def train(self, sentences, total_words=None, word_count=0, chunksize=100):
    """
    Update the model's neural weights from a sequence of sentences (can be a once-only generator stream).
    Each sentence must be a list of utf8 strings.
    """
    logger.info("training model on %i vocabulary and %i features" % (len(self.vocab), self.layer1_size))
    if not self.vocab:
        raise RuntimeError("you must first build vocabulary before training the model")
    start, next_report = time.time(), 1.0
    if not total_words:
        total_words = sum(v.count for v in self.vocab.itervalues())
    # convert input string lists to Vocab objects (or None for OOV words)
    no_oov = ([self.vocab.get(word, None) for word in sentence] for sentence in sentences)
    # run in chunks of e.g. 100 sentences (= 1 job)
    for job in utils.grouper(no_oov, chunksize):
        # update the learning rate before every job
        alpha = max(self.min_alpha, self.alpha * (1 - 1.0 * word_count / total_words))
        # how many words did we train on? out-of-vocabulary (unknown) words do not count
        job_words = sum(train_sentences(self, sentence, alpha) for sentence in job)
        word_count += job_words
        # report progress
        elapsed = time.time() - start
        if elapsed >= next_report:
            logger.info("PROGRESS: at %.2f%% words, alpha %.05f, %.0f words/s" %
                        (100.0 * word_count / total_words, alpha, word_count / elapsed if elapsed else 0.0))
            next_report = elapsed + 1.0  # don't flood the log, wait at least a second between progress reports
    elapsed = time.time() - start
    logger.info("training on %i words took %.1fs, %.0f words/s" %
                (word_count, elapsed, word_count / elapsed if elapsed else 0.0))
    return word_count
Example 6: __iter__
def __iter__(self):
    if self.chunksize:
        for chunk in utils.grouper(self.corpus, self.chunksize):
            for transformed in self.obj.__getitem__(chunk, chunksize=None):
                yield transformed
    else:
        for doc in self.corpus:
            yield self.obj[doc]
Example 7: train
def train(self, sentences, total_words=None, word_count=0, chunksize=100):
    """
    Update the model's neural weights from a sequence of sentences (can be a once-only generator stream).
    Each sentence must be a list of utf8 strings.
    """
    logger.info("training model with %i workers on %i vocabulary and %i features" % (self.workers, len(self.vocab), self.layer1_size))
    if not self.vocab:
        raise RuntimeError("you must first build vocabulary before training the model")
    start, next_report = time.time(), [1.0]
    word_count, total_words = [word_count], total_words or sum(v.count for v in self.vocab.itervalues())
    jobs = Queue(maxsize=2 * self.workers)  # buffer ahead only a limited number of jobs.. this is the reason we can't simply use ThreadPool :(
    lock = threading.Lock()  # for shared state (=number of words trained so far, log reports...)

    def worker_train():
        """Train the model, lifting lists of sentences from the jobs queue."""
        work = matutils.zeros_aligned(self.layer1_size, dtype=REAL)  # each thread must have its own work memory
        while True:
            job = jobs.get()
            if job is None:  # data finished, exit
                break
            # update the learning rate before every job
            alpha = max(self.min_alpha, self.alpha * (1 - 1.0 * word_count[0] / total_words))
            # how many words did we train on? out-of-vocabulary (unknown) words do not count
            job_words = sum(train_sentence(self, sentence, alpha, work) for sentence in job)
            with lock:
                word_count[0] += job_words
                elapsed = time.time() - start
                if elapsed >= next_report[0]:
                    logger.info("PROGRESS: at %.2f%% words, alpha %.05f, %.0f words/s" %
                                (100.0 * word_count[0] / total_words, alpha, word_count[0] / elapsed if elapsed else 0.0))
                    next_report[0] = elapsed + 1.0  # don't flood the log, wait at least a second between progress reports

    workers = [threading.Thread(target=worker_train) for _ in xrange(self.workers)]
    for thread in workers:
        thread.daemon = True  # make interrupting the process with ctrl+c easier
        thread.start()

    # convert input strings to Vocab objects (or None for OOV words), and start filling the jobs queue
    no_oov = ([self.vocab.get(word, None) for word in sentence] for sentence in sentences)
    for job_no, job in enumerate(utils.grouper(no_oov, chunksize)):
        logger.debug("putting job #%i in the queue, qsize=%i" % (job_no, jobs.qsize()))
        jobs.put(job)
    logger.info("reached the end of input; waiting to finish %i outstanding jobs" % jobs.qsize())
    for _ in xrange(self.workers):
        jobs.put(None)  # give the workers heads up that they can finish -- no more work!
    for thread in workers:
        thread.join()

    elapsed = time.time() - start
    logger.info("training on %i words took %.1fs, %.0f words/s" %
                (word_count[0], elapsed, word_count[0] / elapsed if elapsed else 0.0))
    return word_count[0]
Example 8: import_json
def import_json():
    for g in grouper(1000, sys.stdin):
        try:
            Model.database.bulk_save([json.loads(l) for l in g if l])
        except BulkSaveError as err:
            if any(d['error'] != 'conflict' for d in err.errors):
                raise
            else:
                logging.warn("conflicts for %r", [d['id'] for d in err.errors])
Example 9: read_slr
def read_slr(fh):
    stats = fh.readline()
    seqs = []
    for l in utils.grouper(fh, 2):
        name = l[0].rstrip()
        seq = l[1].rstrip()
        seqs.append(SeqRecord(id=name, seq=Seq(seq), description=""))
    return seqs
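A usage sketch for read_slr, with a made-up in-memory file standing in for a real SLR results file (the Biopython imports are assumed to be the same as in the example): the first line is consumed as the stats header, and grouper pairs the remaining lines into name/sequence records.

from io import StringIO  # Python 3; Python 2 code would use StringIO.StringIO

fh = StringIO("stats header\nseq1\nACGT\nseq2\nTTGA\n")  # hypothetical input
records = read_slr(fh)
# records[0].id == 'seq1' and str(records[0].seq) == 'ACGT'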
Example 10: __init__
def __init__(self, horn_pointing=False, siamfile=None):
    self.horn_pointing = horn_pointing
    if siamfile is None:
        siamfile = private.siam
    f = open(siamfile)
    lines = f.readlines()
    self.siam = {}
    for line in grouper(4, lines[1:]):
        chtag = line[0].split()[0]
        m = np.array(np.matrix(';'.join(line[1:])))
        self.siam[chtag] = m
Example 11: import_old_json
def import_old_json():
    for g in grouper(1000, sys.stdin):
        docs = [json.loads(l) for l in g if l]
        for d in docs:
            del d['doc_type']
            for k, v in d.iteritems():
                if k[-2:] == 'id' or k in ('rtt', 'rtu'):
                    d[k] = v[1:]
            for field in ['ats', 'fols', 'frs']:
                if field in d and isinstance(d[field], list):
                    d[field] = [u[1:] for u in d[field]]
        Model.database.bulk_save(docs)
Example 12: xfory
def xfory(price_info, units):
    """Discount per group: pay for y units, get x."""
    total = 0
    x = price_info.get('x')
    y = price_info.get('y')
    price = price_info.get('unitPrice')
    for group in grouper(x, range(0, units)):
        has_discount = len(group) == x
        per_unit = price if not has_discount else y / x * price
        total = total + (per_unit * len(group))
    return total / units
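A usage sketch for the discount rule above, assuming this project's grouper leaves the final group short instead of padding it (with a padding grouper such as the zip_longest recipe shown earlier, every group would have len(group) == x and the leftover units would wrongly get the discount). The price rule and quantities are hypothetical.

price_info = {'x': 3, 'y': 2.0, 'unitPrice': 10.0}  # "3 for the price of 2" at 10.0 per unit
# 7 units -> two full groups of 3 at the discounted rate (2.0 / 3 * 10.0 each)
# plus one leftover unit at full price: (2 * 3 * 6.67 + 10.0) / 7, roughly 7.14.
average_per_unit = xfory(price_info, 7)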
Example 13: command_service
def command_service(self, rawCommand):
    """
    Parse raw input and execute the specified function with args.
    :param rawCommand: csv string from Matlab/Simulink of the form:
        'command, namedArg1, arg1, namedArg2, arg2, ..., namedArgN, argN'
    :return: the command name and the command's return value
    """
    pack = [x.strip() for x in split('[,()]*', rawCommand.strip())]
    raw_cmd = pack[0]
    argDict = {key: literal_eval(value) for key, value in utils.grouper(pack[1:], 2)}
    cmd = self.mapInterface.commands[raw_cmd]
    ret = cmd(**argDict)
    logger.info("Command '{}' run with args {}".format(raw_cmd, argDict))
    return raw_cmd, ret
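To see the pairing step in isolation: after the command token is stripped off, grouper walks the remaining tokens two at a time so that each argument name lines up with its value. The token list below is hypothetical, and utils.grouper plus ast.literal_eval are assumed to be imported as in the example.

tokens = ['maxSpeed', '3.5', 'laneCount', '2']  # hypothetical tokens after the command name
# grouper(tokens, 2) yields ('maxSpeed', '3.5') and ('laneCount', '2');
# literal_eval then turns each value string into a Python object.
argDict = {key: literal_eval(value) for key, value in utils.grouper(tokens, 2)}
# argDict == {'maxSpeed': 3.5, 'laneCount': 2}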
Example 14: train
def train(self, triples, total_triples=None, triples_count=0, chunksize=1000):
    if not self.vocab or not self.vocab_rel:
        raise RuntimeError("you must first build entity and relation vocabulary before training the model")
    start, next_report = time.time(), [1.0]
    triples_count = [triples_count]
    total_triples = total_triples or int(sum(1 for v in triples))
    jobs = Queue(maxsize=2 * self.workers)
    lock = threading.Lock()

    def worker_train():
        work = zeros(self.layer1_size, dtype=REAL)
        detR = zeros((self.layer1_size, self.layer1_size), dtype=REAL)
        while True:
            job = jobs.get()
            if job is None:
                break
            alpha = max(self.min_alpha, self.alpha * (1 - 1.0 * triples_count[0] / total_triples))
            job_triples = self._get_job_triples(alpha, job, work, detR)
            with lock:
                triples_count[0] += job_triples
                elapsed = time.time() - start
                if elapsed >= next_report[0]:
                    logger.info("PROGRESS: at %.2f%% triples, alpha %.05f, %.0f triples/s" %
                                (100.0 * triples_count[0] / total_triples, alpha, triples_count[0] / elapsed if elapsed else 0.0))
                    next_report[0] = elapsed + 1.0

    workers = [threading.Thread(target=worker_train) for _ in xrange(self.workers)]
    for thread in workers:
        thread.daemon = True  # make interrupting the process with ctrl+c easier
        thread.start()

    # convert input strings to Vocab objects (eliding OOV/downsampled words), and start filling the jobs queue
    for job_no, job in enumerate(utils.grouper(self._prepare_triples(triples), chunksize)):
        logger.debug("putting job #%i in the queue, qsize=%i" % (job_no, jobs.qsize()))
        jobs.put(job)
    logger.info("reached the end of input; waiting to finish %i outstanding jobs" % jobs.qsize())
    for _ in xrange(self.workers):
        jobs.put(None)  # give the workers heads up that they can finish -- no more work!
    for thread in workers:
        thread.join()

    elapsed = time.time() - start
    logger.info("training on %i triples took %.1fs, %.0f triples/s" %
                (triples_count[0], elapsed, triples_count[0] / elapsed if elapsed else 0.0))
    self.syn0norm = None
    return triples_count[0]
Example 15: fetch_edges
def fetch_edges():
    Edges.database = connect("houtx_edges")
    User.database = connect("away_user")
    old_edges = set(int(row['id']) for row in Edges.database.paged_view("_all_docs", endkey="_"))
    uids = set(_users_from_scores()) - old_edges
    settings.pdb()
    for g in grouper(100, uids):
        for user in twitter.user_lookup(g):
            if user is None or user.protected:
                continue
            try:
                edges = twitter.get_edges(user._id)
            except restkit.errors.Unauthorized:
                logging.warn("unauthorized!")
                continue
            except restkit.errors.ResourceNotFound:
                logging.warn("resource not found!?")
                continue
            edges.save()
            user.save()
            sleep_if_needed()