This article collects typical usage examples of the Python method multiprocessing.pool.Pool.imap_unordered. If you have been wondering how Pool.imap_unordered works or how to use it in practice, the curated code examples below may help. You can also explore further usage examples of its containing class, multiprocessing.pool.Pool.
The sections below present 15 code examples of Pool.imap_unordered, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
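Before the collected examples, here is a minimal self-contained sketch (written for this page, Python 3, not taken from any of the projects below) of the core behavior: imap_unordered yields results as soon as each worker finishes, not in input order.

from multiprocessing.pool import Pool

def square(x):
    # Any picklable top-level function works as the worker.
    return x * x

if __name__ == '__main__':
    with Pool(4) as pool:
        # Results are yielded as each worker finishes, so the output
        # order may differ from the input order on every run.
        for result in pool.imap_unordered(square, range(10)):
            print(result)

Run it a few times: the printed order changes while the set of results stays the same. That trade-off, unordered delivery in exchange for throughput, is what every example below exploits.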
Example 1: parse
# Required import: from multiprocessing.pool import Pool [as alias]
# Or: from multiprocessing.pool.Pool import imap_unordered [as alias]
def parse(document, pages, parse_refs=True,
        progress_monitor=NullProgressMonitor(),
        pool_size=DEFAULT_POOL_SIZE):
    progress_monitor.start('Parsing Pages', pool_size + 1)

    # Prepare input
    pages = [(page.local_url, page.url) for page in
             pages.values() if page.local_url is not None]
    pages_chunks = chunk_it(pages, pool_size)
    inputs = []
    for pages_chunk in pages_chunks:
        inputs.append((document.parser, document.pk, parse_refs, pages_chunk))

    # Close connection to allow the new processes to create their own.
    connection.close()

    # Split work
    progress_monitor.info('Sending {0} chunks to worker pool'
                          .format(len(inputs)))
    pool = Pool(pool_size)
    for result in pool.imap_unordered(sub_process_parse, inputs, 1):
        progress_monitor.work('Parsed 1/{0} of the pages'.format(pool_size), 1)

    # Word Count
    word_count = 0
    for page in document.pages.all():
        word_count += page.word_count
    document.word_count = word_count
    document.save()
    progress_monitor.work('Counted Total Words', 1)

    pool.close()
    progress_monitor.done()
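The chunk_it helper is not part of this snippet. A plausible sketch, assuming it simply splits a list into pool_size roughly equal chunks, might look like this:

# Hypothetical helper, not from the original project: split seq into
# n roughly equal chunks so each worker process receives one chunk.
def chunk_it(seq, n):
    k, m = divmod(len(seq), n)
    return [seq[i * k + min(i, m):(i + 1) * k + min(i + 1, m)]
            for i in range(n)]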
Example 2: query_tweets
# Required import: from multiprocessing.pool import Pool [as alias]
# Or: from multiprocessing.pool.Pool import imap_unordered [as alias]
def query_tweets(query, limit=None, begindate=dt.date(2006, 3, 21), enddate=dt.date.today(), poolsize=20, lang=''):
    no_days = (enddate - begindate).days

    if poolsize > no_days:
        # Since we are assigning each pool a range of dates to query,
        # the number of pools should not exceed the number of dates.
        poolsize = no_days
    dateranges = [begindate + dt.timedelta(days=elem) for elem in linspace(0, no_days, poolsize + 1)]

    if limit:
        limit_per_pool = (limit // poolsize) + 1
    else:
        limit_per_pool = None

    queries = ['{} since:{} until:{}'.format(query, since, until)
               for since, until in zip(dateranges[:-1], dateranges[1:])]

    all_tweets = []
    try:
        pool = Pool(poolsize)
        logger.info('queries: {}'.format(queries))
        try:
            for new_tweets in pool.imap_unordered(partial(query_tweets_once, limit=limit_per_pool, lang=lang), queries):
                all_tweets.extend(new_tweets)
                logger.info('Got {} tweets ({} new).'.format(
                    len(all_tweets), len(new_tweets)))
        except KeyboardInterrupt:
            logger.info('Program interrupted by user. Returning all tweets '
                        'gathered so far.')
    finally:
        pool.close()
        pool.join()

    return all_tweets
Example 3: main
# Required import: from multiprocessing.pool import Pool [as alias]
# Or: from multiprocessing.pool.Pool import imap_unordered [as alias]
def main():
    # non deterministic process pool
    from multiprocessing.pool import Pool
    p = Pool(10)
    run1 = [a for a in p.imap_unordered(echo, xrange(10))]
    run2 = [a for a in p.imap_unordered(echo, xrange(10))]
    run3 = [a for a in p.imap_unordered(echo, xrange(10))]
    run4 = [a for a in p.imap_unordered(echo, xrange(10))]
    print(run1, run2, run3, run4)
    print(run1 == run2 == run3 == run4)

    # deterministic gevent pool
    from gevent.pool import Pool
    p = Pool(10)
    run1 = [a for a in p.imap_unordered(echo, xrange(10))]
    run2 = [a for a in p.imap_unordered(echo, xrange(10))]
    run3 = [a for a in p.imap_unordered(echo, xrange(10))]
    run4 = [a for a in p.imap_unordered(echo, xrange(10))]
    print(run1, run2, run3, run4)
    print(run1 == run2 == run3 == run4)
Example 4: extract_all_plaintext
# Required import: from multiprocessing.pool import Pool [as alias]
# Or: from multiprocessing.pool.Pool import imap_unordered [as alias]
def extract_all_plaintext(filenames, out_folder=PLAINTEXT_FOLDER):
    print "EXTRACTING PLAINTEXT FROM {0} FILES INTO {1}".format(len(filenames), out_folder)

    # Zip the filename input with the output folder
    tuple_input = zip(filenames, [out_folder] * len(filenames))

    pool = Pool(processes=util.CPU_COUNT)
    #pool = Pool(processes=1)
    num_tasks = len(filenames)
    for i, _ in enumerate(pool.imap_unordered(__extract_plaintext_as_tuple, tuple_input), 1):
        sys.stderr.write('\rdone {0:%}'.format(i / num_tasks))
    pool.close()
    print "\nDONE"
Example 5: StoreMode
# Required import: from multiprocessing.pool import Pool [as alias]
# Or: from multiprocessing.pool.Pool import imap_unordered [as alias]
def StoreMode(corpus):
    for dataset in datasets:
        print "Storing news stories for the %s set:" % dataset
        urls_filename = "%s/wayback_%s_urls.txt" % (corpus, dataset)
        urls = ReadUrls(urls_filename)

        p = Pool()
        stories = p.imap_unordered(StoreMapper, izip(urls, repeat(corpus)))

        progress_bar = ProgressBar(len(urls))
        for story in stories:
            if story:
                WriteStory(story, corpus)
            progress_bar.Increment()
Example 6: run
# Required import: from multiprocessing.pool import Pool [as alias]
# Or: from multiprocessing.pool.Pool import imap_unordered [as alias]
def run(config_uri, app_name=None, username=None, types=(), batch_size=500, processes=None):
    # multiprocessing.get_context is Python 3 only.
    from multiprocessing import get_context
    from multiprocessing.pool import Pool

    # Loading the app will have configured from the config file. Reconfigure here:
    logging.getLogger('snovault').setLevel(logging.DEBUG)

    testapp = internal_app(config_uri, app_name, username)
    connection = testapp.app.registry[CONNECTION]
    uuids = [str(uuid) for uuid in connection.__iter__(*types)]
    transaction.abort()
    logger.info('Total items: %d' % len(uuids))

    pool = Pool(
        processes=processes,
        initializer=initializer,
        initargs=(config_uri, app_name, username),
        context=get_context('forkserver'),
    )

    all_results = []
    try:
        for result in pool.imap_unordered(worker, batched(uuids, batch_size), chunksize=1):
            results = result['results']
            errors = sum(error for item_type, path, update, error in results)
            updated = sum(update for item_type, path, update, error in results)
            logger.info('Batch: Updated %d of %d (errors %d)' %
                        (updated, len(results), errors))
            all_results.extend(results)
    finally:
        pool.terminate()
        pool.join()

    def result_item_type(result):
        # Ensure we always return a string
        return result[0] or ''

    for item_type, results in itertools.groupby(
            sorted(all_results, key=result_item_type), key=result_item_type):
        results = list(results)
        errors = sum(error for item_type, path, update, error in results)
        updated = sum(update for item_type, path, update, error in results)
        logger.info('Collection %s: Updated %d of %d (errors %d)' %
                    (item_type, updated, len(results), errors))
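The batched helper used above is not shown in the snippet. A minimal sketch, assuming it just yields fixed-size slices of the uuid list, could be:

# Hypothetical helper, not from the original source: yield successive
# fixed-size batches so each imap_unordered task processes one batch.
def batched(items, batch_size):
    for i in range(0, len(items), batch_size):
        yield items[i:i + batch_size]

Batching like this amortizes the per-task IPC overhead when each item is cheap to process but there are many of them.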
Example 7: GenerateMode
# Required import: from multiprocessing.pool import Pool [as alias]
# Or: from multiprocessing.pool.Pool import imap_unordered [as alias]
def GenerateMode(corpus, context_token_limit):
    for dataset in datasets:
        print 'Generating questions for the %s set:' % dataset
        urls_filename = '%s/wayback_%s_urls.txt' % (corpus, dataset)
        urls = ReadUrls(urls_filename)

        p = Pool()
        question_context_lists = p.imap_unordered(
            GenerateMapper, izip(urls, repeat(corpus), repeat(context_token_limit)))

        progress_bar = ProgressBar(len(urls))
        for question_context_list in question_context_lists:
            if question_context_list:
                for question_context in question_context_list:
                    WriteQuestionContext(question_context, corpus, dataset)
            progress_bar.Increment()
Example 8: echo
# Required import: from multiprocessing.pool import Pool [as alias]
# Or: from multiprocessing.pool.Pool import imap_unordered [as alias]
import os
import time

def echo(i):
    time.sleep(0.001)
    print os.getpid()
    return i

# Non Deterministic Process Pool
from multiprocessing.pool import Pool
p = Pool(10)
run1 = [a for a in p.imap_unordered(echo, xrange(10))]
run2 = [a for a in p.imap_unordered(echo, xrange(10))]
run3 = [a for a in p.imap_unordered(echo, xrange(10))]
run4 = [a for a in p.imap_unordered(echo, xrange(10))]
print(run1 == run2 == run3 == run4)
print
print

# Deterministic Gevent Pool
from gevent.pool import Pool
p = Pool(10)
run1 = [a for a in p.imap_unordered(echo, xrange(10))]
run2 = [a for a in p.imap_unordered(echo, xrange(10))]
run3 = [a for a in p.imap_unordered(echo, xrange(10))]
run4 = [a for a in p.imap_unordered(echo, xrange(10))]
Example 9: load_labels
# Required import: from multiprocessing.pool import Pool [as alias]
# Or: from multiprocessing.pool.Pool import imap_unordered [as alias]
label_indeces = load_labels()
raw_features = load_raw_features()
print "Loaded {0} features".format(len(raw_features))

print "Grouping prevectors by base_url"
sites = {}
site_labels = {}
for dp in data_points:
    if dp['base_url'] not in sites:
        sites[dp['base_url']] = {}
        site_labels[dp['base_url']] = dp['label']
    sites[dp['base_url']][dp['offset']] = {"code": dp['code'], "content_ssdeep": dp['content_ssdeep']}

print "Vectorizing {0} base urls".format(len(sites))
labels = []
names = []
vectors = []
pool = Pool(processes=cpu_count(), initializer=preload_process, initargs=(sites,))
for vector, site in pool.imap_unordered(compute_vectors, sites.keys()):
    if site_labels[site] in labels_to_ignore:
        continue
    vectors.append(vector)
    labels.append(site_labels[site])
    names.append(site)
    print "Vector for {0} completed".format(site)

with open("raw_feature_vectors.json", "w") as f:
    json.dump({"labels": labels, "names": names, "vectors": vectors}, f)
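Note the initializer/initargs pattern here: each worker process receives the sites dict once at startup instead of having it pickled into every task. preload_process is not shown in the snippet; a minimal sketch of the usual shape of such an initializer:

# Hypothetical initializer, not from the original source: runs once in
# each worker process and stashes the shared data in a module-level
# global, where compute_vectors can read it without per-task pickling.
_shared_sites = None

def preload_process(sites):
    global _shared_sites
    _shared_sites = sites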
Example 10: main
# Required import: from multiprocessing.pool import Pool [as alias]
# Or: from multiprocessing.pool.Pool import imap_unordered [as alias]
def main(force_reanalyze=False, include_hidden=False,
         dry_run=False, gain_type='auto',
         jobs=default_job_count(),
         quiet=False, verbose=False,
         *music_directories
         ):
    """Add replaygain tags to your music files."""
    if quiet:
        logging.basicConfig(level=logging.WARN)
    elif verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    # Some pesky functions used below will catch KeyboardInterrupts
    # inappropriately, so install an alternate handler that bypasses
    # KeyboardInterrupt instead.
    def signal_handler(sig, frame):
        print "Canceled."
        os.kill(os.getpid(), signal.SIGTERM)
    original_handler = signal.signal(signal.SIGINT, signal_handler)

    track_class = RGTrack
    if dry_run:
        logging.warn('This script is running in "dry run" mode, so no files will actually be modified.')
        track_class = RGTrackDryRun

    if len(music_directories) == 0:
        logging.error("You did not specify any music directories or files. Exiting.")
        sys.exit(1)

    logging.info("Searching for music files in the following directories:\n%s", "\n".join(music_directories),)
    tracks = [track_class(f) for f in get_all_music_files(music_directories, ignore_hidden=(not include_hidden))]

    # Filter out tracks for which we can't get the length
    for t in tracks[:]:
        try:
            len(t)
        except Exception:
            logging.error("Track %s appears to be invalid. Skipping.", t.filename)
            tracks.remove(t)

    if len(tracks) == 0:
        logging.error("Failed to find any tracks in the directories you specified. Exiting.")
        sys.exit(1)

    track_sets = RGTrackSet.MakeTrackSets(tracks)

    # Remove the earlier bypass of KeyboardInterrupt
    signal.signal(signal.SIGINT, original_handler)

    logging.info("Beginning analysis")
    handler = TrackSetHandler(force=force_reanalyze, gain_type=gain_type)

    # For display purposes, calculate how much granularity is required
    # to show visible progress at each update
    total_length = sum(len(ts) for ts in track_sets)
    min_step = min(len(ts) for ts in track_sets)
    places_past_decimal = max(0, int(math.ceil(-math.log10(min_step * 100.0 / total_length))))
    update_string = '%.' + str(places_past_decimal) + 'f%% done'

    import gst
    pool = None
    try:
        if jobs == 1:
            # Sequential
            handled_track_sets = imap(handler, track_sets)
        else:
            # Parallel
            pool = Pool(jobs)
            handled_track_sets = pool.imap_unordered(handler, track_sets)
        processed_length = 0
        percent_done = 0
        for ts in handled_track_sets:
            processed_length = processed_length + len(ts)
            percent_done = 100.0 * processed_length / total_length
            logging.info(update_string, percent_done)
        logging.info("Analysis complete.")
    except KeyboardInterrupt:
        if pool is not None:
            logging.debug("Terminating process pool")
            pool.terminate()
            pool = None
        raise
    finally:
        if pool is not None:
            logging.debug("Closing transcode process pool")
            pool.close()
    if dry_run:
        logging.warn('This script ran in "dry run" mode, so no files were actually modified.')
    pass
Example 11: echo
# Required import: from multiprocessing.pool import Pool [as alias]
# Or: from multiprocessing.pool.Pool import imap_unordered [as alias]
import time

def echo(i):
    time.sleep(0.001)
    return i

from multiprocessing.pool import Pool
p = Pool(10)
print [a for a in p.imap_unordered(echo, xrange(10))]
print [a for a in p.imap_unordered(echo, xrange(10))]
print [a for a in p.imap_unordered(echo, xrange(10))]
print [a for a in p.imap_unordered(echo, xrange(10))]
# ^ Is this distribution random ?

from gevent.pool import Pool
p = Pool(10)
print [a for a in p.imap_unordered(echo, xrange(10))]
print [a for a in p.imap_unordered(echo, xrange(10))]
print [a for a in p.imap_unordered(echo, xrange(10))]
print [a for a in p.imap_unordered(echo, xrange(10))]
Example 12: imap_unordered
# Required import: from multiprocessing.pool import Pool [as alias]
# Or: from multiprocessing.pool.Pool import imap_unordered [as alias]
def imap_unordered(self, func, iterable, chunksize=1):
    """
    Override multiprocessing.Pool.imap_unordered() so that the full
    exception stack trace from the child process is logged.
    """
    return Pool.imap_unordered(self, LogExceptions(func), iterable, chunksize)
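LogExceptions is not defined in this snippet. A common shape for such a wrapper, sketched here as an assumption rather than the original implementation, is a picklable callable that logs the child's traceback before re-raising:

import logging
import traceback

# Hypothetical wrapper, not from the original source: logs the full
# traceback inside the child process before re-raising, so the error
# stays visible even though multiprocessing only propagates the
# exception object (without its traceback) back to the parent.
class LogExceptions(object):
    def __init__(self, func):
        self.func = func

    def __call__(self, *args, **kwargs):
        try:
            return self.func(*args, **kwargs)
        except Exception:
            logging.error(traceback.format_exc())
            raise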
Example 13: task
# Required import: from multiprocessing.pool import Pool [as alias]
# Or: from multiprocessing.pool.Pool import imap_unordered [as alias]
def task(pid):
    print('Starting task %d' % (pid,))
    time.sleep(random.randint(0, 5))
    print('Finished task %d' % (pid,))
    return pid ** 2

p = Pool(processes=5)

#result = p.apply(task, [1])

#async_result = p.apply_async(task, [1])
#print async_result.ready()
#result = async_result.get()
#print result

#mapresult = p.map(task, xrange(0, 10))
#print mapresult

#async_mapresult = p.map_async(task, xrange(0, 10))
#print async_mapresult.ready()
#result = async_mapresult.get()
#print result

#imapresult = p.imap(task, xrange(0, 10))
#for result in imapresult:
#    print result

imapresult_unordered = p.imap_unordered(task, xrange(0, 10))
for result in imapresult_unordered:
    print result
Example 14: defaultdict
# Required import: from multiprocessing.pool import Pool [as alias]
# Or: from multiprocessing.pool.Pool import imap_unordered [as alias]
import inspect
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from collections import defaultdict, Counter
from multiprocessing.pool import Pool
from itertools import cycle

completed = []
container = defaultdict(list)
pool = Pool()
sim_list = [func for name, func in inspect.getmembers(simulations, inspect.isfunction) if name.startswith('sim_')]

try:
    print('press CTRL-c to stop generating samples')
    it = pool.imap_unordered(f, cycle(sim_list))
    while 1:
        sim, result = it.next(timeout=SIMULATION_TIMEOUT)
        completed.append(sim)
        sys.stdout.write('.')
        for p, wins in result.items():
            container[p].append((sim, wins))
except KeyboardInterrupt:
    pool.close()
    print('stopping all simulations...')
finally:
    pool.terminate()
    pool.join()
Example 15: namedtuple
# Required import: from multiprocessing.pool import Pool [as alias]
# Or: from multiprocessing.pool.Pool import imap_unordered [as alias]
from operator import attrgetter
from collections import namedtuple
from multiprocessing.pool import Pool

from jinja2 import Environment, FileSystemLoader
import requests
import feedparser

import config

SearchResult = namedtuple('SearchResult', ['title', 'url'])

pool = Pool(5)

if __name__ == '__main__':
    feeds = pool.imap_unordered(feedparser.parse, config.SEARCH_FEEDS)
    entries = chain.from_iterable(map(attrgetter('entries'), feeds))
    unique_entries = dict((v['link'], v) for v in entries).values()
    results = [SearchResult(entry.title, entry.link) for entry in unique_entries]

    if results:
        env = Environment(autoescape=True, loader=FileSystemLoader('templates'))
        template = env.get_template('notification.html')
        email_msg = template.render(title=config.EMAIL_SUBJECT, results=results)

        requests.post(config.MAILGUN_URL,
                      auth=("api", config.MAILGUN_KEY),
                      data={
                          "from": config.MAILGUN_EMAIL_SENDER,
                          "to": config.SEND_NOTIFICATIONS_TO,
                          "subject": config.EMAIL_SUBJECT,