This article collects typical usage examples of the Python method multiprocessing.Pool.map_async. If you have been wondering what Pool.map_async does, how to call it, or what it looks like in real code, the curated examples below may help. You can also read more about its enclosing class, multiprocessing.Pool.
The following shows 15 code examples of Pool.map_async, sorted by popularity by default. You can vote for the examples you like or find useful; your feedback helps the system recommend better Python code examples.
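Before the project examples, here is a minimal, self-contained sketch of the basic pattern (written for Python 3; the worker function square and its inputs are illustrative and not taken from any of the projects below):

from multiprocessing import Pool

def square(x):
    return x * x

if __name__ == '__main__':
    with Pool(processes=4) as pool:
        # map_async returns an AsyncResult immediately instead of blocking like map()
        result = pool.map_async(square, range(10))
        # .get() blocks until every worker result is available
        print(result.get())  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

Unlike Pool.map, map_async lets the caller keep doing other work and collect the results later from the AsyncResult; several of the examples below also pass a callback that runs once all results are ready.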
Example 1: csv_to_solr
# Required import: from multiprocessing import Pool
# Or: from multiprocessing.Pool import map_async
def csv_to_solr(fl, endpoint='http://dev-search:8983/solr/main', num_topics=999, reset_callback=None):
    if reset_callback is not None:
        print "Resetting (no way back now!)"
        reset_callback()
    print 'generating updates'
    initialize_doc = dict([('topic_%d_tf' % i, {'set': 0}) for i in range(1, num_topics)])
    p = Pool(processes=8)
    line_groupings = [[]]
    grouping_counter = 0
    total_lines = 0
    for line in fl:
        line_groupings[grouping_counter].append(line)
        if len(line_groupings[grouping_counter]) >= 10000:
            if grouping_counter == 3:
                curr_lines = sum(map(len, line_groupings))
                total_lines += curr_lines
                print 'processing line groups for', curr_lines, 'lines', total_lines, 'total'
                groupings = [(endpoint, initialize_doc, line_groupings[i]) for i in range(0, len(line_groupings))]
                print p.map_async(process_linegroup, groupings).get()
                grouping_counter = 0
                line_groupings = [[]]
            else:
                grouping_counter += 1
                line_groupings.append([])
    groupings = [(endpoint, initialize_doc, line_groupings[i]) for i in range(0, len(line_groupings))]
    print p.map_async(process_linegroup, groupings).get()
    print "Committing..."
    requests.post('%s/update?commit=true' % endpoint, headers={'Content-type': 'application/json'})
    return True
Example 2: convert_document
# Required import: from multiprocessing import Pool
# Or: from multiprocessing.Pool import map_async
def convert_document(pdf_filename,
                     resolution=200,
                     delete_files=True,
                     page_delineation='\n--------\n',
                     verbose=False,
                     temp_dir=str(uuid.uuid4()), password='',
                     thumb_prefix='thumb_page_',
                     pool_count=2):
    just_pdf_filename = os.path.basename(pdf_filename)
    temp_pdf_filename = '{0}/{1}'.format(temp_dir, just_pdf_filename)
    shutil.copyfile(pdf_filename, temp_pdf_filename)
    filename = decrypt_pdf(temp_pdf_filename, temp_dir, password)
    filenames = split_pdf(filename, temp_dir)
    for filename in filenames:
        __pdf_filenames.put(filename)
    pool = Pool()
    pool.map_async(
        _yapot_worker,
        [(tid, just_pdf_filename, temp_dir, resolution) for
         tid in range(0, pool_count)],
    )
    while __text_filenames.qsize() != len(filenames):
        time.sleep(1)
    text_filenames = []
    try:
        while(1):
            text_filenames.append(__text_filenames.get_nowait())
    except:
        pass
    text = build_output_text(text_filenames, page_delineation)
    if delete_files:
        cleanup_yapot(temp_dir)
    return text
Example 3: filter_wids
# Required import: from multiprocessing import Pool
# Or: from multiprocessing.Pool import map_async
def filter_wids(wids, refresh=False):
    p = Pool(processes=8)
    wids = [x[0] for x in p.map_async(exists, wids).get() if x[1]]
    if not refresh:
        wids = [x[0] for x in p.map_async(not_processed, wids).get() if x[1]]
    return wids
Example 4: main
# Required import: from multiprocessing import Pool
# Or: from multiprocessing.Pool import map_async
def main():
    args = parse_options()
    viewpoint = args.observation_point
    viewpoint = Vector(*viewpoint)
    vertex_count, face_count, lines_count = importutils.analyze_file(args.input_file)
    print 'Vertices: %d, Primitives: %d' % (vertex_count, face_count)
    faces = importutils.get_faces(args.input_file)
    print 'File imported.'
    pool = Pool(args.jobs)
    try:
        result = pool.map_async(geometryutils.build_triangles, faces, 10000)
    except KeyboardInterrupt:
        pool.terminate()
        print 'Program stopped.'
        return
    triangles = result.get()
    triangles = chain.from_iterable(triangles)
    print 'Triangles generated.'
    try:
        process_data = ((t, viewpoint, args.wavelength) for t in triangles)
        result = pool.map_async(processor.try_process_triangle, process_data)
    except KeyboardInterrupt:
        pool.terminate()
        print 'Program stopped.'
        return
    data = result.get()
    data = filter(lambda x: x, data)
    print 'Model processed.'
    processor.write_triangles_data(data, args.output_file)
    print 'Data written into %s' % args.output_file
Example 5: brutePlugin
# Required import: from multiprocessing import Pool
# Or: from multiprocessing.Pool import map_async
def brutePlugin(pluginlist, foundplug, hosti, pathi, porti, securei, psize):
    global host
    host = hosti
    global port
    port = porti
    global secure
    secure = securei
    global plugfound
    plugfound = foundplug
    global path
    path = pathi
    f = open(plugfound, 'w').close()
    listsize = (len(pluginlist))
    # manage pool
    if (psize == 0): psize = 5
    if (listsize <= psize): chunksize = 1
    else: chunksize = ((listsize / psize) + (listsize % psize))
    print("Plugin list size: %d\tChunk size: %d\tPool size: %d" % ((listsize), chunksize, psize))
    print("Plugin bruteforcing started")
    pool = Pool(processes=psize)
    for chunk in itertools.izip(grouper(pluginlist, chunksize)): pool.map_async(worker, chunk)
    pool.close()
    try:
        while(len(active_children()) > 0):  # how many active children do we have
            sleep(2)
            ignore = active_children()
    except KeyboardInterrupt: exit('CTRL^C caught, exiting...\n\n')
    print("Plugin bruteforce complete")
Example 6: rc
# Required import: from multiprocessing import Pool
# Or: from multiprocessing.Pool import map_async
def rc(rf, alphabet, numOfThreads):
    tryn = 0
    counterTmp = 0
    printCounter = 1000
    listBasic = []
    if rf.endswith('.rar'):
        funcChosen = unrar
    elif rf.endswith('.zip') or rf.endswith('.7z'):
        funcChosen = zipFileUnzip
    for a in range(1, len(alphabet)+1):
        for b in itertools.product(alphabet, repeat=a):
            k = "".join(b)
            k = re.escape(k)
            listBasic.append(k)
            tryn += 1
            if len(listBasic) == numOfThreads:
                pool = Pool(numOfThreads)
                pool.map_async(funcChosen, listBasic, callback=exitPass)
                pool.close()
                if resultPass:
                    timeWasted = time.time()-start
                    print 'Found! Password is '+resultPass
                    print "It took " + str(round(time.time()-start, 3)) + " seconds"
                    print "Speed: " + str(round(tryn/float(timeWasted), 2)) + " passwords/sec"
                    print "Tried " + str(tryn) + " passwords"
                    exit()
                listBasic = []
            counterTmp += 1
            if counterTmp >= printCounter:
                print 'Trying combination number '+str(tryn)+':'+str(k)
                timeWasted = round(time.time()-start, 2)
                if timeWasted > 0:
                    print "It took already " + str(timeWasted) + " seconds. Speed: " + str(round(tryn/float(timeWasted), 2)) + " passwords/sec"
                counterTmp = 0
Example 7: pool_multiprocess_index
# Required import: from multiprocessing import Pool
# Or: from multiprocessing.Pool import map_async
def pool_multiprocess_index(file_or_urls, shard_size=10000):
    pool = Pool(processes=3)
    for file_ref in file_or_urls:
        reader = pymarc.MARCReader(open(file_ref, 'rb'))
        print("Start-up multiprocess pool")
        pool.map_async(index_shard, reader, shard_size)
    print("Finished multiprocess")
Example 8: bruteUser
# Required import: from multiprocessing import Pool
# Or: from multiprocessing.Pool import map_async
def bruteUser(userlist, psize, hosti, pathi, porti, securei, userfound):
    global host
    host = hosti
    global port
    port = porti
    global secure
    secure = securei
    global userout
    userout = userfound
    global path
    path = pathi
    f = open(userout, "w").close()
    usersize = len(userlist)
    # manage pool
    if usersize <= psize:
        chunksize = 1
    else:
        chunksize = (usersize / psize) + (usersize % psize)
    print("Userlist size: %d\tChunk size: %d\tPool size: %d" % (usersize, chunksize, psize))
    print("Bruteforcing usernames")
    pool = Pool(processes=psize)
    for chunk in itertools.izip(grouper(userlist, chunksize)):
        pool.map_async(worker, chunk)
    pool.close()
    try:
        while len(active_children()) > 0:  # how many active children do we have
            sleep(2)
            ignore = active_children()
    except KeyboardInterrupt:
        exit("CTRL^C caught, exiting...\n\n")
    print("Username bruteforce complete")
Example 9: __init__
# Required import: from multiprocessing import Pool
# Or: from multiprocessing.Pool import map_async
def __init__(self, subID):
    """
    THINGS TO UNDERSTAND:
    - when adding simulation attributes, why is the list nested (need to call simObjList[0])??
    """
    # Create instance of class from superclass
    Subject.__init__(self, subID)
    # Prepare to process in parallel
    # Simulation descriptors
    simDescriptors = ['A_SD2F_RepGRF', 'A_SD2F_RepKIN', 'A_SD2S_RepGRF', 'A_SD2S_RepKIN',
                      'A_Walk_RepGRF', 'A_Walk_RepKIN', 'U_SD2F_RepGRF', 'U_SD2F_RepKIN',
                      'U_SD2S_RepGRF', 'U_SD2S_RepKIN', 'U_Walk_RepGRF', 'U_Walk_RepKIN']
    # List of simulation names
    simNames = [subID + '_' + descriptor for descriptor in simDescriptors]
    # Initialize global variable for simulation objects
    initializeSimList()
    # Start worker pool
    pool = Pool(processes=12)
    # Run parallel processes to process simulations and append object to global list
    pool.map_async(runParallel, simNames, callback=updateSimList)
    # Clean up spawned processes
    pool.close()
    pool.join()
    # Add simulations as attributes to subject object
    for simObj in simObjList[0]:
        setattr(self, simObj.simName, simObj)
    # Display message to user
    print 'Time elapsed for processing subject ' + self.subID + ': ' + str(int(time.time()-self.startTime)) + ' seconds'
Example 10: main
# Required import: from multiprocessing import Pool
# Or: from multiprocessing.Pool import map_async
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]
    args = parse_command_line(argv)
    loglevel = getattr(logging, args.log)
    logging.basicConfig(format='%(asctime)s.%(msecs)03d %(levelname)s %(name)s: %(message)s', level=loglevel, datefmt='%Y-%m-%d %H:%M:%S')
    branchingPoints = ['ee100','em100','mm100','et100','mt100','tt100','BP1','BP2','BP3','BP4']
    masses = _3L_MASSES if args.analysis=='Hpp3l' else _4L_MASSES
    if args.do4l: masses = _4L_MASSES
    if args.doBoth: masses = _4L_MASSES
    if not args.allMasses: masses = [args.mass]
    if not args.allBranchingPoints: branchingPoints = [args.branchingPoint]
    poolArgs = [[m,b] for m in masses for b in branchingPoints]
    if len(poolArgs)==1:
        job = poolArgs[0]
        BPWrapper((args.analysis,args.channel,args.period,job[0],job[1],args.bgMode,args.scaleFactor,args.doAlphaTest,args.unblind,args.do4l,args.doBoth,args.cut,args.skipTau))
    else:
        p = Pool(8)
        try:
            p.map_async(BPWrapper, [(args.analysis,args.channel,args.period,job[0],job[1],args.bgMode,args.scaleFactor,args.doAlphaTest,args.unblind,args.do4l,args.doBoth,args.cut,args.skipTau) for job in poolArgs]).get(999999)
        except KeyboardInterrupt:
            p.terminate()
            print 'limits cancelled'
            sys.exit(1)
    return 0
Example 11: _get_images_from_pdf
# Required import: from multiprocessing import Pool
# Or: from multiprocessing.Pool import map_async
def _get_images_from_pdf(pdf_filename, resolution, verbose, delete_files,
                         temp_dir, make_thumbs, thumb_size, thumb_dir, thumb_prefix, pool_count=1):
    success = False
    try:
        if verbose == True:
            print "Splitting PDF into multiple pdf's for processing ..."
        # make sure there is a place to put our temporary pdfs
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        # make sure if we are going to make thumbs, the folder exists
        if make_thumbs == True:
            if not os.path.exists(thumb_dir):
                os.makedirs(thumb_dir)
        # read input pdf
        inputpdf = PdfFileReader(open(pdf_filename, "rb"))
        if inputpdf.getIsEncrypted():
            inputpdf.decrypt('')
        if verbose == True:
            print "Writing out %i pages ..." % inputpdf.numPages
        # create all of the temporary pdfs
        for i in xrange(inputpdf.numPages):
            output = PdfFileWriter()
            output.addPage(inputpdf.getPage(i))
            #print output.resolvedObjects
            filename = "{0}/document-page-{1}.pdf".format(temp_dir, i)
            with open(filename, "wb") as outputStream:
                output.write(outputStream)
            __pdf_queue.put(i)
        if verbose == True:
            print "Dispatching pdf workers ..."
        # spin up our workers to convert the pdfs to images
        #pool_count = 4
        pool = Pool()
        pool.map_async(
            _pdf_converter_worker,
            [(x, resolution, verbose, delete_files,
              temp_dir, make_thumbs, thumb_size,
              thumb_dir, thumb_prefix) for \
             x in range(pool_count)]
        )
        while __pdf_texts.qsize() != inputpdf.numPages:
            time.sleep(.25)
        if verbose == True:
            print "Done converting PDF."
        success = True
    except Exception, e:
        print str(e)
Example 12: SGDNOMAD
# Required import: from multiprocessing import Pool
# Or: from multiprocessing.Pool import map_async
def SGDNOMAD(data, movies_, eta_=0.01, lambduh_=0.1, lambduh_w_=0.1, rank=10, maxit=10):
    global latentShape, weightShape, biasShape, userOffset, movieOffset, mp_arr, mp_w, mp_b, eta, lambduh, lambduh_w, counter, qsize, movies
    movies = movies_.tocsr()
    t1 = time.time()
    eta = eta_
    lambduh = lambduh_
    lambduh_w = lambduh_w_
    userOffset = 0
    movieOffset = data.shape[0]
    # Allocate shared memory across processors for latent variable
    latentShape = (sum(data.shape), rank)
    mp_arr = mp.Array(ctypes.c_double, latentShape[0] * latentShape[1])
    latent = np.frombuffer(mp_arr.get_obj()).reshape(latentShape)
    weightShape = (latentShape[0], movies.shape[1])
    mp_w = mp.Array(ctypes.c_double, weightShape[0] * weightShape[1])
    weights = np.frombuffer(mp_w.get_obj()).reshape(weightShape)
    biasShape = (latentShape[0], 1)
    mp_b = mp.Array(ctypes.c_double, biasShape[0] * biasShape[1])
    biases = np.frombuffer(mp_b.get_obj()).reshape(biasShape)
    counter = mp.Value('i', 0)
    qsize = mp.Array('i', [0] * FLAGS.cores)
    # Initialize latent variable so that expectation equals average rating
    avgRating = data.sum() / data.nnz
    latent[:] = np.random.rand(latentShape[0], latentShape[1]) * math.sqrt(avgRating / rank / 0.25)
    weights[:] = np.zeros(weightShape)
    biases[:] = np.zeros(biasShape)
    slices = slice(data, FLAGS.cores)
    rowSlices = rowSlice(data, FLAGS.cores)
    p2 = Pool(FLAGS.cores)
    p = Pool(FLAGS.cores)
    it = 0
    printLog(0, 0, time.time() - t1, RMSE2(slices, data.nnz, p2))
    manager = mp.Manager()
    queues = [manager.Queue() for x in range(FLAGS.cores)]
    colList = np.round(np.linspace(0, data.shape[1], (FLAGS.cores) * 20 + 1)).astype(int)
    #for i in range(data.shape[1]):
    #    queues[np.random.randint(0, FLAGS.cores)].put(i)
    for i in range(len(colList) - 1):
        r = np.random.randint(0, FLAGS.cores)
        queues[r].put((colList[i], colList[i+1]))
        qsize[r] += 1
    p.map_async(updateNOMAD, [(i, a, b, queues) for i, a, b in rowSlices])
    countPerEpoch = FLAGS.cores * (len(colList) - 1)
    start = time.time()
    #print [q.qsize() for q in queues]
    print [q for q in qsize]
Example 13: process_batch
# Required import: from multiprocessing import Pool
# Or: from multiprocessing.Pool import map_async
def process_batch(self):
    global batch_list
    # Wait for splitting to finish and reinitialize new Pool
    self.split_pool.close()
    self.split_pool.join()
    self.split_pool = Pool(processes=self.pool_size)
    # Filter array for None values
    batch_list = [x for x in batch_list if x is not None]
    # Get category of each word based on keywords
    process_pool = Pool(processes=self.pool_size)
    keyword_result = process_pool.map_async(get_keyword_categories, batch_list)
    # Get category of each word using conceptnet
    #conceptnet_pool = Pool(processes=self.pool_size)
    conceptnet_result = process_pool.map_async(get_conceptnet_categories, batch_list)
    # Get NER categories
    #NER_pool = Pool(processes=self.pool_size)
    NER_result = process_pool.map_async(get_NER_categories, batch_list)
    # Wait for processes in the batch to finish
    print "Keyword"
    sys.stdout.flush()
    keyword_result = keyword_result.get()
    #while(not conceptnet_result.ready()):
    #    print conceptnet_result._number_left
    print "NER"
    sys.stdout.flush()
    NER_result = NER_result.get()
    print "Concept net"
    sys.stdout.flush()
    conceptnet_result = conceptnet_result.get()
    #conceptnet_result = process_pool.map(get_conceptnet_categories, batch_list)
    # Merge results from each type of category
    for i in range(0, len(keyword_result)):
        keyword_result[i].update(conceptnet_result[i])
        keyword_result[i].update(NER_result[i])
        # Build category values based on values of other columns
        keyword_result[i].update(get_columnval_categories(keyword_result[i]))
    # Build and write column values for CSV file
    for i in range(0, len(self.temp_row)):
        val_row = []
        for column in self.headers:
            val_row.append(keyword_result[i][column])
        cur_row = self.temp_row[i] + val_row
        self.file_writer.writerow(cur_row)
    # Reset temporary containers
    self.temp_row = []
    batch_list = [None] * self.batch
Example 14: get_feature_data
# Required import: from multiprocessing import Pool
# Or: from multiprocessing.Pool import map_async
def get_feature_data(args):
    bucket = connect_s3().get_bucket('nlp-data')
    widlines = bucket.get_key('datafiles/topwams.txt').get_contents_as_string().split("\n")
    wids = filter(lambda x: x, widlines)[:args.num_wikis]
    log("Loading entities and heads for ", len(wids), "wikis")
    pool = Pool(processes=args.num_processes)
    r = pool.map_async(get_data, wids)
    r.wait()
    wiki_data = defaultdict(dict, r.get())
    log("Getting data from API")
    widstrings = [','.join(wids[i:i+20]) for i in range(0, len(wids), 20)]
    r = pool.map_async(get_wiki_data_from_api, widstrings)
    for grouping in r.get():
        if type(grouping) != dict:
            continue
        for wiki_id, api_data in grouping.items():
            wiki_data[wiki_id]['api_data'] = api_data
    log("Turning data into features")
    wiki_ids, data_dicts = zip(*wiki_data.items())
    log("Working on", len(wiki_ids))
    r = pool.map_async(data_to_features, data_dicts)
    r.wait()
    wid_to_features = zip(wiki_ids, r.get())
    log(len(set([value for _, values in wid_to_features for value in values])), "features")
    return dict(wid_to_features)
Example 15: run_ntuples
# Required import: from multiprocessing import Pool
# Or: from multiprocessing.Pool import map_async
def run_ntuples(analysis, channel, period, samples, loglevel, **kwargs):
    '''Run a given analyzer for the analysis'''
    logger = logging.getLogger(__name__)
    test = kwargs.pop('test', False)
    metShift = kwargs.pop('metShift', '')
    ntup_dir = './ntuples/%s_%iTeV_%s' % (analysis, period, channel)
    python_mkdir(ntup_dir)
    root_dir, sample_names = get_sample_names(analysis, period, samples, **kwargs)
    filelists = {}
    for sample in sample_names:
        sampledir = '%s/%s' % (root_dir, sample)
        filelists[sample] = ['%s/%s' % (sampledir, x) for x in os.listdir(sampledir)]
    if len(sample_names)==1 or test:  # only one, its a test, dont use map
        name = sample_names[0]
        outname = "%s/%s.root" % (ntup_dir, name)
        if test: outname = 'test.root'
        run_analyzer((analysis, channel, name, filelists[name], outname, period, metShift, loglevel))
        return 0
    p = Pool(8)
    try:
        p.map_async(run_analyzer, [(analysis, channel, name, filelists[name], "%s/%s.root" % (ntup_dir, name), period, metShift, loglevel) for name in sample_names]).get(999999)
    except KeyboardInterrupt:
        p.terminate()
        logger.info('Analyzer cancelled')
        sys.exit(1)
    return 0