This article collects typical usage examples of the Python multiprocessing.Pool.join method. If you have been wondering what exactly Pool.join does, how to call it, or what it looks like in real code, the curated examples below should help. You can also read more about its containing class, multiprocessing.Pool.
The following shows 15 code examples of Pool.join, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
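Before the individual examples, here is a minimal, self-contained sketch of the pattern every snippet below relies on: submit tasks to the pool, call close() so no new tasks can be added, then call join() to block until every worker process has finished. The square helper and the pool size are illustrative assumptions, not taken from any of the examples.

from multiprocessing import Pool

def square(x):
    # Illustrative worker function; any picklable top-level function works here.
    return x * x

if __name__ == '__main__':
    pool = Pool(processes=4)                                    # four worker processes
    results = [pool.apply_async(square, (n,)) for n in range(10)]
    pool.close()                                                # no further tasks may be submitted
    pool.join()                                                 # wait for every worker to exit
    print([r.get() for r in results])                           # collect the results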
Example 1: averageSimilarityMatrix
# Required import: from multiprocessing import Pool [as alias]
# Or: from multiprocessing.Pool import join [as alias]
def averageSimilarityMatrix(dictOfClassesLocal, dictOfWeightsLocal, title="Cluster similarity matrix", savePlot=False):
    global bar, progressCount, dictOfClasses, dictOfWeights, arguments, distances
    dictOfClasses = dictOfClassesLocal
    dictOfWeights = dictOfWeightsLocal
    print "Computing similarity matrix..."
    bar = progressbar.ProgressBar(maxval=len(dictOfClasses.keys())**2, widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()])
    bar.start()
    progressCount = 0
    # Initialise task matrix
    arguments = []
    distances = []
    for i, k in zip(range(len(dictOfClasses.keys())), reversed(sorted(dictOfClasses.keys()))):
        arguments.append([])
        distances.append([])
        for j in sorted(dictOfClasses.keys()):
            arguments[i].append((dictOfClasses[k], dictOfClasses[j], dictOfWeights[k], dictOfWeights[j]))
            distances[i].append([])
    # Distribute tasks
    poo = Pool()
    for i in range(len(dictOfClasses.keys())**2):
        poo.apply_async(interClassDistance, (i,), callback=updateResultMatrix)
    poo.close()
    poo.join()
    bar.finish()
    plot.plotSimilarityMatrix(distances, sorted(dictOfClasses.keys()), title, savePlot)
Example 2: expand_all_commits
# Required import: from multiprocessing import Pool [as alias]
# Or: from multiprocessing.Pool import join [as alias]
def expand_all_commits(code_dir, target_dir, only_year=None):
    print code_dir
    uname_lookup_by_year_q = load_uname_lookup_by_year_q()
    latest_submissions = get_latest_submissions(code_dir)
    num_students = len(latest_submissions)
    def get_commit_args(args):
        i, student = args
        latest_submit = latest_submissions[student]
        student_dir = os.path.join(code_dir, latest_submit)
        year_q = get_submit_time(student_dir)
        if (not year_q) or only_year != year_q: return (-1, '', '', -1, '', -1)
        year_target_dir = os.path.join(target_dir, year_q)
        if year_q not in uname_lookup_by_year_q or \
                latest_submit not in uname_lookup_by_year_q[year_q]:
            add_uname_to_lookup(latest_submit, year_q, uname_lookup_by_year_q)
        student_id = uname_lookup_by_year_q[year_q][latest_submit]
        #if student_id != '2012010247': return (-1,'','',-1,'',-1)
        return i, student, student_dir, student_id, year_target_dir, num_students
    students = sorted(latest_submissions.keys())
    zipped_args = map(get_commit_args, enumerate(students))
    non_students = [student for i, student in enumerate(students) if zipped_args[i][0] == -1]
    #print "unsuccessful"
    #print '\n'.join([latest_submissions[student] for student in non_students])
    pool = ThreadPool(8)
    results = pool.map(thread_process_commit, zipped_args)
    pool.close()
    pool.join()
    export_uname_lookup_by_year_q(uname_lookup_by_year_q)
Example 3: main
# Required import: from multiprocessing import Pool [as alias]
# Or: from multiprocessing.Pool import join [as alias]
def main():
    global output_doc_path
    if output_doc_path == '':
        output_doc_path = os.path.join(os.path.split(input_doc_path)[0], 'outputTinypng')
    if not os.path.exists(output_doc_path):
        os.mkdir(output_doc_path)
    for parent, dirnames, filenames in os.walk(input_doc_path):  # yields the parent directory, its sub-directory names (without path) and its file names
        for dirname in dirnames:  # directory entries
            # print("parent is:" + parent)
            # print("dirname is" + dirname)
            outDir = os.path.join(output_doc_path, os.path.relpath(os.path.join(parent, dirname), input_doc_path))
            if not os.path.exists(outDir):
                os.mkdir(outDir)
        for filename in filenames:  # file entries
            # print("parent is:" + parent)
            # print("filename is:" + filename)
            filePaths.append(os.path.join(parent, filename))
    pngFilePaths = filter(lambda x: os.path.splitext(x)[1] == '.png' or os.path.splitext(x)[1] == '.jpg', filePaths)
    print('Parent process %s.' % os.getpid())
    p = Pool(poolLimite)
    for fileName in pngFilePaths:
        p.apply_async(getTinyPng, args=(fileName,))
    print('Waiting for all subprocesses done...')
    p.close()
    p.join()
    print('All subprocesses done.')
Example 4: getData
# Required import: from multiprocessing import Pool [as alias]
# Or: from multiprocessing.Pool import join [as alias]
def getData():
    if os.path.isfile("chat_urls.p"):
        chat_urls = pickle.load(open("chat_urls.p", "rb"))
    else:
        chat_urls = {}
        for user in users:
            chat_urls[user] = get_urls(user)
        teams_url = "http://espn.go.com/mlb/teams"
        pickle.dump(chat_urls, open("chat_urls.p", "wb"))
    # for user in chat_urls:
    #     urls = chat_urls[user]
    #     for url in urls:
    #         getLog(url)
    logDB = {}
    for user in chat_urls:
        logDB[user] = {}
    p = Pool(20)
    i = 0
    manager = Manager()
    db = manager.dict()
    for user in chat_urls:
        for url in chat_urls[user]:
            i += 1
            p.apply_async(addLogData, args=(url, db))
    p.close()
    p.join()
    out = db._getvalue()
    outfile = open("rawChat.txt", "wb")
    for url in out:
        outfile.write(out[url] + "\n")
Example 5: calcRawScores
# Required import: from multiprocessing import Pool [as alias]
# Or: from multiprocessing.Pool import join [as alias]
def calcRawScores(fastaFilePath, numThreads, geneNames, gapOpen, gapExtend, matrix, scoresO):
    '''Get a global alignment based raw score for every edge in scoresO.'''
    # load sequences
    protFnL = glob.glob(fastaFilePath)
    seqD = genomes.loadProt(protFnL)
    # make list of sets of arguments to be passed to p.map. There
    # should be numThreads sets.
    argumentL = [([], seqD, gapOpen, gapExtend, matrix) for i in range(numThreads)]
    i = 0
    for g1, g2 in scoresO.iterateEdgesByEndNodes():
        edgeNum = scoresO.endNodesToEdge(g1, g2)
        edgeT = edgeNum, geneNames.numToName(g1), geneNames.numToName(g2)
        argumentL[i % numThreads][0].append(edgeT)
        i += 1
    # run
    p = Pool(numThreads)
    scoresLL = p.map(rawScoreGroup, argumentL)
    p.close()
    p.join()
    # store in scoresO
    for scoresL in scoresLL:
        for edgeNum, sc in scoresL:
            scoresO.addScoreByEdge(edgeNum, sc, 'rawSc')
    return scoresO
Example 6: train_word2id
# Required import: from multiprocessing import Pool [as alias]
# Or: from multiprocessing.Pool import join [as alias]
def train_word2id():
    """Convert every word in the training set to its corresponding id."""
    time0 = time.time()
    print('Processing train data.')
    df_train = pd.read_csv('../raw_data/question_train_set.txt', sep='\t', usecols=[0, 2, 4],
                           names=['question_id', 'word_title', 'word_content'], dtype={'question_id': object})
    print('training question number %d ' % len(df_train))
    # For questions without content, use the title instead
    na_content_indexs = list()
    for i in tqdm(xrange(len(df_train))):
        word_content = df_train.word_content.values[i]
        if type(word_content) is float:
            na_content_indexs.append(i)
    print('There are %d train questions without content.' % len(na_content_indexs))
    for na_index in tqdm(na_content_indexs):
        df_train.at[na_index, 'word_content'] = df_train.at[na_index, 'word_title']
    # Questions without a title are dropped
    na_title_indexs = list()
    for i in xrange(len(df_train)):
        word_title = df_train.word_title.values[i]
        if type(word_title) is float:
            na_title_indexs.append(i)
    print('There are %d train questions without title.' % len(na_title_indexs))
    df_train = df_train.drop(na_title_indexs)
    print('After dropping, training question number(should be 2999952) = %d' % len(df_train))
    # Convert words to id form
    p = Pool()
    train_title = np.asarray(p.map(get_id4words, df_train.word_title.values))
    np.save('../data/wd_train_title.npy', train_title)
    train_content = np.asarray(p.map(get_id4words, df_train.word_content.values))
    np.save('../data/wd_train_content.npy', train_content)
    p.close()
    p.join()
    print('Finished changing the training words to ids. Costed time %g s' % (time.time() - time0))
Example 7: dirImgProcess
# Required import: from multiprocessing import Pool [as alias]
# Or: from multiprocessing.Pool import join [as alias]
def dirImgProcess(path):
    global workerPool, workerOutput, theGreatIndex
    workerPool = Pool()
    workerOutput = []
    work = []
    theGreatIndex = {}
    pagenumber = 0
    for (dirpath, dirnames, filenames) in os.walk(path):
        for afile in filenames:
            if getImageFileName(afile) is not None:
                pagenumber += 1
                work.append([afile, dirpath, options])
    if GUI:
        GUI.progressBarTick.emit(str(pagenumber))
    if len(work) > 0:
        for i in work:
            workerPool.apply_async(func=fileImgProcess, args=(i, ), callback=fileImgProcess_tick)
        workerPool.close()
        workerPool.join()
        if GUI and not GUI.conversionAlive:
            rmtree(os.path.join(path, '..', '..'), True)
            raise UserWarning("Conversion interrupted.")
        if len(workerOutput) > 0:
            rmtree(os.path.join(path, '..', '..'), True)
            raise RuntimeError("One of workers crashed. Cause: " + workerOutput[0])
    else:
        rmtree(os.path.join(path, '..', '..'), True)
        raise UserWarning("Source directory is empty.")
Example 8: enumerate_all_subgraphs_upto_size_k_parallel
# Required import: from multiprocessing import Pool [as alias]
# Or: from multiprocessing.Pool import join [as alias]
def enumerate_all_subgraphs_upto_size_k_parallel(document_graph, k, num_of_workers=4):
    """
    returns all subgraphs of a DiscourseDocumentGraph (i.e. a MultiDiGraph)
    with up to k nodes. This is a trivially parallelized version of
    enumerate_all_subgraphs_upto_size_k()
    """
    document_nodes = len(document_graph)
    if k > document_nodes:
        k = document_nodes
    int_graph = nx.convert_node_labels_to_integers(nx.DiGraph(document_graph),
                                                   first_label=1,
                                                   label_attribute='node_id')
    pool = Pool(processes=num_of_workers)  # number of CPUs
    results = [pool.apply_async(enumerate_all_size_k_subgraphs, args=(int_graph, i))
               for i in xrange(1, k+1)]
    pool.close()
    pool.join()
    subgraphs = []
    for result in results:
        tmp_result = result.get()
        if isinstance(tmp_result, list):
            subgraphs.extend(tmp_result)
        else:
            subgraphs.append(tmp_result)
    return subgraphs
Example 9: MultiProcessScheduler
# Required import: from multiprocessing import Pool [as alias]
# Or: from multiprocessing.Pool import join [as alias]
class MultiProcessScheduler(LocalScheduler):
    def __init__(self, threads):
        LocalScheduler.__init__(self)
        self.threads = threads
        self.tasks = {}
        from multiprocessing import Pool
        self.pool = Pool(self.threads or 2)

    def start(self):
        pass

    def submitTasks(self, tasks):
        def callback(args):
            logger.debug("got answer: %s", args)
            tid, reason, result, update = args
            task = self.tasks.pop(tid)
            self.taskEnded(task, reason, result, update)
        for task in tasks:
            logger.debug("put task async: %s", task)
            self.tasks[task.id] = task
            self.pool.apply_async(run_task_in_process,
                                  [task, self.nextAttempId(), env.environ],
                                  callback=callback)

    def stop(self):
        self.pool.terminate()
        self.pool.join()
        logger.debug("process pool stopped")
Example 10: worker
# Required import: from multiprocessing import Pool [as alias]
# Or: from multiprocessing.Pool import join [as alias]
def worker(self, db, lista):
    '''
    Multithread method for the online plugin search.
    '''
    # Make the Pool of workers
    processes = 20
    # WARNING: on a fiber connection 20 workers run without errors; on ADSL use at most 4!
    pool = Pool(processes)
    # Open the urls in their own threads and return the results
    pluglist = pool.map(onlinePluginSearch, lista)
    # close the pool and wait for the work to finish
    pool.close()
    pool.join()
    # parse the result (a list of tuples), collect everything into one string (result) and update the cache
    result = ''
    for item in pluglist:
        if item[1] != []:
            for plug in item[1]:
                db.updateCache(item[0], plug)
                result = result + plug + ','
    numbers = result.count(',') + 1
    print("Number of available plugins: %s" % numbers)
    print("Adding to policy plugins: 19506,10287,12634 for credential checks and ping target.")
    # always add these 3 plug-ins to check whether the target is alive
    result = result + "19506,10287,12634"
    return result
Example 11: get_classify
# Required import: from multiprocessing import Pool [as alias]
# Or: from multiprocessing.Pool import join [as alias]
def get_classify():
    classify = {"type1": "美食", "type2": "休闲娱乐", "type4-sub_type1": "酒店", "type5": "购物", "type6": "办卡送礼",
                "type7": "旅游", "type9": "汽车", "type10": "时尚丽人", "type11": "生活服务",
                "type4-sub_type2": "出行", "type4-sub_type3": "出行"}
    for name in classify:
        total_num_of_page = get_num_of_page('http://www.rong360.com/credit/f-youhui-' + name)
        print classify[name] + ": " + str(total_num_of_page)
        thread_num = 20  # num of process
        section_size = 50
        section = total_num_of_page / section_size
        if total_num_of_page % section_size > 0:
            section += 1
        for k in range(section):
            begin = k * section_size + 1
            end = begin + section_size - 1
            end = min(end, total_num_of_page)
            print "start to get summary pages from " + str(begin) + " to " + str(end) + \
                  ", each summary page contains 20 detail content pages."
            manager = multiprocessing.Manager()
            queue = manager.Queue()  # a queue storing index of url
            queue.put(begin - 1)  # Initialization of url index
            page_queue = manager.Queue()  # a queue storing end of urls
            # start multiprocess to get urls
            pool = Pool(thread_num)
            for i in range(thread_num):
                pool.apply_async(get_page_url, args=(queue, end, page_queue, '-' + name))
            pool.close()
            pool.join()
            print 'num of total pages: ' + str(page_queue.qsize())
            store_data.insert_column("classify", classify[name], page_queue)
Example 12: datasetToNumpy
# Required import: from multiprocessing import Pool [as alias]
# Or: from multiprocessing.Pool import join [as alias]
def datasetToNumpy(dataset, sliceSel=None, chunksize=1):
    size = dataset.size
    n = dataset.shape[0]
    if (sliceSel is None): sliceSel = slice(0, n, 1)
    isMultiProcessUseful = size > (1024*1024)
    if (chunksize > 1) and isMultiProcessUseful:
        # subdivide indices in chunksize
        start, stop, step = sliceSel.indices(n)
        nC = int(float(stop-start)/step/chunksize+0.5)
        print(nC)
        args = []
        for i in range(nC):
            s1 = start + i*(chunksize*step)
            s2 = start + (i+1)*(chunksize*step)
            print(i, s1, s2)
            args.append((dataset, slice(s1, s2, step)))
        print(args)
        raw_input("Not working yet, use chunksize = 1")
        p = Pool()  # 16-43 ms overhead
        res = p.map_async(f, args, chunksize=1)
        p.close()
        p.join()
        data = np.asarray(res.get())
    else:
        data = dataset[sliceSel]
    return data
Example 13: main
# Required import: from multiprocessing import Pool [as alias]
# Or: from multiprocessing.Pool import join [as alias]
def main(args):
    """
    Main function
    :param args: argparse dict
    :return: None
    """
    print "Start reindexing from {0} to {1} with batch size of {2} and {3} worker processes".format(
        args.source_index, args.destination_index, args.batch_size, args.processes
    )
    client = Elasticsearch()
    print "connected to elastic search at http://localhost:9200"
    docs = scan(client, index=args.source_index)
    count = 0
    queue = Queue(args.batch_size)  # don't fill up queue too much
    pool = Pool(args.processes, worker_main, (queue, args.source_index, args.destination_index, args.batch_size))
    for doc in docs:
        count += 1
        if count % args.batch_size == 0:
            print "put {0}".format(count)
        queue.put(doc, True)
    print "put {0}".format(count)
    # send stop messages
    for i in range(args.processes):
        queue.put(Stop, True)
    pool.close()
    pool.join()
Example 14: multiprocessing
# Required import: from multiprocessing import Pool [as alias]
# Or: from multiprocessing.Pool import join [as alias]
def multiprocessing(self, map_func, callback=None):
    """
    Pass a function to perform on each volume of the feature reader, using
    multiprocessing (map), then process the combined outputs (reduce).

    map_func
        Function to run on each individual volume. Takes as input a tuple
        containing a feature_reader and volume path, from which a volume can be
        created. Returns a (key, value) tuple.

        def do_something_on_vol(args):
            fr, path = args
            vol = fr.create_volume(path)
            # Do something with 'vol'
            return (key, value)
    """
    # Match process count to cpu count
    p = Pool()
    # f = self._wrap_func(func)
    results = p.map(map_func, self._mp_paths(), chunksize=5)
    # , callback=callback)
    p.close()
    p.join()
    return results
Example 15: multi_remote_exec_cmd
# Required import: from multiprocessing import Pool [as alias]
# Or: from multiprocessing.Pool import join [as alias]
def multi_remote_exec_cmd(hosts, username, cmd):
    pool = Pool(processes=5)
    for host in hosts:
        username, password, ip, port = get_connect_item(username, host)
        pool.apply_async(remote_exec_cmd, (ip, port, username, password, cmd))
    pool.close()
    pool.join()