This page collects typical usage examples of the Python method multiprocessing.JoinableQueue.qsize. If you have been wondering what JoinableQueue.qsize does, how to call it, or what real code using it looks like, the curated examples below should help. You can also read more about the class it belongs to, multiprocessing.JoinableQueue.
The following shows 15 code examples of JoinableQueue.qsize, ordered by popularity by default. You can upvote the examples you find useful; those ratings help the site recommend better Python code samples.
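Before the examples, here is a minimal sketch of the pattern most of them follow (it is not taken from the examples below, and all names in it are illustrative): fill a JoinableQueue, take a qsize() snapshot to use as a progress total, and call join() once every item has been marked done with task_done(). Note that qsize() is approximate when several processes touch the queue and raises NotImplementedError on macOS, so it is best used for reporting rather than control flow.

from multiprocessing import JoinableQueue, Process

def worker(queue):
    # Consume items forever; task_done() is what lets queue.join() return.
    while True:
        item = queue.get()
        try:
            print("processing", item)
        finally:
            queue.task_done()

if __name__ == "__main__":
    queue = JoinableQueue()
    for i in range(10):
        queue.put(i)
    total = queue.qsize()  # approximate snapshot, used only for reporting
    print("queued roughly %d items" % total)
    p = Process(target=worker, args=(queue,), daemon=True)
    p.start()
    queue.join()  # blocks until task_done() has been called for every item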
Example 1: load_urls
# Required import: from multiprocessing import JoinableQueue [as alias]
# Or alternatively: from multiprocessing.JoinableQueue import qsize [as alias]
def load_urls(self, **kwargs):
    urls = ["https://www.atagar.com/echo.php"] * 100

    urls_queue = Queue()
    for url in urls:
        filename = os.path.join(self.results_directory, "doc_%03d.txt" % (urls_queue.qsize() + 1))
        urls_queue.put((url, filename))

    self.urls_queue = urls_queue
    self.num_urls = int(urls_queue.qsize())
Example 2: queueManager
# Required import: from multiprocessing import JoinableQueue [as alias]
# Or alternatively: from multiprocessing.JoinableQueue import qsize [as alias]
def queueManager(numProc, myList, function, *args):
    '''queueManager(numProc, myList, function, *args):
    generic function used to start worker processes via the multiprocessing Queue object
    numProc - number of processes to use
    myList - a list of objects to be iterated over
    function - target function
    *args - additional arguments to pass to function

    Return - an unordered list of the results from myList
    '''
    # also requires: from multiprocessing import Queue, Process; from math import ceil; import sys
    qIn = Queue()
    qOut = JoinableQueue()
    if args:
        arguments = (qIn, qOut,) + args
    else:
        arguments = (qIn, qOut,)
    results = []

    # reduce process count if proc count > files
    i = 0
    for l in myList:
        qIn.put((i, l))
        i += 1

    for _ in range(numProc):
        Process(target=function, args=arguments).start()

    sys.stdout.write("Progress: {:>3}%".format(0))
    curProgress = 0
    lastProgress = 0
    while qOut.qsize() < len(myList):
        # sys.stdout.write("\b\b\b\b{:>3}%".format(int(ceil(100*qOut.qsize()/len(myList)))))
        curProgress = int(ceil(100 * qOut.qsize() / len(myList)))
        if curProgress - lastProgress > 10:
            lastProgress += 10
            sys.stdout.write("\nProgress: {:>3}%".format(lastProgress))
            sys.stdout.flush()
    sys.stdout.write("\nProgress: {:>3}%".format(100))
    # sys.stdout.write("\b\b\b\b{:>3}%".format(100))
    sys.stdout.write("\n")

    for _ in range(len(myList)):
        # mark each collected result as processed so qOut.join() can return
        results.append(qOut.get())
        qOut.task_done()

    # tell child processes to stop
    for _ in range(numProc):
        qIn.put('STOP')

    orderedRes = [None] * len(results)
    for i, res in results:
        orderedRes[i] = res

    qOut.join()
    qIn.close()
    qOut.close()
    return orderedRes
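As a usage sketch for the example above (the worker below is hypothetical and not part of the original source): the target function handed to queueManager must read (index, item) tuples from qIn until it sees the 'STOP' sentinel, and put (index, result) tuples on qOut so the results can be reordered at the end.

def squareWorker(qIn, qOut):
    # Hypothetical worker matching queueManager's protocol: consume (i, item)
    # pairs until the 'STOP' sentinel arrives, emit (i, result) pairs.
    for i, item in iter(qIn.get, 'STOP'):
        qOut.put((i, item * item))

# results = queueManager(4, [1, 2, 3, 4, 5], squareWorker)  # -> [1, 4, 9, 16, 25]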
Example 3: test_basic
# Required import: from multiprocessing import JoinableQueue [as alias]
# Or alternatively: from multiprocessing.JoinableQueue import qsize [as alias]
def test_basic():
    in_queue = JoinableQueue()
    algolia_reader = Algoliaio("MyAppID", "MyKey", 1000)
    algolia_reader.scan_and_queue(in_queue, p_index="INT_Rubriques", p_query=None, p_connect_timeout=30, p_read_timeout=60)
    assert in_queue.qsize() > 2600
Example 4: test_basic
# Required import: from multiprocessing import JoinableQueue [as alias]
# Or alternatively: from multiprocessing.JoinableQueue import qsize [as alias]
def test_basic():
    in_queue = JoinableQueue()
    mongo_reader = Mongoio(p_host='localhost', p_port='27017', p_user='activite', p_password='passactivite',
                           p_base='ACTIVITE', p_rs_xtra_nodes=['localhost:27018', 'localhost:27019'], p_rs_name='rs0')
    mongo_reader.scan_and_queue(in_queue, p_collection='rubriques', p_query={})
    assert in_queue.qsize() > 2600
Example 5: apply_mt
# Required import: from multiprocessing import JoinableQueue [as alias]
# Or alternatively: from multiprocessing.JoinableQueue import qsize [as alias]
def apply_mt(self, xs, parallelism, **kwargs):
    """Run the UDF multi-threaded using python multiprocessing"""
    if snorkel_conn_string.startswith('sqlite'):
        raise ValueError('Multiprocessing with SQLite is not supported. Please use a different database backend,'
                         ' such as PostgreSQL.')

    # Fill a JoinableQueue with input objects
    in_queue = JoinableQueue()
    for x in xs:
        in_queue.put(x)

    # If the UDF has a reduce step, we collect the output of apply in a
    # Queue. This is also used to track progress via the UDF sentinel
    out_queue = JoinableQueue()

    # Keep track of progress counts
    total_count = in_queue.qsize()
    count = 0

    # Start UDF Processes
    for i in range(parallelism):
        udf = self.udf_class(in_queue=in_queue, out_queue=out_queue,
                             add_to_session=(self.reducer is None), **self.udf_init_kwargs)
        udf.apply_kwargs = kwargs
        self.udfs.append(udf)

    # Start the UDF processes, and then join on their completion
    for udf in self.udfs:
        udf.start()

    while any([udf.is_alive() for udf in self.udfs]) and count < total_count:
        y = out_queue.get()

        # Update progress whenever an item was processed
        if y == UDF.TASK_DONE_SENTINEL:
            count += 1
            if self.pb is not None:
                self.pb.update(1)

        # If there is a reduce step, do it now on this thread
        elif self.reducer is not None:
            self.reducer.reduce(y, **kwargs)
            out_queue.task_done()

        else:
            raise ValueError("Got non-sentinel output without reducer.")

    if self.reducer is None:
        for udf in self.udfs:
            udf.join()
    else:
        self.reducer.session.commit()
        self.reducer.session.close()

    # Flush the processes
    self.udfs = []
Example 6: test_basic
# Required import: from multiprocessing import JoinableQueue [as alias]
# Or alternatively: from multiprocessing.JoinableQueue import qsize [as alias]
def test_basic():
    in_queue = JoinableQueue()
    mysql_reader = Mysqlio('localhost', '3600', 'test', 'root', '')
    mysql_reader.scan_and_queue(in_queue, "SELECT * FROM swallow")

    assert in_queue.qsize() == 3

    res = []
    while not in_queue.empty():
        res.append(in_queue.get())

    expected_res = [{'id': 1, 'libelle': 'test'}, {'id': 2, 'libelle': 'john'}, {'id': 3, 'libelle': 'woo'}]
    assert res == expected_res
Example 7: corpus_analyser
# Required import: from multiprocessing import JoinableQueue [as alias]
# Or alternatively: from multiprocessing.JoinableQueue import qsize [as alias]
def corpus_analyser(corpus_path, process_count):
    print('Scanning corpora...')
    file_queue = JoinableQueue()
    hearst_dict = Manager().dict()
    word_count = Value('i', 0)

    for root, subFolders, files in os.walk(corpus_path):
        for current_file in files:
            if current_file.endswith(".txt"):
                file_queue.put(os.path.join(root, current_file))

    file_count = file_queue.qsize()
    print("{0} files found.\n".format(file_count))
    sys.stdout.write("\r0.00%\tWord count: 0")

    def worker(process_id):
        while not file_queue.empty():
            current_path = file_queue.get()
            with open(current_path, 'r') as current_file:
                data = ' '.join(current_file.read().replace('\n', ' ').split())
                data_tokenized = word_tokenize(data)
                word_count.value += len(data_tokenized)
                data_tagged = pos_tag(data_tokenized)
                hearst_patterns(data_tagged, hearst_dict)
            percentage = 100.0 - ((float(file_queue.qsize()) / float(file_count)) * 100.0)
            sys.stdout.write("\r{0:.2f}%\tWord count: {1}".format(percentage, word_count.value))
            sys.stdout.flush()
            file_queue.task_done()

    for pid in range(process_count):
        process = Process(target=worker, args=(pid,))
        process.daemon = True
        process.start()

    file_queue.join()
    print("\n")
    return hearst_dict
Example 8: run
# Required import: from multiprocessing import JoinableQueue [as alias]
# Or alternatively: from multiprocessing.JoinableQueue import qsize [as alias]
def run(this):
    Factory.pg("configuration test...")
    this.test()  # check configuration

    source = this.source
    fout_name = this.fout_name
    func = this.func
    fparam = this.fparam
    num_workers = this.num_workers
    worker = this.worker
    progressor = this.progressor

    # queue settings
    Factory.pg("arranging source elements...")
    from multiprocessing import JoinableQueue, Process
    in_queue = JoinableQueue()
    for item in source:
        in_queue.put(item)

    # worker progressing
    progressor = Process(target=progressor, args=(in_queue, in_queue.qsize()))
    import time
    start_time = time.time()
    progressor.start()

    # worker settings
    fouts, workers = [], []
    for w_id in range(num_workers):
        fouts.append(open("%s_part%d" % (fout_name, w_id), "w"))
        workers.append(Process(target=worker, args=(w_id, in_queue, func, fparam, fouts[w_id])))
        workers[w_id].start()

    # post processing
    in_queue.join()
    for w_id in range(num_workers):
        workers[w_id].terminate()
    progressor.terminate()
    end_time = time.time()
    Factory.pg("working done (%.1fs elapsed)" % (end_time - start_time), br=True)

    import os
    os.system("cat %s_part* > %s" % (fout_name, fout_name))
    os.system("rm -f %s_part*" % fout_name)
Example 9: JoinableQueue
# Required import: from multiprocessing import JoinableQueue [as alias]
# Or alternatively: from multiprocessing.JoinableQueue import qsize [as alias]
shape = image.shape
f = h5py.File('/dev/shm/test.h5', 'w')
data = f.create_dataset('data', (len(minc_volumes),) + image.shape,
                        chunks=(CHUNKSIZE,) + image.shape, compression='gzip')
names = f.create_dataset('names', (len(minc_volumes),),
                         dtype=h5py.special_dtype(vlen=str))
vf.close()

q = JoinableQueue()
p = Process(target=enqueue_volumes, args=(q, minc_volumes))
p.start()

while True:
    item = q.get(block=True)
    q.task_done()
    if item is None:
        break
    i, j, zeros, namelist = item
    print('data[{}:{},:] = zeros[:{},:]'.format(i, i + j + 1, j + 1))
    data[i:i + j + 1, :] = zeros[:j + 1, :]
    print('names[{}:{}] = namelist'.format(i, i + j + 1))
    print("qsize:", q.qsize())
    names[i:i + j + 1] = namelist

f.close()
Example 10: main
# Required import: from multiprocessing import JoinableQueue [as alias]
# Or alternatively: from multiprocessing.JoinableQueue import qsize [as alias]
def main(factor=2):
    # E.g. if the machine has 2 cores, the number of processes to be spawned is 2 * factor
    files_to_download = JoinableQueue()
    result_queue = JoinableQueue()
    time_taken = JoinableQueue()
    time_taken_to_read_from_queue = JoinableQueue()

    with open('downloads.txt', 'r') as f:
        for to_download in f:
            files_to_download.put_nowait(to_download.split('\n')[0])
    files_to_download_size = files_to_download.qsize()

    cores = cpu_count()
    no_of_processes = cores * factor
    for i in range(no_of_processes):
        files_to_download.put_nowait(None)

    jobs = []
    start = datetime.datetime.now()
    for name in range(no_of_processes):
        p = Process(target=download, args=(files_to_download, result_queue,
                                           time_taken, time_taken_to_read_from_queue, name))
        p.start()
        jobs.append(p)
    for job in jobs:
        job.join()

    print(result_queue.qsize())
    total_downloaded_urls = 0
    try:
        while 1:
            r = result_queue.get_nowait()
            total_downloaded_urls += r
    except Empty:
        pass

    try:
        while 1:
            """
            locals() keeps track of all variables, functions, classes etc.
            A datetime object is different from an int: one cannot compute
            0 + datetime.datetime.now(), so the first value read from the
            queue of time objects becomes the initial total_time.
            """
            if 'total_time' in locals():
                total_time += time_taken.get_nowait()
            else:
                total_time = time_taken.get_nowait()
    except Empty:
        print("{0} processes on {1} core machine took {2} time to download {3} urls".format(
            no_of_processes, cores, total_time, total_downloaded_urls))

    try:
        while 1:
            if 'queue_reading_time' in locals():
                queue_reading_time += time_taken_to_read_from_queue.get_nowait()
            else:
                queue_reading_time = time_taken_to_read_from_queue.get_nowait()
    except Empty:
        print("{0} processes on {1} core machine took {2} time to read {3} urls from queue".format(
            no_of_processes, cores, queue_reading_time, files_to_download_size))
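The 'total_time' in locals() trick above works, but a more conventional way to accumulate the durations, sketched below under the assumption that the workers put datetime.timedelta values on the queue, is to seed the total with a zero timedelta so every value can simply be added to it:

import datetime
from queue import Empty  # multiprocessing queues raise queue.Empty

def drain_total(time_queue):
    # Illustrative alternative to the locals() check above (assumes the queue
    # holds datetime.timedelta durations).
    total_time = datetime.timedelta(0)
    try:
        while True:
            total_time += time_queue.get_nowait()
    except Empty:
        return total_time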
Example 11: FindText
# Required import: from multiprocessing import JoinableQueue [as alias]
# Or alternatively: from multiprocessing.JoinableQueue import qsize [as alias]
#......... part of the code is omitted here .........
                        if detected is None:
                            break
                        try:
                            line = str(line, detected, "replace")
                        except LookupError:
                            pass
                        if re_text.match(line) is not None:
                            result_queue.put(f_path)
                            # logger.debug("matched file = %s " % f_path)
                            break
            except UnicodeDecodeError as unicode_e:
                logger.error("UnicodeDecodeError %s, %s" % (str(unicode_e), traceback.format_exc()))
            except IOError as io_e:
                logger.error("IOError %s, %s" % (str(io_e), traceback.format_exc()))
            except Exception as other_e:
                logger.error("Exception %s, %s" % (str(other_e), traceback.format_exc()))
            finally:
                file_queue.task_done()
        else:
            time.sleep(REQUEST_DELAY)
    try:
        self.logger.debug("findText started with timeout = %s" % TIMEOUT_LIMIT)
        time_limit = int(time.time()) + TIMEOUT_LIMIT
        # Launch a number of worker processes to perform operations using the queue of inputs
        for i in range(self.NUM_WORKING_PROCESSES):
            p = Process(
                target=worker, args=(self.re_text, self.file_queue, self.result_queue, self.logger, time_limit)
            )
            p.start()
            proc = psutil.Process(p.pid)
            proc.ionice(psutil.IOPRIO_CLASS_IDLE)
            proc.nice(20)
            self.logger.debug(
                "Search worker #%s, set ionice = idle and nice = 20 for pid %s" % (str(i), str(p.pid))
            )
            self.processes.append(p)

        abs_path = self.get_abs_path(self.path)
        self.logger.debug("FM FindText worker run(), abs_path = %s" % abs_path)

        if not os.path.exists(abs_path):
            raise Exception("Provided path does not exist")

        self.on_running(self.status_id, pid=self.pid, pname=self.name)

        for current, dirs, files in os.walk(abs_path):
            for f in files:
                try:
                    file_path = os.path.join(current, f)
                    self.file_queue.put(file_path)
                except UnicodeDecodeError as e:
                    self.logger.error("UnicodeDecodeError %s, %s" % (str(e), traceback.format_exc()))
                except IOError as e:
                    self.logger.error("IOError %s, %s" % (str(e), traceback.format_exc()))
                except Exception as e:
                    self.logger.error("Exception %s, %s" % (str(e), traceback.format_exc()))

        while int(time.time()) <= time_limit:
            self.logger.debug(
                "file_queue size = %s , empty = %s (timeout: %s/%s)"
                % (self.file_queue.qsize(), self.file_queue.empty(), str(int(time.time())), time_limit)
            )
            if self.file_queue.empty():
                self.logger.debug("join() file_queue until workers done jobs")
                self.file_queue.join()
                break
            else:
                time.sleep(REQUEST_DELAY)

        if int(time.time()) > time_limit:
            self.is_alive["status"] = False
            for p in self.processes:
                try:
                    self.logger.debug("FM FindText terminate worker process, pid = %s" % p.pid)
                    kill(p.pid, signal.SIGKILL, self.logger)
                except OSError:
                    self.logger.error("FindText unable to terminate worker process, pid = %s" % p.pid)

        if self.is_alive["status"] is True:
            while not self.result_queue.empty():
                file_path = self.result_queue.get()
                self.result.append(self._make_file_info(file_path))
            self.on_success(self.status_id, data=self.result, pid=self.pid, pname=self.name)
        else:
            result = {"error": True, "message": "Operation timeout exceeded", "traceback": ""}
            self.on_error(self.status_id, result, pid=self.pid, pname=self.name)
    except Exception as e:
        result = {"error": True, "message": str(e), "traceback": traceback.format_exc()}
        self.on_error(self.status_id, result, pid=self.pid, pname=self.name)
Example 12: print
# Required import: from multiprocessing import JoinableQueue [as alias]
# Or alternatively: from multiprocessing.JoinableQueue import qsize [as alias]
print("Getting list of connected research articles")
article_PMIDs = r.smembers('linked_articles')
article_URLs = r.mget(['{0}:URL'.format(PMID) for PMID in article_PMIDs])

uncatalogged = Queue()
for args in zip(article_URLs, article_PMIDs):
    uncatalogged.put(args)

if DEBUG:
    # Run one process
    print("DEBUG: Running single threaded.")
    parallel_worker()
else:
    print("Starting pool")
    NUM_WORKERS = 7
    pool = Pool(NUM_WORKERS)
    results = [pool.apply_async(parallel_worker) for i in range(NUM_WORKERS)]

    print("Running progress capture.")
    while True:
        remaining = uncatalogged.qsize()
        print("Waiting for", remaining, "tasks to complete...")
        time.sleep(0.5)
        # print([result.get() for result in results])

uncatalogged.join()
print('Done')
Example 13: JoinableQueue
# Required import: from multiprocessing import JoinableQueue [as alias]
# Or alternatively: from multiprocessing.JoinableQueue import qsize [as alias]
import nltk
import os
import sys
import re
import pickle

nltk.data.path.append(os.path.join(os.path.dirname(__file__), 'nltk'))

file_list = JoinableQueue()
for root, subFolders, files in os.walk(os.path.join(os.path.dirname(__file__), 'corpus', 'plain')):
    for current_file in files:
        if current_file.endswith(".txt"):
            file_list.put(os.path.join(root, current_file))

file_count = file_list.qsize()

def worker():
    while not file_list.empty():
        input_file = file_list.get()
        with open(input_file, 'r') as c_file:
            contents = c_file.read()
            contents = contents.replace(' {2,}', '')
            contents = re.sub(r'\s{2,}', ' ', contents)
            tokens = [word for sent in nltk.sent_tokenize(contents) for word in nltk.word_tokenize(sent)]
            pos_tagged = nltk.pos_tag(tokens)
            pos_tagged = nltk.ne_chunk(pos_tagged)
Example 14: Queue
# Required import: from multiprocessing import JoinableQueue [as alias]
# Or alternatively: from multiprocessing.JoinableQueue import qsize [as alias]
        self.outputqueue.put(self.compute(n))

if __name__ == '__main__':
    begintime = time.time()

    inputqueue = Queue()
    outputqueue = Queue()

    threads = int(sys.argv[1])

    feeder = Feeder(inputqueue, outputqueue, threads)
    feeder.start()

    duration = time.time() - begintime
    print("Feeder started (" + str(duration) + "s)")

    for _ in range(0, threads):
        processor = Processor(inputqueue, outputqueue)
        processor.start()

    inputqueue.join()
    duration = time.time() - begintime
    print("Inputqueue done (" + str(duration) + "s)")

    outputqueue.put(None)
    print("Outputqueue length (" + str(outputqueue.qsize()) + ")")

    feeder.join()
    duration = time.time() - begintime
    print("Outputqueue done (" + str(duration) + "s)")
Example 15: search
# Required import: from multiprocessing import JoinableQueue [as alias]
# Or alternatively: from multiprocessing.JoinableQueue import qsize [as alias]
def search(output_dict, rules_file):
    rules = [rule.split(' | ') for rule in pickle.load(open(rules_file, 'rb'))]
    file_list = JoinableQueue()
    word_dict = Manager().dict()

    for root, subFolders, files in os.walk(os.path.join(os.path.dirname(__file__), 'corpus', 'tagged')):
        for current_file in files:
            if current_file.endswith(".pickle"):
                file_list.put(os.path.join(root, current_file))
                # break  # TODO remove (only for testing with one file)

    file_count = file_list.qsize()

    def worker():
        def rule_parser(tagged_data):
            parser = nltk.RegexpParser('''
                NP: {<NN|NNS|NNP|NNPS|NE>}
                NPs: {<NP> (<,|CC> <NP>)+}
                ''')
            return parser.parse(tagged_data)

        def get_nltk_word(data):
            if isinstance(data, nltk.tree.Tree):
                if isinstance(data[0], tuple):
                    return data[0][0]
                else:
                    return data[0]
            else:
                return data[0]

        def add_to_dict(hypernym, hyponym):
            if not hyponym in word_dict.keys():
                old_list = word_dict.get(hypernym)
                if not old_list:
                    old_list = [hyponym]
                else:
                    if not hyponym in old_list:
                        old_list.append(hyponym)
                word_dict[hypernym] = old_list

        def apply_rules(data, position):
            for rule in rules:
                # search right side
                if rule[0] == 'HYPERNYM':
                    possible_hypernym = get_nltk_word(data[position])
                    error = False
                    word_count = 1
                    for word in rule[1:-1]:
                        try:
                            if word != get_nltk_word(data[position + word_count]):
                                error = True
                            word_count += 1
                        except IndexError:
                            pass
                    try:
                        if not error:
                            if isinstance(data[position + word_count], nltk.tree.Tree):
                                if data[position + word_count].node == 'NP' and rule[-1] == 'NP':
                                    add_to_dict(possible_hypernym, data[position + word_count][0][0])
                                    break
                                elif data[position + word_count].node == 'NPs' and rule[-1] == 'NPs':
                                    for node in data[position + word_count]:
                                        if isinstance(node, nltk.tree.Tree):
                                            add_to_dict(possible_hypernym, node[0][0])
                                    break
                    except IndexError:
                        pass
                # search left side
                elif rule[-1] == 'HYPERNYM':
                    possible_hypernym = get_nltk_word(data[position])
                    error = False
                    word_count = -1
                    nrule = list(rule)
                    nrule.reverse()
                    for word in nrule[1:-1]:
                        try:
                            if word != get_nltk_word(data[position + word_count]):
                                error = True
                            word_count -= 1
                        except IndexError:
                            pass
                    try:
                        if not error:
                            if isinstance(data[position + word_count], nltk.tree.Tree):
                                if data[position + word_count].node == 'NP' and rule[-1] == 'NP':
                                    add_to_dict(possible_hypernym, data[position + word_count][0][0])
                                    break
                                elif data[position + word_count].node == 'NPs' and rule[-1] == 'NPs':
                                    for node in data[position + word_count]:
                                        if isinstance(node, nltk.tree.Tree):
#......... part of the code is omitted here .........