

Python Queue.task_done Method Code Examples

This article collects typical usage examples of the Python method tornado.queues.Queue.task_done. If you are wondering exactly what Queue.task_done does, how to call it, or what real code that uses it looks like, the curated examples below should help. You can also explore further usage examples of the containing class, tornado.queues.Queue.


The following shows 13 code examples of the Queue.task_done method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
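
Before the examples, here is a minimal, self-contained sketch of the canonical pattern behind task_done (adapted from the standard Tornado producer/consumer pattern; the names consumer and main are illustrative): every get() is paired with a task_done() call, which is what allows join() to resolve once all items have been processed.

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.queues import Queue

q = Queue()

@gen.coroutine
def consumer():
    while True:
        item = yield q.get()
        try:
            print('processing %s' % item)
        finally:
            # Mark the item as processed so that q.join() can unblock.
            q.task_done()

@gen.coroutine
def main():
    IOLoop.current().spawn_callback(consumer)
    for i in range(5):
        yield q.put(i)
    yield q.join()  # resolves once task_done() has been called for every put()

IOLoop.current().run_sync(main)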

Example 1: TornadoQuerierBase

# Required import: from tornado.queues import Queue [as alias]
# Or: from tornado.queues.Queue import task_done [as alias]
class TornadoQuerierBase(object):

    def __init__(self):
        self.tasks = TornadoQueue()

    def gen_task(self):
        raise NotImplementedError()

    def run_task(self, task):
        raise NotImplementedError()

    def prepare(self):
        self.running = True

    def cleanup(self):
        self.running = False

    @coroutine
    def run_worker(self, worker_id, f):
        while self.tasks.qsize() > 0:
            task = yield self.tasks.get()
            LOG.debug('worker[%d]: current task is %s' % (worker_id, task))
            try:
                yield f(task)
            except Exception as e:
                LOG.warning(str(e))
            finally:
                self.tasks.task_done()
                task = None
        LOG.debug('worker[%d]: all tasks done %s' % (worker_id, self.tasks))

    @coroutine
    def start(self, num_workers=1):

        self.prepare()

        # add tasks
        tasks = yield self.gen_task()
        for task in tasks:
            yield self.tasks.put(task)

        # start workers
        for worker_id in range(num_workers):
            LOG.debug('starting worker %d' % worker_id)
            self.run_worker(worker_id, self.run_task)

        yield self.tasks.join()
        self.cleanup()
Author: jianingy, Project: watchgang, Lines: 51, Source: libwatcher.py
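
For context, a hedged sketch of how such a base class might be driven. The subclass DemoQuerier and its fake task list are illustrative assumptions, not part of the watchgang project, and the sketch assumes the module-level aliases TornadoQueue and LOG that the base class relies on:

from tornado import gen
from tornado.ioloop import IOLoop

class DemoQuerier(TornadoQuerierBase):
    @gen.coroutine
    def gen_task(self):
        # gen.Return carries the task list back to
        # "tasks = yield self.gen_task()" in start().
        raise gen.Return(['job-%d' % i for i in range(10)])

    @gen.coroutine
    def run_task(self, task):
        yield gen.sleep(0.1)  # stand-in for real per-task I/O

IOLoop.current().run_sync(lambda: DemoQuerier().start(num_workers=4))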

Example 2: TopicAppllication

# Required import: from tornado.queues import Queue [as alias]
# Or: from tornado.queues.Queue import task_done [as alias]
class TopicAppllication(tornado.web.Application):

    def __init__(self):
        handlers = [
            url(r'/', MainHandler)
        ]
        self.queue = Queue(maxsize=10)
        super(TopicAppllication, self).__init__(handlers=handlers, debug=True)

    @gen.coroutine
    def consumer(self):
        item = yield self.queue.get()
        try:
            print(item)
        finally:
            self.queue.task_done()
Author: DashShen, Project: Journey, Lines: 18, Source: tornao_send.py

Example 3: CommandQueue

# Required import: from tornado.queues import Queue [as alias]
# Or: from tornado.queues.Queue import task_done [as alias]
class CommandQueue():
    def __init__(self):
        self.queue = Queue()

    @gen.coroutine
    def process_command(self):
        while True:
            item = yield self.queue.get()
            try:
                yield gen.sleep(0.1)
                command, view = item
                view.write_message({command[0]: command[1]})
            finally:
                self.queue.task_done()

    def put(self, item):
        self.queue.put(item)
Author: jbenua, Project: Flashlight, Lines: 19, Source: command_worker.py
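
A hedged usage sketch (the (command, view) item shape is inferred from process_command; view stands for any object exposing write_message(), e.g. a tornado.websocket.WebSocketHandler, and a running IOLoop such as a Tornado web app is assumed):

from tornado.ioloop import IOLoop

cq = CommandQueue()
# Drain items in the background: process_command loops forever and
# calls task_done() after each item, so cq.queue.join() could be used
# to wait for the queue to empty.
IOLoop.current().spawn_callback(cq.process_command)

# Later, e.g. inside a websocket handler's on_message():
#     cq.put((('led', 'on'), self))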

Example 4: StreamClient

# Required import: from tornado.queues import Queue [as alias]
# Or: from tornado.queues.Queue import task_done [as alias]
class StreamClient(object):
    MAX_SIZE = 60

    def __init__(self, stream_id):
        self.id = generate_id()
        self.stream_id = stream_id
        self.queue = Queue(StreamClient.MAX_SIZE)

    @coroutine
    def send(self, item):
        yield self.queue.put(item)

    @coroutine
    def fetch(self):
        item = yield self.queue.get()
        self.queue.task_done()
        return item

    def empty(self):
        return self.queue.qsize() == 0
Author: AlexPereverzyev, Project: html5stream, Lines: 22, Source: stream_client.py
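
A hedged usage sketch (it assumes the module-level generate_id() helper that __init__ calls; the stream id and payload are illustrative):

from tornado import gen
from tornado.ioloop import IOLoop

@gen.coroutine
def demo():
    client = StreamClient('stream-1')
    yield client.send({'frame': 1})
    item = yield client.fetch()  # get() plus an immediate task_done()
    print(item)

IOLoop.current().run_sync(demo)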

Example 5: BatchedStream

# Required import: from tornado.queues import Queue [as alias]
# Or: from tornado.queues.Queue import task_done [as alias]
class BatchedStream(object):
    """ Mostly obsolete, see BatchedSend """

    def __init__(self, stream, interval):
        self.stream = stream
        self.interval = interval / 1000.0
        self.last_transmission = default_timer()
        self.send_q = Queue()
        self.recv_q = Queue()
        self._background_send_coroutine = self._background_send()
        self._background_recv_coroutine = self._background_recv()
        self._broken = None

        self.pc = PeriodicCallback(lambda: None, 100)
        self.pc.start()

    @gen.coroutine
    def _background_send(self):
        with log_errors():
            while True:
                msg = yield self.send_q.get()
                if msg == "close":
                    break
                msgs = [msg]
                now = default_timer()
                wait_time = self.last_transmission + self.interval - now
                if wait_time > 0:
                    yield gen.sleep(wait_time)
                while not self.send_q.empty():
                    msgs.append(self.send_q.get_nowait())

                try:
                    yield write(self.stream, msgs)
                except StreamClosedError:
                    self.recv_q.put_nowait("close")
                    self._broken = True
                    break

                if len(msgs) > 1:
                    logger.debug("Batched messages: %d", len(msgs))
                for _ in msgs:
                    self.send_q.task_done()

    @gen.coroutine
    def _background_recv(self):
        with log_errors():
            while True:
                try:
                    msgs = yield read(self.stream)
                except StreamClosedError:
                    self.recv_q.put_nowait("close")
                    self.send_q.put_nowait("close")
                    self._broken = True
                    break
                assert isinstance(msgs, list)
                if len(msgs) > 1:
                    logger.debug("Batched messages: %d", len(msgs))
                for msg in msgs:
                    self.recv_q.put_nowait(msg)

    @gen.coroutine
    def flush(self):
        yield self.send_q.join()

    @gen.coroutine
    def send(self, msg):
        if self._broken:
            raise StreamClosedError("Batch Stream is Closed")
        else:
            self.send_q.put_nowait(msg)

    @gen.coroutine
    def recv(self):
        result = yield self.recv_q.get()
        if result == "close":
            raise StreamClosedError("Batched Stream is Closed")
        else:
            raise gen.Return(result)

    @gen.coroutine
    def close(self):
        yield self.flush()
        raise gen.Return(self.stream.close())

    def closed(self):
        return self.stream.closed()
Author: broxtronix, Project: distributed, Lines: 88, Source: batched.py

Example 6: BlogBackup

# Required import: from tornado.queues import Queue [as alias]
# Or: from tornado.queues.Queue import task_done [as alias]

#......... (part of the code omitted here) .........
            self._generate_save_dir()

    def _get_user_cookies(self):
        url = target_url + login_page_path
        self.driver.get(url)
        try:
            user_input = self.driver.find_element_by_name('mail')
            passwd_input = self.driver.find_element_by_name('password')
            submit_btn = self.driver.find_element_by_class_name('pr20')
        except NoSuchElementException:
            raise PageHtmlChanged(
                "%s login page structure have changed!" % _domain)

        user_input.send_keys(self.username)
        passwd_input.send_keys(self.passwd)
        submit_btn.click()
        try:
            WebDriverWait(self.driver, 3).until(staleness_of(submit_btn))
        except TimeoutException:
            raise Exception("Wrong username or password!")

        WebDriverWait(self.driver, timeout=10).until(has_page_load)
        try_times = 0
        while True:
            time.sleep(1)
            if url != self.driver.current_url:
                return self.driver.get_cookies()

            try_times += 1
            if try_times > 10:
                raise Exception("Getting cookie info failed!")

    def _get_driver(self):
        if self.phantomjs_path:
            try:
                return webdriver.PhantomJS(
                    executable_path=self.phantomjs_path,
                    service_log_path=os.path.devnull)
            except WebDriverException:
                raise PhantomjsPathError("Phantomjs locate path invalid!")
        else:
            return webdriver.PhantomJS(service_log_path=os.path.devnull)

    def __init__(self, **conf):
        self.username = conf['username']
        self.passwd = conf['passwd']
        self.phantomjs_path = conf.get('phantomjs_path')
        self.save_path = conf.get('save_path')
        self._q = Queue()

        self._parse_save_path()
        self.driver = self._get_driver()
        self._cookies = self._get_user_cookies()

    @gen.coroutine
    def run(self):
        self.__filter_cookies()

        start_url = target_url + blog_path
        yield self._fetch_blog_list_page(start_url)
        for _ in xrange(cpu_count()):
            self._fetch_essay_content()

        yield self._q.join()

    def __filter_cookies(self):
        self._cookies = {k['name']: k['value'] for k in self._cookies if
                         k['domain'] == _domain}

    @gen.coroutine
    def _fetch_blog_list_page(self, page_link):
        ret = requests.get(page_link, cookies=self._cookies)
        d = pq(ret.text)
        link_elements = d('.stream-list__item > .summary > h2 > a')
        for link in link_elements:
            yield self._q.put(d(link).attr('href'))

        next_ele = d('.pagination li.next a')
        if next_ele:
            next_page_url = target_url + next_ele.attr('href')
            self._fetch_blog_list_page(next_page_url)

    @gen.coroutine
    def _fetch_essay_content(self):
        while True:
            try:
                essay_path = yield self._q.get(timeout=1)
                essay_url = target_url + essay_path + edit_suffix
                ret = requests.get(essay_url, cookies=self._cookies)
                d = pq(ret.text)
                title = d("#myTitle").val()
                content = d("#myEditor").text()
                file_name = title + '.md'
                real_file_name = os.path.join(self.save_path, file_name)
                with open(real_file_name, 'w') as f:
                    f.writelines(content.encode('utf8'))
            except gen.TimeoutError:
                raise gen.Return()
            finally:
                self._q.task_done()
Author: quietin, Project: seg_backup_script, Lines: 104, Source: backup_with_phantomjs.py

Example 7: ProjectGroomer

# Required import: from tornado.queues import Queue [as alias]
# Or: from tornado.queues.Queue import task_done [as alias]
class ProjectGroomer(object):
  """ Cleans up expired transactions for a project. """
  def __init__(self, project_id, coordinator, zk_client, db_access,
               thread_pool):
    """ Creates a new ProjectGroomer.

    Args:
      project_id: A string specifying a project ID.
      coordinator: A GroomingCoordinator.
      zk_client: A KazooClient.
      db_access: A DatastoreProxy.
      thread_pool: A ThreadPoolExecutor.
    """
    self.project_id = project_id

    self._coordinator = coordinator
    self._zk_client = zk_client
    self._tornado_zk = TornadoKazoo(self._zk_client)
    self._db_access = db_access
    self._thread_pool = thread_pool
    self._project_node = '/appscale/apps/{}'.format(self.project_id)
    self._containers = []
    self._inactive_containers = set()
    self._batch_resolver = BatchResolver(self.project_id, self._db_access)

    self._zk_client.ensure_path(self._project_node)
    self._zk_client.ChildrenWatch(self._project_node, self._update_containers)

    self._txid_manual_offset = 0
    self._offset_node = '/'.join([self._project_node, OFFSET_NODE])
    self._zk_client.DataWatch(self._offset_node, self._update_offset)

    self._stop_event = AsyncEvent()
    self._stopped_event = AsyncEvent()

    # Keeps track of cleanup results for each round of grooming.
    self._txids_cleaned = 0
    self._oldest_valid_tx_time = None

    self._worker_queue = AsyncQueue(maxsize=MAX_CONCURRENCY)
    for _ in range(MAX_CONCURRENCY):
      IOLoop.current().spawn_callback(self._worker)

    IOLoop.current().spawn_callback(self.start)

  @gen.coroutine
  def start(self):
    """ Starts the grooming process until the stop event is set. """
    logger.info('Grooming {}'.format(self.project_id))
    while True:
      if self._stop_event.is_set():
        break

      try:
        yield self._groom_project()
      except Exception:
        # Prevent the grooming loop from stopping if an error is encountered.
        logger.exception(
          'Unexpected error while grooming {}'.format(self.project_id))
        yield gen.sleep(MAX_TX_DURATION)

    self._stopped_event.set()

  @gen.coroutine
  def stop(self):
    """ Stops the grooming process. """
    logger.info('Stopping grooming process for {}'.format(self.project_id))
    self._stop_event.set()
    yield self._stopped_event.wait()

  @gen.coroutine
  def _worker(self):
    """ Processes items in the worker queue. """
    while True:
      tx_path, composite_indexes = yield self._worker_queue.get()
      try:
        tx_time = yield self._resolve_txid(tx_path, composite_indexes)
        if tx_time is None:
          self._txids_cleaned += 1

        if tx_time is not None and tx_time < self._oldest_valid_tx_time:
          self._oldest_valid_tx_time = tx_time
      finally:
        self._worker_queue.task_done()

  def _update_offset(self, new_offset, _):
    """ Watches for updates to the manual offset node.

    Args:
      new_offset: A string specifying the new manual offset.
    """
    self._txid_manual_offset = int(new_offset or 0)

  def _update_containers(self, nodes):
    """ Updates the list of active txid containers.

    Args:
      nodes: A list of strings specifying ZooKeeper nodes.
    """
    counters = [int(node[len(CONTAINER_PREFIX):] or 1)
#......... (part of the code omitted here) .........
Author: cdonati, Project: appscale, Lines: 103, Source: transaction_groomer.py

Example 8: __init__

# Required import: from tornado.queues import Queue [as alias]
# Or: from tornado.queues.Queue import task_done [as alias]
class Model:
  def __init__(self, config_file):
    self.lock = locks.Lock()
    self.classification_queue = Queue()

    print('loading config %s' % config_file, file=log.v5)
    # Load and setup config
    try:
      self.config = Config.Config()
      self.config.load_file(config_file)
      self.pause_after_first_seq = self.config.float('pause_after_first_seq', 0.2)
      self.batch_size = self.config.int('batch_size', 5000)
      self.max_seqs = self.config.int('max_seqs', -1)
    except Exception:
      print('Error: loading config %s failed' % config_file, file=log.v1)
      raise

    try:
      self.devices = self._init_devices()
    except Exception:
      print('Error: Loading devices for config %s failed' % config_file, file=log.v1)
      raise

    print('Starting engine for config %s' % config_file, file=log.v5)
    self.engine = Engine.Engine(self.devices)
    try:
      self.engine.init_network_from_config(config=self.config)
    except Exception:
      print('Error: Loading network for config %s failed' % config_file, file=log.v1)
      raise

    IOLoop.current().spawn_callback(self.classify_in_background)

    self.last_used = datetime.datetime.now()

  def _init_devices(self):
    """
    Initializes the required devices for a config. Same as the function initDevices in
    rnn.py.
    :param config:
    :return: A list with the devices used.
    """
    oldDeviceConfig = ",".join(self.config.list('device', ['default']))
    if "device" in TheanoFlags:
      # This is important because Theano likely already has initialized that device.
      config.set("device", TheanoFlags["device"])
      print("Devices: Use %s via THEANO_FLAGS instead of %s." % (TheanoFlags["device"], oldDeviceConfig), file=log.v4)
    devArgs = get_devices_init_args(self.config)
    assert len(devArgs) > 0
    devices = [Device(**kwargs) for kwargs in devArgs]
    for device in devices:
      while not device.initialized:
        time.sleep(0.25)
    if devices[0].blocking:
      print("Devices: Used in blocking / single proc mode.", file=log.v4)
    else:
      print("Devices: Used in multiprocessing mode.", file=log.v4)
    return devices

  @tornado.gen.coroutine
  def classify_in_background(self):
    while True:
      requests = []
      # fetch first request
      r = yield self.classification_queue.get()
      requests.append(r)
      # grab all other waiting requests
      try:
        while True:
          requests.append(self.classification_queue.get_nowait())
      except QueueEmpty:
        pass

      output_dim = {}
      # Do dataset creation and classification.
      dataset = StaticDataset(data=[r.data for r in requests], output_dim=output_dim)
      dataset.init_seq_order()
      batches = dataset.generate_batches(recurrent_net=self.engine.network.recurrent,
                                         batch_size=self.batch_size, max_seqs=self.max_seqs)

      with (yield self.lock.acquire()):
        ctt = ForwardTaskThread(self.engine.network, self.devices, dataset, batches)
        yield ctt.join()

      try:
        for i in range(dataset.num_seqs):
          requests[i].future.set_result(ctt.result[i])
          self.classification_queue.task_done()
      except Exception as e:
        print('exception', e)
        raise

  @tornado.gen.coroutine
  def classify(self, data):
    self.last_used = datetime.datetime.now()
    request = ClassificationRequest(data)

    yield self.classification_queue.put(request)
    yield request.future

#......... (part of the code omitted here) .........
Author: rwth-i6, Project: returnn, Lines: 103, Source: Server.py

Example 9: SubscribeListener

# Required import: from tornado.queues import Queue [as alias]
# Or: from tornado.queues.Queue import task_done [as alias]
class SubscribeListener(SubscribeCallback):
    def __init__(self):
        self.connected = False
        self.connected_event = Event()
        self.disconnected_event = Event()
        self.presence_queue = Queue()
        self.message_queue = Queue()
        self.error_queue = Queue()

    def status(self, pubnub, status):
        if utils.is_subscribed_event(status) and not self.connected_event.is_set():
            self.connected_event.set()
        elif utils.is_unsubscribed_event(status) and not self.disconnected_event.is_set():
            self.disconnected_event.set()
        elif status.is_error():
            self.error_queue.put_nowait(status.error_data.exception)

    def message(self, pubnub, message):
        self.message_queue.put(message)

    def presence(self, pubnub, presence):
        self.presence_queue.put(presence)

    @tornado.gen.coroutine
    def _wait_for(self, coro):
        error = self.error_queue.get()
        wi = tornado.gen.WaitIterator(coro, error)

        while not wi.done():
            result = yield wi.next()

            if wi.current_future == coro:
                raise gen.Return(result)
            elif wi.current_future == error:
                raise result
            else:
                raise Exception("Unexpected future resolved: %s" % str(wi.current_future))

    @tornado.gen.coroutine
    def wait_for_connect(self):
        if not self.connected_event.is_set():
            yield self._wait_for(self.connected_event.wait())
        else:
            raise Exception("instance is already connected")

    @tornado.gen.coroutine
    def wait_for_disconnect(self):
        if not self.disconnected_event.is_set():
            yield self._wait_for(self.disconnected_event.wait())
        else:
            raise Exception("instance is already disconnected")

    @tornado.gen.coroutine
    def wait_for_message_on(self, *channel_names):
        channel_names = list(channel_names)
        while True:
            try: # NOQA
                env = yield self._wait_for(self.message_queue.get())
                if env.channel in channel_names:
                    raise tornado.gen.Return(env)
                else:
                    continue
            finally:
                self.message_queue.task_done()

    @tornado.gen.coroutine
    def wait_for_presence_on(self, *channel_names):
        channel_names = list(channel_names)
        while True:
            try:
                try:
                    env = yield self._wait_for(self.presence_queue.get())
                except: # NOQA E722 pylint: disable=W0702
                    break
                if env.channel in channel_names:
                    raise tornado.gen.Return(env)
                else:
                    continue
            finally:
                self.presence_queue.task_done()
Author: pubnub, Project: python, Lines: 82, Source: pubnub_tornado.py
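
A hedged usage sketch, assuming an already-configured PubNubTornado instance and the subscribe-builder API of the PubNub v4 Python SDK; it should be called from within a running IOLoop:

from tornado import gen

@gen.coroutine
def demo(pubnub):
    # pubnub: a configured PubNubTornado instance (assumption)
    listener = SubscribeListener()
    pubnub.add_listener(listener)
    pubnub.subscribe().channels('demo').execute()
    yield listener.wait_for_connect()
    env = yield listener.wait_for_message_on('demo')
    print(env.message)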

Example 10: BlogBackup

# Required import: from tornado.queues import Queue [as alias]
# Or: from tornado.queues.Queue import task_done [as alias]
class BlogBackup(object):
    _default_dir_name = "seg_blog_backup"

    def _generate_save_dir(self):
        cur_dir = os.path.dirname(__file__)
        self.save_path = os.path.join(cur_dir, self._default_dir_name)
        if not os.path.isdir(self.save_path):
            os.mkdir(self.save_path)

    def _parse_save_path(self):
        if self.save_path:
            if os.path.exists(self.save_path) and os.path.isdir(self.save_path):
                return
            else:
                raise BlogSavePathError("'%s' does not exist or is not a directory!" % self.save_path)
        else:
            self._generate_save_dir()

    @staticmethod
    def parse_token_from_html(content):
        overall_pat = re.compile(r"SF.token =.*?,\s+_\w+ = [\d,\[\]]+;", re.DOTALL)
        overall_res = overall_pat.search(content)
        if overall_res:
            overall_content = overall_res.group()
            # remove /* */ type annotation
            filter_res = re.sub(r"(/\*[/a-zA-Z\d' ]+\*/)", "", overall_content)
            str_list = re.findall(r"(?<!//)'([a-zA-Z\d]+)'", filter_res, re.DOTALL)
            filter_list = re.findall(r"\[(\d+),(\d+)\]", overall_content)
            ret = "".join(str_list)

            if filter_list:
                for m, n in filter_list:
                    ret = ret[: int(m)] + ret[int(n) :]
            if len(ret) == 32:
                return ret

        raise PageHtmlChanged("website login token has changed")

    def _get_user_cookies(self):
        s = requests.Session()
        s.headers.update(headers)
        rep = s.get(target_url)
        post_url = "%s%s?_=%s" % (target_url, login_api_path, self.parse_token_from_html(rep.text))
        data = {"mail": self.username, "password": self.passwd}
        s.post(post_url, data=data)
        return s.cookies

    def __init__(self, **conf):
        self.username = conf["username"]
        self.passwd = conf["passwd"]
        self.save_path = conf.get("save_path")
        self._q = Queue()
        self._cookies = self._get_user_cookies()
        self._parse_save_path()

    @gen.coroutine
    def run(self):
        start_url = target_url + blog_path
        yield self._fetch_blog_list_page(start_url)
        for _ in xrange(cpu_count()):
            self._fetch_essay_content()

        yield self._q.join()

    @gen.coroutine
    def _fetch_blog_list_page(self, page_link):
        ret = requests.get(page_link, cookies=self._cookies)
        d = pq(ret.text)
        link_elements = d(".stream-list__item > .summary > h2 > a")
        for link in link_elements:
            yield self._q.put(d(link).attr("href"))

        next_ele = d(".pagination li.next a")
        if next_ele:
            next_page_url = target_url + next_ele.attr("href")
            self._fetch_blog_list_page(next_page_url)

    @gen.coroutine
    def _fetch_essay_content(self):
        while True:
            try:
                essay_path = yield self._q.get(timeout=1)
                essay_url = target_url + essay_path + edit_suffix
                ret = requests.get(essay_url, cookies=self._cookies)
                d = pq(ret.text)
                title = d("#myTitle").val()
                content = d("#myEditor").text()
                real_file_name = os.path.join(self.save_path, title + ".md")
                logger.info("is backup essay: %s" % title)
                with open(real_file_name, "w") as f:
                    f.writelines(content.encode("utf8"))
            except gen.TimeoutError:
                raise gen.Return()
            finally:
                self._q.task_done()
Author: quietin, Project: seg_backup_script, Lines: 97, Source: backup_simple.py

Example 11: Scraper

# Required import: from tornado.queues import Queue [as alias]
# Or: from tornado.queues.Queue import task_done [as alias]
class Scraper():

    def __init__(
                self,
                destinations=None,
                transform=None,
                headers={},
                max_clients=50,
                maxsize=50,
                connect_timeout=1200,
                request_timeout=600,):

        """Instantiate a tornado async http client to do multiple concurrent requests"""

        if None in [destinations, transform]:
            sys.stderr.write('You must pass both collection of URLS and a transform function')
            raise SystemExit

        self.max_clients = max_clients
        self.maxsize = maxsize
        self.connect_timeout = connect_timeout
        self.request_timeout = request_timeout

        AsyncHTTPClient.configure("tornado.simple_httpclient.SimpleAsyncHTTPClient", max_clients=self.max_clients)

        self.http_client = AsyncHTTPClient()
        self.queue = Queue(maxsize=50)
        self.destinations = destinations
        self.transform = transform
        self.headers = headers
        self.read(self.destinations)
        self.get(self.transform, self.headers, self.connect_timeout, self.request_timeout, self.http_client)
        self.loop = ioloop.IOLoop.current()
        self.join_future = self.queue.join()

        def done(future):
            self.loop.stop()

        self.join_future.add_done_callback(done)
        self.loop.start()

    @gen.coroutine
    def read(self, destinations):
        for url in destinations:
            yield self.queue.put(url)

    @gen.coroutine
    def get(self, transform, headers, connect_timeout, request_timeout, http_client):
        while True:
            url = yield self.queue.get()
            try:
                request = HTTPRequest(url,
                                    connect_timeout=connect_timeout,
                                    request_timeout=request_timeout,
                                    method="GET",
                                    headers = headers
                )
            except Exception as e:
                sys.stderr.write('Destination {0} returned error {1}'.format(url, str(e) + '\n'))
                # Skip this URL: "request" would otherwise be unbound (or stale) below.
                self.queue.task_done()
                continue

            future = self.http_client.fetch(request)

            def done_callback(future):
                body = future.result().body
                url = future.result().effective_url
                transform(body, url=url)
                self.queue.task_done()

            try:
                future.add_done_callback(done_callback)
            except Exception as e:
                sys.stderr.write(str(e))
                self.queue.put(url)
Author: andres-de-castro, Project: scraping, Lines: 75, Source: scraper.py

Example 12: Client

# Required import: from tornado.queues import Queue [as alias]
# Or: from tornado.queues.Queue import task_done [as alias]
class Client(object):

    def __init__(self, server, name, stream):
        self.server = server
        self.name = name
        self.rooms = {}
        self.stream = stream
        self.inqueue = Queue(maxsize=QUEUE_SIZE)
        self.outqueue = Queue(maxsize=QUEUE_SIZE)

    @coroutine
    def forwarding(self):
        while True:
            msg = yield self.outqueue.get()
            if msg.command == COMMAND_QUIT:
                for _, room in self.rooms.items():
                    yield room.inqueue.put(msg)
            elif msg.command == COMMAND_JOIN:
                room_name = msg.receiver
                room = self.server.get_room(room_name)
                self.rooms[room_name] = room
                yield room.inqueue.put(msg)
            else:
                room = self.rooms[msg.receiver]
                yield room.inqueue.put(msg)
            self.outqueue.task_done()

    @coroutine
    def response(self):
        global SPEED
        while True:
            msg = yield self.inqueue.get()
            if msg.command == COMMAND_QUIT:
                self.stream.close()
                return
            else:
                response = ("%s %s:%s\n" % (datetime.datetime.now(),
                                            msg.sender.name,
                                            msg.content.decode()))\
                    .encode('utf-8')
                try:
                    SPEED += 1
                    yield self.stream.write(response)
                except Exception as e:
                    logging.debug(str(e))
                    self.stream.close()

    @coroutine
    def receive(self):
        while True:
            try:
                line = yield self.stream.read_until(b'\n')
            except Exception as e:
                logging.debug(str(e))
                msg = Message(self, '', COMMAND_QUIT, 'CONNECTION ERROR')
                yield self.outqueue.put(msg)
                return
            data = line.strip().split(b' ')
            if len(data) != 2:
                continue
            room_name, content = data[0], data[1]
            if room_name in self.rooms:
                msg = Message(self, room_name, COMMAND_NORMAL, content)
            else:
                msg = Message(self, room_name, COMMAND_JOIN, content)
            yield self.outqueue.put(msg)
Author: beef9999, Project: go-chatroom, Lines: 68, Source: py-server.py

Example 13: BaseSpider

# Required import: from tornado.queues import Queue [as alias]
# Or: from tornado.queues.Queue import task_done [as alias]
class BaseSpider(object):
    url_parser = None

    def __init__(self, engine, concurrent=3):
        self.engine = engine
        self.http = httpclient.AsyncHTTPClient()
        self.queue = Queue()
        self.concurrency = concurrent

    @property
    def hostname(self):
        return self.url_parser.hostname

    @property
    def url_root(self):
        return self.url_parser.url_root

    @property
    def base_url(self):
        return self.url_parser.base_url

    @gen.coroutine
    def __worker(self):
        """Consumes the queue."""
        while True:
            yield self.fetch_url()

    @gen.coroutine
    def crawl(self, description, location):
        """Starts crawling the specified URL."""
        url = self.url_parser(description, location)
        self.queue.put(url)
        self.engine.notify_started(self)
        for _ in range(self.concurrency):
            self.__worker()
        yield self.queue.join()
        self.engine.notify_finished(self)

    @gen.coroutine
    def fetch_url(self):
        """Retrieves a URL from the queue and returns the parsed data."""
        url = yield self.queue.get()
        logger.info('fetching %s' % url)
        try:
            response = yield self.http.fetch(url)
            soup = BeautifulSoup(response.body)
            logger.info('got response %s' % url)

            urls = yield self.fetch_links(response, soup)
            for new_url in urls:
                logger.debug('Added %s to queue' % new_url)
                yield self.queue.put(new_url)

            data = yield self.parse_response(response, soup)
            logger.info('Parsed response for %s' % url)
        except (httpclient.HTTPError, ValueError):
            message = 'HTTP Error: (%s)' % url
            self.engine.write_message(message, self.engine.STATUS_ERROR)
        else:
            self.engine.write_data(data)
        finally:
            self.queue.task_done()

    @gen.coroutine
    def fetch_links(self, response, soup):
        """Fetch URLs to be added to the queue."""
        raise gen.Return([])

    def parse_response(self, response, soup):
        """Extract information from the response, return should be a 
        list of dict's.
        
        Sample dict:
        {
            'title': 'Job Title',
            'company': 'Company Name',
            'location': 'City/State/Country',
            'tags': ['tag1', 'tag2', 'tag3'],
            'category': 'Software Developer',
            'origin': 'Name of the origin website',
            'url': 'Link to the complete job description',
        }
        """
        raise NotImplementedError
Author: winstonf88, Project: pyjobs, Lines: 86, Source: spider.py


Note: The tornado.queues.Queue.task_done examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are drawn from open-source projects contributed by various developers; copyright remains with the original authors, and distribution or use of the code should follow the corresponding project's license. Do not reproduce without permission.