This article collects typical usage examples of the Queue.get method from Python's asyncio module. If you have been wondering what Queue.get does, or how to use it in practice, the curated examples below should help. You may also want to look further into the enclosing class, asyncio.Queue.
The sections below show 14 code examples of Queue.get, sorted by popularity by default.
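Before the examples, here is a minimal, self-contained sketch of the pattern they all build on: a producer enqueues items and a consumer suspends on Queue.get until an item arrives. It uses modern async/await syntax, while most of the examples below predate it and use the older @coroutine / yield from style; all names here are illustrative.

import asyncio
from asyncio import Queue

async def producer(queue):
    for i in range(3):
        await queue.put(i)        # hand items to the consumer
    await queue.put(None)         # sentinel: no more items

async def consumer(queue):
    while True:
        item = await queue.get()  # suspends until an item is available
        if item is None:
            break
        print('received:', item)

async def main():
    queue = Queue()
    await asyncio.gather(producer(queue), consumer(queue))

asyncio.run(main())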
Example 1: MessageHandler
# Required import: from asyncio import Queue
class MessageHandler(ws.WS):

    def __init__(self):
        self.queue = Queue()

    def get(self):
        # Queue.get() returns a coroutine; the caller is expected to await it.
        return self.queue.get()

    def on_message(self, websocket, message):
        # Queue.put() is also a coroutine; returning it lets the framework await it.
        return self.queue.put(message)
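Because on_message returns the put coroutine and get returns the get coroutine, the handler can be exercised without a real websocket by driving the callbacks directly. A hypothetical sketch (assuming ws.WS permits bare instantiation):

import asyncio

async def demo():
    handler = MessageHandler()
    # Simulate the framework delivering an inbound frame.
    await handler.on_message(websocket=None, message='hello')
    assert await handler.get() == 'hello'

asyncio.run(demo())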
Example 2: __init__
# Required import: from asyncio import Queue
class Listener:

    def __init__(self):
        self._messages = Queue()

    def __call__(self, channel, message):
        # Called from synchronous code, so use the non-blocking put.
        self._messages.put_nowait((channel, message))

    def get(self):
        return self._messages.get()
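The point of this pattern is that __call__ can be invoked from plain synchronous code (put_nowait never blocks and needs no await), while consumers await get(). A minimal sketch with illustrative names:

import asyncio

listener = Listener()

async def main():
    listener('news', 'hello')              # plain synchronous call site
    channel, message = await listener.get()
    print(channel, message)                # news hello

asyncio.run(main())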
Example 3: Message
# Required import: from asyncio import Queue
class Message(ws.WS):

    def __init__(self, loop):
        # Note: the loop argument to Queue was deprecated in Python 3.8
        # and removed in 3.10; modern code should omit it.
        self.queue = Queue(loop=loop)

    def get(self):
        return self.queue.get()

    def on_message(self, websocket, message):
        self.queue.put_nowait(message)
Example 4: input
# Required import: from asyncio import Queue
def input(self, fd, dst):
    q = Queue()

    def cb():
        # Reader callback: runs in the event loop whenever fd is readable.
        q.put_nowait(os.read(fd, 32))

    self.loop.add_reader(fd, cb)
    try:
        while True:
            data = yield from q.get()
            if not data:  # an empty read means EOF
                break
            yield from send(dst, BYTES, data)
    finally:
        self.loop.remove_reader(fd)
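The same file-descriptor-to-queue bridge works with current asyncio as well. A Unix-only sketch using a pipe (loop.add_reader is not available on the Windows Proactor loop; names are illustrative):

import asyncio
import os

async def read_fd(fd):
    loop = asyncio.get_running_loop()
    queue = asyncio.Queue()
    # add_reader invokes the callback whenever fd has data to read.
    loop.add_reader(fd, lambda: queue.put_nowait(os.read(fd, 32)))
    try:
        while True:
            chunk = await queue.get()
            if not chunk:          # a zero-byte read signals EOF
                break
            print('chunk:', chunk)
    finally:
        loop.remove_reader(fd)

async def main():
    r, w = os.pipe()
    os.set_blocking(r, False)
    os.write(w, b'hello')
    os.close(w)                    # closing the write end produces EOF
    await read_fd(r)
    os.close(r)

asyncio.run(main())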
Example 5: Echo
# Required import: from asyncio import Queue
class Echo(WS):

    def __init__(self, loop=None):
        self.queue = Queue(loop=loop)

    def get(self):
        return self.queue.get()

    def on_message(self, ws, message):
        self.queue.put_nowait(message)

    def on_ping(self, ws, body):
        ws.pong(body)
        self.queue.put_nowait('PING: %s' % body.decode('utf-8'))

    def on_pong(self, ws, body):
        self.queue.put_nowait('PONG: %s' % body.decode('utf-8'))

    def on_close(self, ws):
        self.queue.put_nowait('CLOSE')
Example 6: _EventManager
# Required import: from asyncio import Queue
class _EventManager(object):

    def __init__(self):
        providers = {}
        self.__registration = {}
        self.__module_functions = {}
        self.__events = Queue()

    @coroutine
    def handleEvents(self):
        while True:
            event, args, future = yield from self.__events.get()
            logger.debug("Handling event {}".format(event))
            for fn, expects in self.__registration[event[0]]:
                fire = True
                # The event tuple is (name, arg1, arg2, ...); each argument
                # must match what the handler declared it expects.
                if len(event) - 1 != len(expects):
                    continue
                for i in range(len(event) - 1):
                    ev = event[i + 1].lower()
                    ex = expects[i]
                    if isinstance(ex, list):
                        # A list means "match any of these values".
                        if not any(ev == val.lower() for val in ex):
                            logger.error("Won't fire")
                            fire = False
                            break
                    else:
                        if ev != ex.lower():
                            fire = False
                            break
                if fire:
                    logger.debug("Firing event function: {} with {}".format(fn.__name__, args))
                    ret = fn(event=event, **args)
                    future.set_result(ret)

    @coroutine
    def handle_event(self, event, args):
        logger.debug('Handling event {}'.format(event))
        to_call = []
        results = []
        for fn, expects in self.__registration[event[0]]:
            fire = True
            if len(event) - 1 != len(expects):
                continue
            for i in range(len(event) - 1):
                ev = event[i + 1].lower()
                ex = expects[i]
                if isinstance(ex, list):
                    if not any(ev == val.lower() for val in ex):
                        logger.error("Won't fire")
                        fire = False
                        break
                else:
                    if ev != ex.lower():
                        fire = False
                        break
            if fire:
                to_call.append(fn(event=event, **args))
        if to_call:
            results = yield from gather(*to_call)
        return results

    def register_class(self, cls):
        methods = inspect.getmembers(cls, predicate=inspect.ismethod)
        for _, fn in methods:
            event = getattr(fn, '__event__', None)
            if event is not None:
                logger.debug('Registering {} for {}'.format(fn.__name__, event))
                self.register_function(event, fn)

    def register_function(self, event, func):
        primary = event[0]
        expects = event[1:] if len(event) > 1 else []
        if primary not in self.__registration:
            self.__registration[primary] = []
        self.__registration[primary].append([func, expects])
        mod = sys.modules[func.__module__]
        if mod not in self.__module_functions:
            self.__module_functions[mod] = []
        self.__module_functions[mod].append(func)

    @coroutine
    def fire_event(self, *event, **kwargs):
        results = yield from self.handle_event(event, kwargs)
        return results

    def unregisterModuleFunctions(self, mod):
        if mod not in self.__module_functions:
            return True
        for r in self.__registration:
            # Keep only the [func, expects] pairs whose function does not
            # belong to the module being unregistered.
            self.__registration[r][:] = [
                pair for pair in self.__registration[r]
                if pair[0] not in self.__module_functions[mod]
            ]
#......... (remainder of the code omitted) .........
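A hypothetical registration-and-dispatch round trip for this manager, assuming logger and gather are imported as in the original module and a Python version that still ships asyncio.coroutine; the event names are invented:

import asyncio
from asyncio import coroutine

manager = _EventManager()

@coroutine
def on_user_login(event=None, name=None):
    print('user logged in:', name)

# Register for ('user', ...) events whose single argument is 'login'.
manager.register_function(('user', 'login'), on_user_login)

loop = asyncio.get_event_loop()
# fire_event builds the event tuple and dispatches to matching handlers.
loop.run_until_complete(manager.fire_event('user', 'login', name='alice'))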
Example 7: BrokerProtocolHandler
# Required import: from asyncio import Queue
class BrokerProtocolHandler(ProtocolHandler):

    def __init__(self, plugins_manager: PluginManager, session: Session = None, loop=None):
        super().__init__(plugins_manager, session, loop)
        self._disconnect_waiter = None
        self._pending_subscriptions = Queue(loop=self._loop)
        self._pending_unsubscriptions = Queue(loop=self._loop)

    @asyncio.coroutine
    def start(self):
        yield from super().start()
        if self._disconnect_waiter is None:
            self._disconnect_waiter = futures.Future(loop=self._loop)

    @asyncio.coroutine
    def stop(self):
        yield from super().stop()
        if self._disconnect_waiter is not None and not self._disconnect_waiter.done():
            self._disconnect_waiter.set_result(None)

    @asyncio.coroutine
    def wait_disconnect(self):
        return (yield from self._disconnect_waiter)

    def handle_write_timeout(self):
        pass

    def handle_read_timeout(self):
        if self._disconnect_waiter is not None and not self._disconnect_waiter.done():
            self._disconnect_waiter.set_result(None)

    @asyncio.coroutine
    def handle_disconnect(self, disconnect):
        self.logger.debug("Client disconnecting")
        if self._disconnect_waiter and not self._disconnect_waiter.done():
            self.logger.debug("Setting waiter result to %r" % disconnect)
            self._disconnect_waiter.set_result(disconnect)

    @asyncio.coroutine
    def handle_connection_closed(self):
        yield from self.handle_disconnect(None)

    @asyncio.coroutine
    def handle_connect(self, connect: ConnectPacket):
        # The broker handler should never receive a CONNECT message during
        # message handling: CONNECT is processed by the broker when the
        # client connects.
        self.logger.error('%s [MQTT-3.1.0-2] %s : CONNECT message received during messages handling' %
                          (self.session.client_id, format_client_message(self.session)))
        if self._disconnect_waiter is not None and not self._disconnect_waiter.done():
            self._disconnect_waiter.set_result(None)

    @asyncio.coroutine
    def handle_pingreq(self, pingreq: PingReqPacket):
        yield from self._send_packet(PingRespPacket.build())

    @asyncio.coroutine
    def handle_subscribe(self, subscribe: SubscribePacket):
        subscription = {'packet_id': subscribe.variable_header.packet_id, 'topics': subscribe.payload.topics}
        yield from self._pending_subscriptions.put(subscription)

    @asyncio.coroutine
    def handle_unsubscribe(self, unsubscribe: UnsubscribePacket):
        unsubscription = {'packet_id': unsubscribe.variable_header.packet_id, 'topics': unsubscribe.payload.topics}
        yield from self._pending_unsubscriptions.put(unsubscription)

    @asyncio.coroutine
    def get_next_pending_subscription(self):
        subscription = yield from self._pending_subscriptions.get()
        return subscription

    @asyncio.coroutine
    def get_next_pending_unsubscription(self):
        unsubscription = yield from self._pending_unsubscriptions.get()
        return unsubscription

    @asyncio.coroutine
    def mqtt_acknowledge_subscription(self, packet_id, return_codes):
        suback = SubackPacket.build(packet_id, return_codes)
        yield from self._send_packet(suback)

    @asyncio.coroutine
    def mqtt_acknowledge_unsubscription(self, packet_id):
        unsuback = UnsubackPacket.build(packet_id)
        yield from self._send_packet(unsuback)

    @asyncio.coroutine
    def mqtt_connack_authorize(self, authorize: bool):
        if authorize:
            connack = ConnackPacket.build(self.session.parent, CONNECTION_ACCEPTED)
        else:
            connack = ConnackPacket.build(self.session.parent, NOT_AUTHORIZED)
        yield from self._send_packet(connack)

    @classmethod
    @asyncio.coroutine
    def init_from_connect(cls, reader: ReaderAdapter, writer: WriterAdapter, plugins_manager, loop=None):
        """
        :param reader:
        :param writer:
        :param plugins_manager:
#......... (remainder of the code omitted) .........
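On the broker side, a dispatch loop would typically pull pending subscriptions off the queue and acknowledge them. This is not the broker's actual loop, just a hypothetical sketch of the consumption pattern, with a made-up policy that grants every topic at QoS 0:

@asyncio.coroutine
def subscription_loop(handler):
    while True:
        # Suspends until handle_subscribe() enqueues a SUBSCRIBE request.
        subscription = yield from handler.get_next_pending_subscription()
        # Hypothetical policy: grant every topic at QoS 0 (return code 0x00).
        return_codes = [0x00 for _ in subscription['topics']]
        yield from handler.mqtt_acknowledge_subscription(
            subscription['packet_id'], return_codes)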
Example 8: __init__
# Required imports: from asyncio import Queue; import asyncio, aiohttp
class Crawler:

    def __init__(self, root_url, max_redirect):
        self.max_tasks = 10
        self.max_redirect = max_redirect
        self.q = Queue()
        self.seen_urls = set()
        # aiohttp's ClientSession does connection pooling and
        # HTTP keep-alives for us. (`loop` is a module-level event loop here.)
        self.session = aiohttp.ClientSession(loop=loop)
        # Put (URL, max_redirect) in the queue. put_nowait is used because
        # __init__ is not a coroutine, and the queue is unbounded anyway.
        self.q.put_nowait((root_url, self.max_redirect))

    @asyncio.coroutine
    def crawl(self):
        '''Run the crawler until all work is done.'''
        workers = [asyncio.Task(self.work())
                   for _ in range(self.max_tasks)]
        # When all work is done, exit.
        yield from self.q.join()
        for w in workers:
            w.cancel()

    @asyncio.coroutine
    def work(self):
        while True:
            url, max_redirect = yield from self.q.get()
            # Download page and add new links to self.q.
            yield from self.fetch(url, max_redirect)
            self.q.task_done()

    @asyncio.coroutine
    def fetch(self, url, max_redirect):
        # Handle redirects ourselves.
        response = yield from self.session.get(
            url, allow_redirects=False)
        try:
            if is_redirect(response):
                if max_redirect > 0:
                    next_url = response.headers['location']
                    if next_url in self.seen_urls:
                        # We have done this before.
                        return
                    # Remember we have seen this URL.
                    self.seen_urls.add(next_url)
                    # Follow the redirect. One less redirect remains.
                    self.q.put_nowait((next_url, max_redirect - 1))
            else:
                links = yield from self.parse_links(response)
                # Python set logic:
                for link in links.difference(self.seen_urls):
                    self.q.put_nowait((link, self.max_redirect))
                self.seen_urls.update(links)
        finally:
            # Return connection to pool.
            yield from response.release()
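Driving this crawler follows the usual pre-3.5 asyncio pattern. A hypothetical entry point, which also defines the module-level loop that __init__ references (the URL is illustrative):

import asyncio

loop = asyncio.get_event_loop()
crawler = Crawler('http://example.com/', max_redirect=10)
try:
    # crawl() exits once q.join() reports that every queued URL is done.
    loop.run_until_complete(crawler.crawl())
finally:
    crawler.session.close()
    loop.close()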
Example 9: Cloner
# Required import: from asyncio import Queue
class Cloner(object):

    def __init__(self, root):
        self.visited_urls = []
        self.root = self.add_scheme(root)
        if len(self.root.host) < 4:
            sys.exit('invalid target {}'.format(self.root.host))
        self.target_path = '/opt/snare/pages/{}'.format(self.root.host)
        if not os.path.exists(self.target_path):
            os.mkdir(self.target_path)
        self.new_urls = Queue()

    @staticmethod
    def add_scheme(url):
        if yarl.URL(url).scheme:
            new_url = yarl.URL(url)
        else:
            new_url = yarl.URL('http://' + url)
        return new_url

    @asyncio.coroutine
    def process_link(self, url, check_host=False):
        url = yarl.URL(url)
        if check_host:
            if (url.host != self.root.host or url.fragment
                    or url in self.visited_urls):
                return None
        if not url.is_absolute():
            url = self.root.join(url)
        yield from self.new_urls.put(url)
        return url.relative().human_repr()

    @asyncio.coroutine
    def replace_links(self, data):
        soup = BeautifulSoup(data, 'html.parser')
        # Find all relative links.
        for link in soup.findAll(href=True):
            res = yield from self.process_link(link['href'], check_host=True)
            if res is not None:
                link['href'] = res
        # Find all images and scripts.
        for elem in soup.findAll(src=True):
            res = yield from self.process_link(elem['src'])
            if res is not None:
                elem['src'] = res
        # Find all action elements.
        for act_link in soup.findAll(action=True):
            res = yield from self.process_link(act_link['action'])
            if res is not None:
                act_link['action'] = res
        # Prevent redirects.
        for redir in soup.findAll(True, attrs={'name': re.compile('redirect.*')}):
            redir['value'] = yarl.URL(redir['value']).relative().human_repr()
        return soup

    @asyncio.coroutine
    def get_body(self):
        while not self.new_urls.empty():
            current_url = yield from self.new_urls.get()
            if current_url in self.visited_urls:
                continue
            self.visited_urls.append(current_url)
            if current_url.name:
                file_name = current_url.name
            elif current_url.raw_path != '/':
                file_name = current_url.path.rsplit('/')[1]
            else:
                file_name = 'index.html'
            file_path = os.path.dirname(current_url.path)
            if file_path == '/':
                file_path = self.target_path
            else:
                file_path = os.path.join(self.target_path, file_path[1:])
            print('path: ', file_path, 'name: ', file_name)
            if file_path and not os.path.exists(file_path):
                os.makedirs(file_path)
            data = None
            try:
                with aiohttp.Timeout(10.0):
                    with aiohttp.ClientSession() as session:
                        response = yield from session.get(current_url)
                        data = yield from response.read()
            except aiohttp.ClientError as client_error:
                print(client_error)
            else:
                response.release()
                session.close()
            if data is not None:
                if re.match(re.compile(r'.*\.(html|php)'), file_name):
                    soup = yield from self.replace_links(data)
#......... (remainder of the code omitted) .........
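The excerpt never shows how the queue is first seeded. One plausible driver, under the assumption that the root URL must be enqueued before get_body drains the frontier (the hostname is illustrative):

import asyncio

loop = asyncio.get_event_loop()
cloner = Cloner('example.com')
loop.run_until_complete(cloner.new_urls.put(cloner.root))  # seed the frontier
loop.run_until_complete(cloner.get_body())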
Example 10: ProxyResponse
# Required import: from asyncio import Queue
class ProxyResponse(object):
    '''Asynchronous wsgi response.
    '''
    _started = False
    _headers = None
    _done = False

    def __init__(self, environ, start_response):
        self._loop = environ['pulsar.connection']._loop
        self.environ = environ
        self.start_response = start_response
        self.queue = Queue()

    def __iter__(self):
        while True:
            if self._done:
                try:
                    yield self.queue.get_nowait()
                except QueueEmpty:
                    break
            else:
                # async() here is pulsar's helper for wrapping a coroutine in
                # a future (this code predates async becoming a keyword).
                yield async(self.queue.get(), loop=self._loop)

    def pre_request(self, response, exc=None):
        self._started = True
        response.bind_event('data_processed', self.data_processed)
        return response

    def error(self, exc):
        if not self._started:
            request = wsgi.WsgiRequest(self.environ)
            content_type = request.content_types.best_match(
                ('text/html', 'text/plain'))
            uri = self.environ['RAW_URI']
            msg = 'Could not find %s' % uri
            logger.info(msg=msg)
            if content_type == 'text/html':
                html = wsgi.HtmlDocument(title=msg)
                html.body.append('<h1>%s</h1>' % msg)
                data = html.render()
                resp = wsgi.WsgiResponse(504, data, content_type='text/html')
            elif content_type == 'text/plain':
                resp = wsgi.WsgiResponse(504, msg, content_type='text/plain')
            else:
                resp = wsgi.WsgiResponse(504, '')
            self.start_response(resp.status, resp.get_headers())
            self._done = True
            self.queue.put_nowait(resp.content[0])

    def data_processed(self, response, exc=None, **kw):
        '''Receive data from the requesting HTTP client.'''
        status = response.get_status()
        if status == '100 Continue':
            stream = self.environ.get('wsgi.input') or io.BytesIO()
            body = yield stream.read()
            response.transport.write(body)
        if response.parser.is_headers_complete():
            if self._headers is None:
                headers = self.remove_hop_headers(response.headers)
                self._headers = Headers(headers, kind='server')
                # start the response
                self.start_response(status, list(self._headers))
            body = response.recv_body()
            if response.parser.is_message_complete():
                self._done = True
            self.queue.put_nowait(body)

    def remove_hop_headers(self, headers):
        for header, value in headers:
            if header.lower() not in wsgi.HOP_HEADERS:
                yield header, value
Example 11: BasePlugin
# Required import: from asyncio import Queue
class BasePlugin(metaclass=ABCMeta):
    '''Core plug-in functionality

    A Sphinx plug-in needs to provide a minimum set of services in order to
    be useful. Those are defined here, with default implementations where it
    makes sense.
    '''
    # This is a handle to the data bus. It's set when we are registered.
    _databus = None
    # Type manager handle
    _tm = None

    def __init__(self, runner, plugins, source=None):
        '''Constructor

        This is how our plugin pipeline is constructed. Each plugin instance
        is created when the input script is read, and they are chained
        together, from source to sink, here.

        This method _must_ be called with the event loop from which it will
        be called in the future, e.g., asyncio.get_event_loop().
        '''
        # A dict that maps each destination for our data to the type that the
        # destination can consume.
        self._sinks = {}
        # Retain a pointer to our source, and add ourselves to its list of sinks.
        self._source = source
        if source:
            # Validate that we can process data from this source.
            sink_types = set(source.sources()).intersection(self.sinks())
            if len(sink_types):
                source._set_sink(self, sink_types.pop())
            else:
                err = "{} cannot sink '{}'".format(self, source.sources())
                _log.error(err)
                raise ImpedenceMismatchError(err)
        # Our input queue
        self._queue = Queue()
        self.runner = runner
        self._plugins = plugins
        # create_task schedules the execution of the coroutine "run", wrapped
        # in a future.
        self._task = self.runner.create_task(self.run())

    def __getattr__(self, name):
        '''Plugin pipeline building

        This method is called when Python can't find a requested attribute.
        We use it to create a new plugin instance to add to the pipeline.
        '''
        if name in self._plugins:
            return partial(self._plugins[name], source=self)
        else:
            raise AttributeError

    def _set_sink(self, sink, data_type):
        '''Register a sink

        Called during initialization to register a sink (a destination for
        our output).
        '''
        self._sinks[sink] = data_type

    @coroutine
    def publish(self, data):
        '''Publish data

        Called by a plugin to publish data to its sinks.
        '''
        for sink, data_type in self._sinks.items():
            # Special-case 'None', since that's our 'eof'. See the 'done'
            # method below.
            if data:
                data = self.xform_data(data, data_type)
            yield from self._databus.publish(data, sink)

    @coroutine
    def write_data(self, data):
        '''Write data to the queue

        Called by the databus controller to enqueue data from our source.
        '''
        yield from self._queue.put(data)

    @coroutine
    def read_data(self):
#......... (remainder of the code omitted) .........
Example 12: __init__
# Required import: from asyncio import Queue
class Messagedispatcher:

    def __init__(self, communicator):
        self.communicator = communicator
        # Map (transport, datatype) to a message class and a dedicated queue.
        self.messages = {
            "direct": {
                "status": {
                    "class": messages.StatusDirect,
                    "queue": Queue()
                },
                "pinor": {
                    "class": messages.PinorDirect,
                    "queue": Queue()
                }
            },
            "mesh": {
                "status": {
                    "class": messages.StatusMesh,
                    "queue": Queue()
                },
                "pinor": {
                    "class": messages.PinorMesh,
                    "queue": Queue()
                },
                "return": {
                    "class": messages.ReturnMesh,
                    "queue": Queue()
                },
                "deploy": {
                    "class": messages.DeployMesh,
                    "queue": Queue()
                },
                "grid": {
                    "class": messages.GridMesh,
                    "queue": Queue()
                }
            }
        }
        self.mesh_queue = Queue()

    @coroutine
    def wait_for_message(self, *types):
        # Walk the nested dict, e.g. ("mesh", "status") -> that entry's queue.
        x = self.messages
        for i in types:
            x = x[i]
        q = x["queue"]
        return (yield from q.get())

    @coroutine
    def get_mesh_message(self):
        return (yield from self.mesh_queue.get())

    @coroutine
    def startup(self):
        while True:
            meshput = False
            msg = yield from self.communicator.receive()
            if msg["type"] == "mesh":
                meshput = True
            x = self.messages
            x = x[msg["type"]]
            x = x[msg["data"]["datatype"]]
            q = x["queue"]
            c = x["class"]
            emsg = c.from_json(msg)
            yield from q.put(emsg)
            if meshput:
                # print("RECEIVE: " + str(msg) + "\n")
                yield from self.mesh_queue.put(emsg)
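Consumers then await whichever queue matches the message path. A hypothetical sketch, with `communicator` assumed to provide the receive() coroutine that startup() pumps from:

import asyncio

dispatcher = Messagedispatcher(communicator)   # `communicator` is assumed
loop = asyncio.get_event_loop()
loop.create_task(dispatcher.startup())         # pump messages into the queues
status = loop.run_until_complete(
    dispatcher.wait_for_message("mesh", "status"))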
Example 13: __init__
# Required imports: from asyncio import Queue; import asyncio, aiohttp
class Crawler:
    """Crawl a set of URLs.

    This manages two sets of URLs: 'urls' and 'done'. 'urls' is a set of
    URLs seen, and 'done' is a list of FetchStatistics.
    """

    def __init__(self, roots,
                 exclude=None, strict=True,     # What to crawl.
                 max_redirect=10, max_tries=4,  # Per-url limits.
                 max_tasks=10, *, loop=None):
        self.loop = loop or asyncio.get_event_loop()
        self.roots = roots
        self.exclude = exclude
        self.strict = strict
        self.max_redirect = max_redirect
        self.max_tries = max_tries
        self.max_tasks = max_tasks
        self.q = Queue(loop=self.loop)
        self.seen_urls = set()
        self.done = []
        self.session = aiohttp.ClientSession(loop=self.loop)
        self.root_domains = set()
        for root in roots:
            parts = urllib.parse.urlparse(root)
            host, port = urllib.parse.splitport(parts.netloc)
            if not host:
                continue
            if re.match(r'\A[\d\.]*\Z', host):
                # Hosts that look like IP addresses must match literally.
                self.root_domains.add(host)
            else:
                host = host.lower()
                if self.strict:
                    self.root_domains.add(host)
                else:
                    self.root_domains.add(lenient_host(host))
        for root in roots:
            self.add_url(root)
        self.t0 = time.time()
        self.t1 = None

    def close(self):
        """Close resources."""
        self.session.close()

    def host_okay(self, host):
        """Check if a host should be crawled.

        A literal match (after lowercasing) is always good. For hosts
        that don't look like IP addresses, some approximate matches
        are okay depending on the strict flag.
        """
        host = host.lower()
        if host in self.root_domains:
            return True
        if re.match(r'\A[\d\.]*\Z', host):
            return False
        if self.strict:
            return self._host_okay_strictish(host)
        else:
            return self._host_okay_lenient(host)

    def _host_okay_strictish(self, host):
        """Check if a host should be crawled, strict-ish version.

        This checks for equality modulo an initial 'www.' component.
        """
        host = host[4:] if host.startswith('www.') else 'www.' + host
        return host in self.root_domains

    def _host_okay_lenient(self, host):
        """Check if a host should be crawled, lenient version.

        This compares the last two components of the host.
        """
        return lenient_host(host) in self.root_domains

    def record_statistic(self, fetch_statistic):
        """Record the FetchStatistic for a completed / failed URL."""
        self.done.append(fetch_statistic)

    @asyncio.coroutine
    def parse_links(self, response):
        """Return a FetchStatistic and list of links."""
        links = set()
        content_type = None
        encoding = None
        body = yield from response.read()
        if response.status == 200:
            content_type = response.headers.get('content-type')
            pdict = {}
            if content_type:
                content_type, pdict = cgi.parse_header(content_type)
            encoding = pdict.get('charset', 'utf-8')
            if content_type in ('text/html', 'application/xml'):
                text = yield from response.text()
                # Replace href with (?:href|src) to follow image links.
#......... (remainder of the code omitted) .........
Example 14: Crawler
# Required imports: from asyncio import Queue; import asyncio, aiohttp
class Crawler(object):
    """Crawl a set of URLs.

    This manages two sets of URLs: 'urls' and 'done'. 'urls' is a set of
    URLs seen, and 'done' is a list of FetchStatistics.
    """

    def __init__(
        self,
        roots,
        scraper=None,
        data_handler=None,
        exclude=None,
        strict=True,   # What to crawl.
        max_redirect=5,
        max_tries=10,  # Per-url limits.
        max_tasks=10,
        max_connections_per_host=3,
        *,
        loop=None
    ):
        self.loop = loop or asyncio.get_event_loop()
        self.roots = roots
        self.max_connections_per_host = max_connections_per_host
        self.scraper = scraper
        self.data_handler = data_handler
        self.exclude = exclude
        self.strict = strict
        self.max_redirect = max_redirect
        self.max_tries = max_tries
        self.max_tasks = max_tasks
        self.q = Queue(loop=self.loop)
        self.seen_urls = set()
        self.done = []
        self.session = aiohttp.ClientSession(loop=self.loop)
        self.root_domains = set()
        for root in roots:
            parts = urllib.parse.urlparse(root)
            host, port = urllib.parse.splitport(parts.netloc)
            if not host:
                continue
            if re.match(r"\A[\d\.]*\Z", host):
                self.root_domains.add(host)
            else:
                host = host.lower()
                if self.strict:
                    self.root_domains.add(host)
                else:
                    self.root_domains.add(lenient_host(host))
        for root in roots:
            self.add_urls(root)
        self.t0 = time.time()
        self.t1 = None

    def record_statistic(
        self,
        url=None,
        next_url=None,
        status=None,
        exception=None,
        content_type=None,
        encoding=None,
        num_urls=0,
        num_new_urls=0,
    ):
        """Record the FetchStatistic for a completed / failed URL."""
        fetch_statistic = FetchStatistic(
            url=url,
            next_url=next_url,
            status=status,
            size=0,
            exception=exception,
            content_type=content_type,
            encoding=encoding,
            num_urls=num_urls,
            num_new_urls=num_new_urls,
        )
        self.done.append(fetch_statistic)

    def extract_data(self, root_url, html):
        raise NotImplementedError("You need to define an extract_data method!")

    def close(self):
        """Close resources."""
        LOGGER.debug("closing resources")
        self.session.close()

    @asyncio.coroutine
    def parse_links(self, web_page_html, base_url, _content_type, _encoding):
        """Return a list of links."""
        links = set()
        tree = html.fromstring(web_page_html)
        tree.make_links_absolute(base_url)
        urls = [link[2] for link in tree.iterlinks()]
        for url in urls:
            defragmented, frag = urllib.parse.urldefrag(url)
            if verify.url_allowed(
                defragmented, self.root_domains, exclude=self.exclude
            ):  # Select valid links, testing against regexp and root_domains.
                links.add(defragmented)
#......... (remainder of the code omitted) .........
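Note how parse_links strips URL fragments before deduplicating and filtering, so http://host/page#a and http://host/page#b collapse to a single URL. The splitting is done by the standard library's urllib.parse.urldefrag; a quick standalone illustration:

from urllib.parse import urldefrag

defragmented, frag = urldefrag('http://example.com/page.html#section-2')
print(defragmented)  # http://example.com/page.html
print(frag)          # section-2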