

Python parse.urldefrag function code examples

This article collects typical usage examples of the six.moves.urllib.parse.urldefrag function in Python. If you have been wondering what urldefrag does, how to call it, or what real-world uses look like, the curated examples below should help.


The following 15 code examples of the urldefrag function are shown below, ordered by popularity.
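Before looking at the project code, here is a minimal standalone sketch of what urldefrag does: it splits a URL into the URL without its fragment and the fragment itself (the expected values in the comments assume standard urllib behavior):

from six.moves.urllib.parse import urldefrag

url, fragment = urldefrag("http://www.example.com/page.html?k=v#section-2")
print(url)       # http://www.example.com/page.html?k=v
print(fragment)  # section-2

# A URL without a fragment yields an empty fragment string
_, fragment = urldefrag("http://www.example.com/page.html")
print(repr(fragment))  # ''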

Example 1: startElementNS

 def startElementNS(self, name, qname, attrs):
     stack = self.stack
     stack.append(ElementHandler())
     current = self.current
     parent = self.parent
     base = attrs.get(BASE, None)
     if base is not None:
         base, frag = urldefrag(base)
         if parent and parent.base:
             base = urljoin(parent.base, base)
         else:
             systemId = self.locator.getPublicId() \
                 or self.locator.getSystemId()
             if systemId:
                 base = urljoin(systemId, base)
     else:
         if parent:
             base = parent.base
         if base is None:
             systemId = self.locator.getPublicId() \
                 or self.locator.getSystemId()
             if systemId:
                 base, frag = urldefrag(systemId)
     current.base = base
     language = attrs.get(LANG, None)
     if language is None:
         if parent:
             language = parent.language
     current.language = language
     current.start(name, qname, attrs)
Author: drewp, Project: rdflib, Lines: 30, Source: rdfxml.py
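The pattern in Example 1 (strip any fragment from xml:base, then resolve the result against the parent base) can be exercised in isolation; a minimal sketch with made-up URLs:

from six.moves.urllib.parse import urldefrag, urljoin

# A fragment on xml:base is discarded before resolution
base, frag = urldefrag("subdir/doc.rdf#ignored")
print(base)  # subdir/doc.rdf
print(urljoin("http://example.com/data/", base))  # http://example.com/data/subdir/doc.rdf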

Example 2: get_disk_name

def get_disk_name(ovf):
    """Get the disk format and file name from a OVF descriptor."""
    root = etree.fromstring(ovf)
    ovf_ns = root.nsmap['ovf']

    id_attr = '{%s}id' % ovf_ns
    href_attr = '{%s}href' % ovf_ns
    files = {f.get(id_attr): f.get(href_attr) for f in
             root.findall('ovf:References/ovf:File', root.nsmap)}

    # we do not care about more than one disk
    disk = root.find('ovf:DiskSection/ovf:Disk', root.nsmap)
    if disk is not None:
        format_attr = '{%s}format' % ovf_ns
        fileref_attr = '{%s}fileRef' % ovf_ns
        ovf_format = disk.get(format_attr)
        if not ovf_format:
            raise Exception("Expecting some format!")
        (format_url, _) = parse.urldefrag(ovf_format)
        try:
            disk_format = SPECS[format_url]
        except KeyError:
            raise Exception("Unknown format!")
        try:
            disk_file = files[disk.get(fileref_attr)]
        except KeyError:
            raise Exception("Unknown disk!")
        return (disk_format, disk_file)
    return None, None
Author: alvarolopez, Project: atrope, Lines: 29, Source: ovf.py
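For context on Example 2, the ovf:format attribute is normally a specification URL whose fragment selects a variant; urldefrag reduces it to the bare specification URL used as the lookup key. A minimal sketch — this SPECS mapping is hypothetical, not the project's actual table:

from six.moves.urllib import parse

# Hypothetical mapping from specification URL to disk-format name
SPECS = {"http://www.vmware.com/interfaces/specifications/vmdk.html": "vmdk"}

ovf_format = "http://www.vmware.com/interfaces/specifications/vmdk.html#streamOptimized"
format_url, _ = parse.urldefrag(ovf_format)
print(SPECS[format_url])  # vmdk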

Example 3: __init__

    def __init__(self, request, timeout=180):
        self._url = urldefrag(request.url)[0]
        # converting to bytes to comply to Twisted interface
        self.url = to_bytes(self._url, encoding='ascii')
        self.method = to_bytes(request.method, encoding='ascii')
        self.body = request.body or None
        self.headers = Headers(request.headers)
        self.response_headers = None
        self.timeout = request.meta.get('download_timeout') or timeout
        self.start_time = time()
        self.deferred = defer.Deferred().addCallback(self._build_response, request)

        # Fixes Twisted 11.1.0+ support as HTTPClientFactory is expected
        # to have _disconnectedDeferred. See Twisted r32329.
        # As Scrapy implements its own logic to handle redirects, there is
        # no need to add the _waitForDisconnect callback.
        # Specifically this avoids the AttributeError exception when
        # clientConnectionFailed method is called.
        self._disconnectedDeferred = defer.Deferred()

        self._set_connection_attributes(request)

        # set Host header based on url
        self.headers.setdefault('Host', self.netloc)

        # set Content-Length based on the length of the body
        if self.body is not None:
            self.headers['Content-Length'] = len(self.body)
            # just in case a broken http/1.1 decides to keep connection alive
            self.headers.setdefault("Connection", "close")
        # Content-Length must be specified in POST method even with no body
        elif self.method == b'POST':
            self.headers['Content-Length'] = 0
Author: 390218462, Project: scrapy, Lines: 33, Source: webclient.py

Example 4: escape_ajax

def escape_ajax(url):
    """
    Return the crawlable URL according to:
    http://code.google.com/web/ajaxcrawling/docs/getting-started.html

    >>> escape_ajax("www.example.com/ajax.html#!key=value")
    'www.example.com/ajax.html?_escaped_fragment_=key%3Dvalue'
    >>> escape_ajax("www.example.com/ajax.html?k1=v1&k2=v2#!key=value")
    'www.example.com/ajax.html?k1=v1&k2=v2&_escaped_fragment_=key%3Dvalue'
    >>> escape_ajax("www.example.com/ajax.html?#!key=value")
    'www.example.com/ajax.html?_escaped_fragment_=key%3Dvalue'
    >>> escape_ajax("www.example.com/ajax.html#!")
    'www.example.com/ajax.html?_escaped_fragment_='

    URLs that are not "AJAX crawlable" (according to Google) are returned as-is:

    >>> escape_ajax("www.example.com/ajax.html#key=value")
    'www.example.com/ajax.html#key=value'
    >>> escape_ajax("www.example.com/ajax.html#")
    'www.example.com/ajax.html#'
    >>> escape_ajax("www.example.com/ajax.html")
    'www.example.com/ajax.html'
    """
    defrag, frag = urldefrag(url)
    if not frag.startswith("!"):
        return url
    return add_or_replace_parameter(defrag, "_escaped_fragment_", frag[1:])
Author: naisanza, Project: scrapy, Lines: 27, Source: url.py

Example 5: absolutize

 def absolutize(self, uri, defrag=1):
     base = urljoin("file:", pathname2url(os.getcwd()))
     result = urljoin("%s/" % base, uri, allow_fragments=not defrag)
     if defrag:
         result = urldefrag(result)[0]
     if not defrag:
         if uri and uri[-1] == "#" and result[-1] != "#":
             result = "%s#" % result
     return URIRef(result)
Author: hsolbrig, Project: rdflib, Lines: 9, Source: namespace.py
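Example 5 chains pathname2url, urljoin and urldefrag to absolutize a relative URI against the current working directory; a runnable sketch of the same steps outside the class:

import os
from six.moves.urllib.parse import urldefrag, urljoin
from six.moves.urllib.request import pathname2url

base = urljoin("file:", pathname2url(os.getcwd()))
result = urljoin("%s/" % base, "data.ttl#frag", allow_fragments=False)
print(urldefrag(result)[0])  # e.g. file:///<cwd>/data.ttl; the fragment is removed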

Example 6: url_query_cleaner

def url_query_cleaner(url, parameterlist=(), sep='&', kvsep='=', remove=False, unique=True, keep_fragments=False):
    """Clean URL arguments leaving only those passed in the parameterlist keeping order

    >>> import w3lib.url
    >>> w3lib.url.url_query_cleaner("product.html?id=200&foo=bar&name=wired", ('id',))
    'product.html?id=200'
    >>> w3lib.url.url_query_cleaner("product.html?id=200&foo=bar&name=wired", ['id', 'name'])
    'product.html?id=200&name=wired'
    >>>

    If `unique` is ``False``, do not remove duplicated keys

    >>> w3lib.url.url_query_cleaner("product.html?d=1&e=b&d=2&d=3&other=other", ['d'], unique=False)
    'product.html?d=1&d=2&d=3'
    >>>

    If `remove` is ``True``, leave only those **not in parameterlist**.

    >>> w3lib.url.url_query_cleaner("product.html?id=200&foo=bar&name=wired", ['id'], remove=True)
    'product.html?foo=bar&name=wired'
    >>> w3lib.url.url_query_cleaner("product.html?id=2&foo=bar&name=wired", ['id', 'foo'], remove=True)
    'product.html?name=wired'
    >>>

    By default, URL fragments are removed. If you need to preserve fragments,
    pass the ``keep_fragments`` argument as ``True``.

    >>> w3lib.url.url_query_cleaner('http://domain.tld/?bla=123#123123', ['bla'], remove=True, keep_fragments=True)
    'http://domain.tld/#123123'

    """

    if isinstance(parameterlist, (six.text_type, bytes)):
        parameterlist = [parameterlist]
    url, fragment = urldefrag(url)
    base, _, query = url.partition('?')
    seen = set()
    querylist = []
    for ksv in query.split(sep):
        if not ksv:
            continue
        k, _, _ = ksv.partition(kvsep)
        if unique and k in seen:
            continue
        elif remove and k in parameterlist:
            continue
        elif not remove and k not in parameterlist:
            continue
        else:
            querylist.append(ksv)
            seen.add(k)
    url = '?'.join([base, sep.join(querylist)]) if querylist else base
    if keep_fragments:
        url += '#' + fragment
    return url
Author: scrapy, Project: w3lib, Lines: 55, Source: url.py

Example 7: update_params

def update_params(_url, _debug=False, **params):
    """Update the query parameters in a URL.

    ``_url`` is any URL, with or without a query string.

    ``**params`` are query parameters to add or replace. Each value may be a
    string, a list of strings, or None. Passing a list generates multiple
    values for the same parameter. Passing None deletes the corresponding
    parameter if present.

    Return the new URL.

    *Debug mode:* if ``_debug=True``, return a tuple:
    ``[0]`` is the URL without query string or fragment,
    ``[1]`` is the final query parameters as a dict, and
    ``[2]`` is the fragment part of the original URL or the empty string.

    Usage:

    >>> update_params("foo", new1="NEW1")
    'foo?new1=NEW1'
    >>> update_params("foo?p=1", p="2")
    'foo?p=2'
    >>> update_params("foo?p=1", p=None)
    'foo'
    >>> update_params("http://example.com/foo?new1=OLD1#myfrag", new1="NEW1")
    'http://example.com/foo?new1=NEW1#myfrag'
    >>> update_params("http://example.com/foo?new1=OLD1#myfrag", new1="NEW1", _debug=True)
    ('http://example.com/foo', {'new1': 'NEW1'}, 'myfrag')
    >>> update_params("http://www.mau.de?foo=2", brrr=3)
    'http://www.mau.de?foo=2&brrr=3'
    >>> update_params("http://www.mau.de?foo=A&foo=B", foo=["C", "D"])
    'http://www.mau.de?foo=C&foo=D'
    """

    url, fragment = urldefrag(_url)
    if "?" in url:
        url, qs = url.split("?", 1)
        query = parse_qs(qs)
    else:
        query = {}
    for key in params:
        value = params[key]
        if value is not None:
            query[key] = value
        elif key in query:
            del query[key]
    if _debug:
        return url, query, fragment
    qs = urlencode(query, True)
    if qs:
        qs = "?" + qs
    if fragment:
        fragment = "#" + fragment
    return "{0}{1}{2}".format(url, qs, fragment)
Author: jManji, Project: Trivia, Lines: 55, Source: tools.py

Example 8: resolve_ref

    def resolve_ref(self, obj, base_url):
        ref = obj.pop('import', None)

        url = urlparse.urljoin(base_url, ref)
        if url in self.resolved:
            return self.resolved[url]
        if url in self.resolving:
            raise RuntimeError('Circular reference for url %s' % url)
        self.resolving[url] = True
        doc_url, pointer = urlparse.urldefrag(url)
        document = self.fetch(doc_url)
        fragment = copy.deepcopy(resolve_pointer(document, pointer))
        try:
            result = self.resolve_all(fragment, doc_url)
        finally:
            del self.resolving[url]
        return result
Author: lowks, Project: rabix, Lines: 17, Source: ref_resolver.py
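Examples 8, 10, and 14 share one idiom: urldefrag splits a JSON reference into the document URL to fetch and a JSON Pointer into the fetched document. A small illustration with a hypothetical reference URL:

from six.moves.urllib import parse as urlparse

url = "http://example.com/schemas/tool.json#/inputs/0"  # hypothetical reference
doc_url, pointer = urlparse.urldefrag(url)
print(doc_url)  # http://example.com/schemas/tool.json
print(pointer)  # /inputs/0 -- a JSON Pointer into the fetched document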

Example 9: url_query_cleaner

def url_query_cleaner(url, parameterlist=(), sep='&', kvsep='=', remove=False, unique=True):
    """Clean URL arguments leaving only those passed in the parameterlist keeping order

    >>> import w3lib.url
    >>> w3lib.url.url_query_cleaner("product.html?id=200&foo=bar&name=wired", ('id',))
    'product.html?id=200'
    >>> w3lib.url.url_query_cleaner("product.html?id=200&foo=bar&name=wired", ['id', 'name'])
    'product.html?id=200&name=wired'
    >>>

    If `unique` is ``False``, do not remove duplicated keys

    >>> w3lib.url.url_query_cleaner("product.html?d=1&e=b&d=2&d=3&other=other", ['d'], unique=False)
    'product.html?d=1&d=2&d=3'
    >>>

    If `remove` is ``True``, leave only those **not in parameterlist**.

    >>> w3lib.url.url_query_cleaner("product.html?id=200&foo=bar&name=wired", ['id'], remove=True)
    'product.html?foo=bar&name=wired'
    >>> w3lib.url.url_query_cleaner("product.html?id=2&foo=bar&name=wired", ['id', 'foo'], remove=True)
    'product.html?name=wired'
    >>>

    """

    if isinstance(parameterlist, (six.text_type, bytes)):
        parameterlist = [parameterlist]
    url = urldefrag(url)[0]
    base, _, query = url.partition('?')
    seen = set()
    querylist = []
    for ksv in query.split(sep):
        k, _, _ = ksv.partition(kvsep)
        if unique and k in seen:
            continue
        elif remove and k in parameterlist:
            continue
        elif not remove and k not in parameterlist:
            continue
        else:
            querylist.append(ksv)
            seen.add(k)
    return '?'.join([base, sep.join(querylist)]) if querylist else base
Author: Preetwinder, Project: w3lib, Lines: 44, Source: url.py

Example 10: resolve_ref

 def resolve_ref(self, obj, base_url):
     ref, mixin, checksum = (obj.pop("$ref", None), obj.pop("$mixin", None), obj.pop("$checksum", None))
     ref = ref or mixin
     url = urlparse.urljoin(base_url, ref)
     if url in self.resolved:
         return self.resolved[url]
     if url in self.resolving:
         raise RuntimeError("Circular reference for url %s" % url)
     self.resolving[url] = True
     doc_url, pointer = urlparse.urldefrag(url)
     document = self.fetch(doc_url)
     fragment = copy.deepcopy(resolve_pointer(document, pointer))
     try:
         self.verify_checksum(checksum, fragment)
         if isinstance(fragment, dict) and mixin:
             fragment = dict(obj, **fragment)
         result = self.resolve_all(fragment, doc_url)
     finally:
         del self.resolving[url]
     return result
Author: RitwikGupta, Project: rabix, Lines: 20, Source: ref_resolver.py

Example 11: download_request

    def download_request(self, request):
        timeout = request.meta.get('download_timeout') or self._connectTimeout
        agent = self._get_agent(request, timeout)

        # request details
        url = urldefrag(request.url)[0]
        method = request.method
        headers = TxHeaders(request.headers)
        bodyproducer = _RequestBodyProducer(request.body) if request.body else None

        start_time = time()
        d = agent.request(method, url, headers, bodyproducer)
        # set download latency
        d.addCallback(self._cb_latency, request, start_time)
        # response body is ready to be consumed
        d.addCallback(self._cb_bodyready, request)
        d.addCallback(self._cb_bodydone, request, url)
        # check download timeout
        self._timeout_cl = reactor.callLater(timeout, d.cancel)
        d.addBoth(self._cb_timeout, request, url, timeout)
        return d
Author: BillWangCS, Project: scrapy, Lines: 21, Source: http11.py
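Examples 11 through 13 strip the fragment before issuing the request because fragments are purely client-side and are never sent to the server. A minimal illustration with a made-up URL:

from six.moves.urllib.parse import urldefrag

request_url = "http://example.com/page?q=1#results"
url = urldefrag(request_url)[0]
print(url)  # http://example.com/page?q=1 -- this is what goes on the wire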

Example 12: download_request

    def download_request(self, request):
        timeout = request.meta.get('download_timeout') or self._connectTimeout
        agent = self._get_agent(request, timeout)

        # request details
        url = urldefrag(request.url)[0]
        method = to_bytes(request.method)
        headers = TxHeaders(request.headers)
        if isinstance(agent, self._TunnelingAgent):
            headers.removeHeader(b'Proxy-Authorization')
        if request.body:
            bodyproducer = _RequestBodyProducer(request.body)
        elif method == b'POST':
            # Setting Content-Length: 0 even for POST requests is not a
            # MUST per the HTTP RFCs, but it is common behavior, and some
            # servers require it, otherwise returning "411 Length Required"
            #
            # RFC 7230#section-3.3.2:
            # "a Content-Length header field is normally sent in a POST
            # request even when the value is 0 (indicating an empty payload body)."
            #
            # Twisted < 17 will not add "Content-Length: 0" by itself;
            # Twisted >= 17 fixes this. Using a producer with an empty
            # string sends "Content-Length: 0" for all versions of Twisted.
            bodyproducer = _RequestBodyProducer(b'')
        else:
            bodyproducer = None
        start_time = time()
        d = agent.request(
            method, to_bytes(url, encoding='ascii'), headers, bodyproducer)
        # set download latency
        d.addCallback(self._cb_latency, request, start_time)
        # response body is ready to be consumed
        d.addCallback(self._cb_bodyready, request)
        d.addCallback(self._cb_bodydone, request, url)
        # check download timeout
        self._timeout_cl = reactor.callLater(timeout, d.cancel)
        d.addBoth(self._cb_timeout, request, url, timeout)
        return d
Author: JohnDoes95, Project: project_parser, Lines: 40, Source: http11.py

Example 13: download_request

    def download_request(self, request):
        timeout = request.meta.get('download_timeout') or self._connectTimeout
        agent = self._get_agent(request, timeout)

        # request details
        url = urldefrag(request.url)[0]
        method = to_bytes(request.method)
        headers = TxHeaders(request.headers)
        if isinstance(agent, self._TunnelingAgent):
            headers.removeHeader(b'Proxy-Authorization')
        bodyproducer = _RequestBodyProducer(request.body) if request.body else None

        start_time = time()
        d = agent.request(
            method, to_bytes(url, encoding='ascii'), headers, bodyproducer)
        # set download latency
        d.addCallback(self._cb_latency, request, start_time)
        # response body is ready to be consumed
        d.addCallback(self._cb_bodyready, request)
        d.addCallback(self._cb_bodydone, request, url)
        # check download timeout
        self._timeout_cl = reactor.callLater(timeout, d.cancel)
        d.addBoth(self._cb_timeout, request, url, timeout)
        return d
Author: bedreamer, Project: scrapy, Lines: 24, Source: http11.py

Example 14: resolve_ref

    def resolve_ref(self, obj, base_url):
        ref = obj.pop('import', None)
        txt = obj.pop('include', None)

        parse = txt is None

        url = urlparse.urljoin(base_url, ref or txt)
        if url in self.resolved:
            return self.resolved[url]
        if url in self.resolving:
            raise RuntimeError('Circular reference for url %s' % url)
        self.resolving[url] = True
        doc_url, pointer = urlparse.urldefrag(url)
        try:
            document = self.fetch(doc_url, parse)
            if parse:
                fragment = (copy.deepcopy(self.index.get("#" + pointer))
                            or resolve_pointer(document, pointer))
                result = self.resolve_all(fragment, doc_url)
            else:
                result = document
        finally:
            del self.resolving[url]
        return result
Author: dionjwa, Project: rabix, Lines: 24, Source: ref_resolver.py

Example 15: escape_ajax

def escape_ajax(url):
    """
    Return the crawlable URL according to:
    http://code.google.com/web/ajaxcrawling/docs/getting-started.html

    >>> escape_ajax("www.example.com/ajax.html#!key=value")
    'www.example.com/ajax.html?_escaped_fragment_=key%3Dvalue'
    >>> escape_ajax("www.example.com/ajax.html?k1=v1&k2=v2#!key=value")
    'www.example.com/ajax.html?k1=v1&k2=v2&_escaped_fragment_=key%3Dvalue'
    >>> escape_ajax("www.example.com/ajax.html?#!key=value")
    'www.example.com/ajax.html?_escaped_fragment_=key%3Dvalue'
    >>> escape_ajax("www.example.com/ajax.html#!")
    'www.example.com/ajax.html?_escaped_fragment_='

    URLs that are not "AJAX crawlable" (according to Google) are returned as-is:

    >>> escape_ajax("www.example.com/ajax.html#key=value")
    'www.example.com/ajax.html#key=value'
    >>> escape_ajax("www.example.com/ajax.html#")
    'www.example.com/ajax.html#'
    >>> escape_ajax("www.example.com/ajax.html")
    'www.example.com/ajax.html'
    """
    # >>> urlparse('http://www.example.com/ajax.html?k1=v1&k2=v2#!key=value')
    # ParseResult(scheme='http', netloc='www.example.com', path='/ajax.html',
    #             params='', query='k1=v1&k2=v2', fragment='!key=value')

    defrag, frag = urldefrag(url)
    # urldefrag extracts the fragment on its own. For the URL above it returns:
    # ('http://www.example.com/ajax.html?k1=v1&k2=v2', '!key=value')

    # If the fragment does not start with '!', return the URL unchanged;
    # otherwise rewrite it with the _escaped_fragment_ query parameter.
    if not frag.startswith('!'):
        return url
    return add_or_replace_parameter(defrag, '_escaped_fragment_', frag[1:])  # [1:] strips the '!'
Author: Terrenceyang213, Project: SourceLearningNote-Scrapy-, Lines: 36, Source: url.py


Note: The six.moves.urllib.parse.urldefrag examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from community open-source projects, and copyright remains with the original authors; consult each project's license before using or redistributing the code. Do not republish without permission.