Python urlparse.urlunsplit函数代码示例

本文整理汇总了Python中urlparse.urlunsplit函数的典型用法代码示例。


示例1: video

def video():
    """Video request handler.

    Walks ``settings.VIDEO_FILES_PATH`` and, for every file found, builds a
    URL to the video and a URL to its ``.png`` thumbnail. Both URLs reuse the
    scheme and network location of ``request.base_url``.

    :return: list of available videos in json format, sorted by the name of
        the containing directory (used as the date) in descending order.
    """
    entries = []
    for dirpath, _dirnames, filenames in os.walk(settings.VIDEO_FILES_PATH):
        if not filenames:  # there is no file
            continue
        date = os.path.basename(dirpath)
        for basename in filenames:
            filename = os.path.join(dirpath, basename)
            # NOTE(review): the second argument was missing in the reviewed
            # text; the walk root is assumed to be the base - confirm.
            relpath = os.path.relpath(filename, settings.VIDEO_FILES_PATH)
            # Keep only scheme and netloc of the request; path, query and
            # fragment are rebuilt below.
            parts = list(urlparse.urlsplit(request.base_url)[:2])
            parts.append(settings.VIDEO_FILES_LOCATION + '/' + relpath)
            parts.extend(['', ''])
            url = urlparse.urlunsplit(parts)
            # The thumbnail shares the relative path but lives under a
            # different location and always has a .png extension.
            parts[2] = settings.THUMBNAIL_FILES_LOCATION + '/'
            parts[2] += os.path.splitext(relpath)[0] + '.png'
            thumbnail = urlparse.urlunsplit(parts)
            entries.append({'date': date, 'url': url, 'thumbnail': thumbnail})
    entries.sort(reverse=True, key=lambda x: x['date'])

    response = Response()
    response.headers['Content-Type'] = 'application/json'
    response.data = json.dumps(entries)
    return response

示例2: handler

    def handler(self, fname, language='text', linenumbers=False, filename=None, site=None, data=None, lang=None, post=None):
        """Create HTML for a listing.

        :param fname: path of the listing file, using ``/`` as separator.
        :param language: Pygments lexer name used for highlighting.
        :param linenumbers: when truthy, render line numbers as a table.
        :param data: ignored; it is overwritten with the file contents.
        :return: tuple ``(html, deps)`` where ``deps`` is a list holding the
            resolved file path.

        The ``filename``, ``site``, ``lang`` and ``post`` parameters are not
        used by this method.
        """
        fname = fname.replace('/', os.sep)
        if len(self.folders) == 1:
            listings_folder = next(iter(self.folders.keys()))
            if fname.startswith(listings_folder):
                # new syntax: specify folder name
                fpath = os.path.join(fname)
            else:
                # old syntax: don't specify folder name
                fpath = os.path.join(listings_folder, fname)
        else:
            # must be new syntax: specify folder name
            fpath = os.path.join(fname)
        linenumbers = 'table' if linenumbers else False
        deps = [fpath]
        with open(fpath, 'r') as inf:
            # Link targets always use forward slashes, even on Windows.
            link_path = fpath.replace('\\', '/')
            target = urlunsplit(("link", 'listing', link_path, '', ''))
            src_target = urlunsplit(
                ("link", 'listing_source', link_path, '', ''))
            src_label = self.site.MESSAGES('Source')

            data = inf.read()
            lexer = pygments.lexers.get_lexer_by_name(language)
            formatter = pygments.formatters.get_formatter_by_name(
                'html', linenos=linenumbers)
            output = '<a href="{1}">{0}</a>  <a href="{3}">({2})</a>' .format(
                fname, target, src_label, src_target) + pygments.highlight(data, lexer, formatter)

        return output, deps

示例3: to_python

    def to_python(self, value):
        """Normalize a URL value after the parent class has converted it.

        A missing scheme defaults to ``http``; when no network location is
        present the path is promoted to the network location; an empty path
        becomes ``/``.

        :param value: raw field value.
        :return: the normalized URL, or the parent's value unchanged when it
            is falsy.
        :raises ValidationError: when the URL cannot be split.
        """
        def split_url(url):
            """
            Returns a list of url parts via ``urlparse.urlsplit`` (or raises a
            ``ValidationError`` exception for certain).
            """
            try:
                return list(urlparse.urlsplit(url))
            except ValueError:
                # urlparse.urlsplit can raise a ValueError with some
                # misformatted URLs.
                raise ValidationError(self.error_messages["invalid"])

        value = super(URLField, self).to_python(value)
        if value:
            url_fields = split_url(value)
            if not url_fields[0]:
                # If no URL scheme given, assume http://
                url_fields[0] = "http"
            if not url_fields[1]:
                # Assume that if no domain is provided, that the path segment
                # contains the domain.
                url_fields[1] = url_fields[2]
                url_fields[2] = ""
                # Rebuild the url_fields list, since the domain segment may now
                # contain the path too.
                url_fields = split_url(urlparse.urlunsplit(url_fields))
            if not url_fields[2]:
                # the path portion may need to be added before query params
                url_fields[2] = "/"
            value = urlparse.urlunsplit(url_fields)
        return value

示例4: get_relative_url

def get_relative_url(destination, source):
    """Get relative URL between two sources.

    :param destination: URL that should be reached.
    :param source: URL of the document the link is placed in.
    :return: tuple (is same domain, relative url). When scheme or network
        location differ the tuple is ``(False, destination)``; otherwise the
        second item is the destination path relative to the directory of the
        source path. Query and fragment of ``destination`` are not included.
    """
    u_dest = urlparse.urlsplit(destination)
    u_src = urlparse.urlsplit(source)

    # Compare only scheme + netloc of both URLs.
    _uc1 = urlparse.urlunsplit(u_dest[:2] + ('', '', ''))
    _uc2 = urlparse.urlunsplit(u_src[:2] + ('', '', ''))

    if _uc1 != _uc2:
        ## This is a different domain
        return False, destination

    # If there is no / component in url assume it's root path.  The same
    # default is applied to the destination: posixpath.relpath() raises
    # ValueError when given an empty path.
    src_path = u_src.path or "/"
    dest_path = u_dest.path or "/"

    _relpath = posixpath.relpath(dest_path, posixpath.dirname(src_path))

    return True, _relpath
    # return True, urlparse.urlunsplit(('', '', _relpath, u_dest.query, u_dest.fragment))

示例5: _generalize_url

    def _generalize_url(self, url):
        """Return ``url`` with segments that follow a collection replaced.

        The path is walked segment by segment. A segment whose parent URL is
        contained in ``self`` and reported as a collection by
        ``self._is_a_collection`` is replaced with ``ID_SUBSTITUTE_CHAR`` in
        the generalized path.

        NOTE(review): both ``urlunsplit`` calls were truncated in the
        reviewed text; the remaining tuple items below are reconstructed
        from context and must be confirmed against the original.
        """
        parts = urlsplit(url)
        # Start from scheme + netloc only; the path is rebuilt below.
        simplified_url = urlunsplit((parts.scheme, parts.netloc, '', '', ''))
        url = simplified_url
        segments = split_path_into_segments(parts.path)
        parent_is_collection = False

        for segment in segments:
            simplified_url = simplified_url + '/' + (ID_SUBSTITUTE_CHAR if parent_is_collection else segment)
            url = url + '/' + segment
            # The flag describes the URL built so far and therefore applies
            # to the NEXT segment.
            if url in self and self._is_a_collection(url):
                parent_is_collection = True
            else:
                parent_is_collection = False

        generalized_path = urlsplit(simplified_url).path

        return urlunsplit((parts.scheme, parts.netloc, generalized_path,
                           parts.query, parts.fragment))

示例6: parse

    def parse(self, response):
        """Extract used-car items from a listing page and follow pagination.

        Yields one ``Car`` per element whose class contains
        ``inv-type-used``, then, if a ``rel="next"`` link exists, a
        ``Request`` for the next page handled by this same method.
        """
        sel = Selector(response)

        # Extract any cars found
        cars = sel.xpath('//*[contains(@class, "inv-type-used")]')
        for c in cars:
            car = Car()

            # Title and year
            car['title'] = c.xpath('.//div/div/h1/a/text()').extract()[0].strip()
            car['year'] = car['title'][0:4]

            # Price, but remove non-number characters.
            # Examples: '$12,000', 'Please Call', etc.
            price = c.xpath('.//*[contains(@class, "value")]/text()').extract()[0]
            car['price'] = ''.join(d for d in price if d.isdigit())

            # url
            path = c.xpath('.//div/div/h1/a/@href').extract()[0]
            url = urlparse.urlparse(response.url)
            car['url'] = urlparse.urlunsplit([url.scheme, url.netloc, path, None, None])

            # Certain specs are frequently missing, so we need to handle
            # them with try / except
            specs = [
                {
                    'name': 'vin',
                    'xpath': './/*/dt[text()="VIN:"]/following-sibling::dd/text()'
                },
                {
                    'name': 'color',
                    'xpath': './/*/dt[text()="Exterior Color:"]/following-sibling::dd/text()'
                },
                {
                    'name': 'miles',
                    'xpath': './/*/dt[text()="Mileage:"]/following-sibling::dd/text()'
                },
                {
                    'name': 'transmission',
                    'xpath': './/*/dt[text()="Transmission:"]/following-sibling::dd/text()'
                },
            ]

            for s in specs:
                try:
                    car[s['name']] = c.xpath(s['xpath']).extract()[0]
                except IndexError:
                    # No matching node: record the spec as missing.
                    car[s['name']] = None

            yield car

        # If there's a next page link, parse it for cars as well
        next_links = sel.xpath('//*[@rel="next"]/@href').extract()
        if len(next_links) > 0:
            query = next_links[0]
            url = urlparse.urlparse(response.url)
            # Drop query and fragment of the current page before joining.
            base = urlparse.urlunsplit([url.scheme, url.netloc, url.path, None, None])
            next_url = urlparse.urljoin(base, query)
            # Construct url
            yield Request(next_url, callback=self.parse)

示例7: _load_uri

    def _load_uri(self, base_uri, uri_to_resolve):
        """
        Obtain a remote instruction.

        ``uri_to_resolve`` is joined onto ``base_uri``. ``http``/``https``
        URIs are fetched with ``requests``; URIs without a scheme are opened
        as local files. The content is decoded as JSON.

        Returns the instruction as a python object, along with the resolved uri

        Raises ``SchemeSecurityError`` when the resolved scheme differs from
        the base scheme, and ``InvalidInstructionError`` when the scheme is
        unsupported, the resource cannot be loaded, or it is not valid JSON.
        """
        resolved_uri = urlparse.urlsplit(urlparse.urljoin(base_uri, uri_to_resolve))
        # String form, used for I/O, error messages and the return value.
        # The SplitResult itself is a 5-tuple, so it is neither a valid
        # argument for requests.get() nor for a single-%s format string.
        resolved_url = urlparse.urlunsplit(resolved_uri)
        base_scheme = urlparse.urlsplit(base_uri).scheme
        if base_scheme is not None and base_scheme != resolved_uri.scheme:
            raise SchemeSecurityError("Cannot cross from '%s' to '%s'" % (
                base_scheme, resolved_uri.scheme))

        # Reject unknown schemes before entering the try block so this error
        # can never be re-wrapped by the handlers below.
        if resolved_uri.scheme not in ('http', 'https', ''):
            raise InvalidInstructionError("Reference to unsupported scheme '%s'" % (
                resolved_uri.scheme))

        try:
            if resolved_uri.scheme in ('http', 'https'):
                instruction = json.loads(requests.get(resolved_url).text)
            else:
                # No scheme: treat the URI as a local file path.
                with open(resolved_url) as instruction_file:
                    instruction = json.load(instruction_file)
            return instruction, resolved_url
        except requests.exceptions.RequestException as e:
            raise InvalidInstructionError("Couldn't load '%s': %s" % (resolved_url, e))
        except IOError as e:
            raise InvalidInstructionError("Couldn't open '%s': %s" % (resolved_url, e))
        except ValueError:
            raise InvalidInstructionError("Invalid JSON in '%s'" % resolved_url)

示例8: from_url

    def from_url(url, headers=None, allowed=None):
        """Build a publisher from a URL, selected by the URL scheme.

        Supported schemes are ``qpid``, ``rabbit``, ``log`` and ``count``.
        For ``qpid`` and ``rabbit`` the ``queue`` query parameter is removed
        from the URL and passed to the publisher separately; ``rabbit`` URLs
        are rewritten to the ``amqp`` scheme.

        :param url: publisher URL.
        :param headers: optional headers; defaults to an empty dict.
        :param allowed: passed through to the publisher unchanged.
        :return: a publisher instance, or ``None`` for any other scheme.
        :raises ApplicationException: when a ``qpid`` or ``rabbit`` URL has
            no ``queue`` parameter.
        """
        if headers is None:
            headers = {}

        result = urlparse.urlsplit(url)
        if result.scheme == 'qpid':
            # remove the queue from the url
            queue, query = extract_param('queue', result.query)

            if queue is None:
                raise ApplicationException('No queue provided in qpid url!')

            new_url = urlparse.urlunsplit((result.scheme, result.netloc, result.path,
                                           query, result.fragment))
            return QpidPublisher(new_url, queue, headers, allowed)

        elif result.scheme == 'rabbit':
            # remove the queue from the url
            queue, query = extract_param('queue', result.query)

            if queue is None:
                # The message previously named qpid (copy-paste slip).
                raise ApplicationException('No queue provided in rabbit url!')

            new_url = urlparse.urlunsplit(('amqp', result.netloc, result.path,
                                           query, result.fragment))
            return RabbitPublisher(new_url, queue, headers, allowed)

        elif result.scheme == 'log':
            return LogPublisher(allowed)

        elif result.scheme == 'count':
            return CountPublisher(allowed)

        # Unknown scheme: no publisher is available.
        return None

示例9: requestData

 def requestData(self, basepath):
     """Deploy, run and remove the extraction script on the Nexus server.

     Three URLs are derived from ``self.url`` and ``basepath``: one to
     deploy the script, one to run it and one to delete it. The script is
     always deleted after a successful deploy, even when running it raises.

     :param basepath: path appended to the path component of ``self.url``.
     :return: the decoded ``result`` of the run (``None`` when the response
         has none), or an error message string when a request failed.
     """
     self.log.info("Attempting to communicate with Nexus server.")
     auth = "Basic " + base64.b64encode(self.user + ':' + self.pasw)
     deppath = self.url[2] + basepath
     delpath = deppath + '/artifactorymigrator'
     runpath = delpath + '/run'
     depurl = urlparse.urlunsplit((self.url[0], self.url[1], deppath, '', ''))
     delurl = urlparse.urlunsplit((self.url[0], self.url[1], delpath, '', ''))
     runurl = urlparse.urlunsplit((self.url[0], self.url[1], runpath, '', ''))
     delheaders = {'User-Agent': 'nex2art', 'Authorization': auth}
     depheaders, runheaders = delheaders.copy(), delheaders.copy()
     depheaders['Content-Type'] = 'application/json'
     runheaders['Content-Type'] = 'text/plain'
     depjson = {'name': 'artifactorymigrator', 'type': 'groovy'}
     depjson['content'] = pkgutil.get_data('nex2art', 'resources/plugin.groovy')
     depbody = json.dumps(depjson)
     res, data = None, None
     self.log.info("Deploying extraction plugin to Nexus.")
     ex, _ = self.dorequest(depurl, depbody, depheaders, 'POST', "deploy")
     if ex is None:
         try:
             self.log.info("Executing Nexus extraction.")
             ex, res = self.dorequest(runurl, None, runheaders, 'POST', "execute", True)
         finally:
             # Clean up the deployed script whether or not the run worked.
             self.log.info("Deleting extraction plugin from Nexus.")
             self.dorequest(delurl, None, delheaders, 'DELETE', "delete")
         if res is not None and 'result' in res:
             data = json.loads(res['result'])
     if ex is not None:
         self.log.error("Error accessing Nexus instance: %s", ex)
         return "Error accessing Nexus instance."
     self.log.info("Successfully fetched Nexus data.")
     return data

示例10: resolve_links

	def resolve_links(self, links, pageurl):
		"""Yield the links that point at this host, in resolved form.

		Links to other hosts, pure fragment links and links with a scheme
		other than ``http`` are dropped. Query strings and fragments are
		stripped. Host-absolute paths are yielded as-is; relative paths are
		joined onto ``pageurl``.

		:param links: iterable of raw link strings.
		:param pageurl: URL of the page the links were found on.
		"""
		for x in links:
			p = urlparse.urlsplit(x)
			if p.scheme == "http":
				if p.netloc != self.hostname:
					# Remote link
					continue
				# Turn this into a host-relative url
				p = ('', '', p.path, p.query, '')

			if p[4] != "" or p[3] != "":
				# Remove fragments (part of the url past #); the query
				# string is dropped as well.
				p = (p[0], p[1], p[2], '', '')

			if p[0] == "":
				if p[2] == "":
					# Nothing in the path, so it's a pure fragment url
					continue

				if p[2][0] == "/":
					# Absolute link on this host, so just return it
					yield urlparse.urlunsplit(p)
				else:
					# Relative link
					yield urlparse.urljoin(pageurl, urlparse.urlunsplit(p))
			else:
				# Ignore unknown url schemes like mailto
				pass

示例11: _split_uri

    def _split_uri(self, identifier):
        """Split a ``URIRef`` into a ``(namespace, resource_id)`` pair.

        A URI with a fragment is split at the fragment and the namespace gets
        a trailing ``#``. Otherwise the path is split at its last segment
        (the last two items when the path ends with ``/``). A path without
        ``/`` becomes the resource id, and a URI with no path is delegated
        to ``split_uri``.

        :raises ValueError: when ``identifier`` is not a ``URIRef``.
        """
        if isinstance(identifier, URIRef):
            scheme, netloc, path, query, fragment = urlsplit(identifier)
            # NOTE(review): this result is always overwritten by one of the
            # branches below; the next test was probably meant to be an
            # ``elif``. Left unchanged - confirm the intent.
            if query:
                namespace, resource_id = split_uri(identifier)
            if fragment:
                # if we have a fragment, we will split there
                namespace, resource_id = urldefrag(identifier)
                namespace += "#"
            elif "/" in path and len(path) > 1:
                splits = path.split("/")
                if path.endswith("/"):
                    # Trailing slash: the last item of ``splits`` is empty,
                    # so keep the final two items as the resource id.
                    resource_id = "/".join(splits[-2:])
                    path = "/".join(splits[:-2]) + "/"
                    namespace = urlunsplit((scheme, netloc, path, "", ""))
                else:
                    resource_id = "/".join(splits[-1:])
                    path = "/".join(splits[:-1]) + "/"
                    namespace = urlunsplit((scheme, netloc, path, "", ""))
            elif path:
                resource_id = path
                namespace = urlunsplit((scheme, netloc, "", "", ""))
            else:
                namespace, resource_id = split_uri(identifier)

            log.debug("Split %s to %s, %s" % (identifier, namespace, resource_id))
            return namespace, resource_id
        else:
            raise ValueError("Unknown identifier type %r" % identifier)

示例12: rewrite_urls

def rewrite_urls(origin_url, urls):
    """Yield rewritten forms of ``urls`` in the context of ``origin_url``.

    For each URL: newlines and tabs are removed, the scheme is passed
    through ``rewrite_scheme``, the credentials of ``origin_url`` are added
    when the host matches, the fragment is dropped, URLs without scheme and
    netloc are joined onto ``origin_url``, and spaces are percent-encoded.
    Empty results are not yielded.
    """
    origin = urlparse.urlsplit(origin_url)
    for candidate in urls:
        # Strip embedded line breaks and tabs.
        if candidate:
            candidate = re.sub("(\n|\t)", "", candidate)

        pieces = urlparse.urlsplit(candidate)
        netloc = pieces[1]
        path = pieces[2]
        query = pieces[3]

        # The scheme may be replaced by the project's rewrite rule.
        scheme = rewrite_scheme(pieces.scheme)

        # Same host as the origin: carry over the origin's credentials.
        if origin.username and pieces.hostname == origin.hostname:
            netloc = assemble_netloc(origin.username, origin.password,
                                     pieces.hostname, pieces.port)

        # Rebuild without the fragment.
        rewritten = urlparse.urlunsplit((scheme, netloc, path, query, None))

        # Neither scheme nor netloc: an on-site path, resolve against origin.
        if not (scheme or netloc) and (path or query):
            relative = urlparse.urlunsplit((None, None, path, query, None))
            rewritten = urlparse.urljoin(origin_url, relative)

        # Percent-encode spaces.
        rewritten = rewritten.replace(" ", "%20")
        if rewritten:
            yield rewritten

示例13: verify_image

    def verify_image(self, baseURL, imageURL):
        """Assert that an image URL answers an HTTP HEAD request with 200.

        :param baseURL: URL used to resolve ``imageURL`` when it has no
            scheme.
        :param imageURL: absolute or relative URL of the image.

        Fails the test when the request raises, when the status is not 200,
        or when the resolved URL lacks a network location or a path.
        """
        fullImageURL = imageURL
        if not urlsplit(imageURL).scheme:
            # Resolve relative path
            fullImageURL = urljoin(baseURL, imageURL)

        echo("Checking image: {}".format(fullImageURL))
        urlparts = urlsplit(fullImageURL)
        escapedparts = self.get_escaped_address_parts_minus_host(urlparts)
        if urlparts.netloc and urlparts.path:
            conn = None
            try:
                conn = httplib.HTTPConnection(urlparts.netloc)
                conn.request("HEAD", urlunsplit(escapedparts))
                echo("Going to path: {}\n".format(urlunsplit(escapedparts)))
                res = conn.getresponse()
            except Exception as inst:
                self.fail("While checking image {}, encountered exception: {}".format(
                    fullImageURL, inst))
            finally:
                # Only the status is needed; release the socket right away.
                if conn is not None:
                    conn.close()
            self.assertEqual(res.status, 200, 
                'The image at {} is not OK. Looking for it resulted in HTTP code: {}'.format(
                    urlunsplit([urlparts.scheme, urlparts.netloc, escapedparts[2], 
                        escapedparts[3], escapedparts[4]]), 
                    res.status))
        else:
            self.fail("The URL for this image is invalid: {}".format(fullImageURL))

示例14: get_onedrive_embed_code

    def get_onedrive_embed_code(self, onedrive_url):
        """Turn a OneDrive share URL into an embed code.

        :param onedrive_url: a OneDrive / OneDrive for Business document URL,
            or an existing ``<iframe`` embed code.
        :return: ``onedrive_url`` stripped and unchanged when it already
            starts with ``<iframe``; ``self.EMBED_CODE_TEMPLATE`` formatted
            with the embed URL for recognised hosts; ``None`` otherwise.
        """
        onedrive_url = onedrive_url.strip()

        # check if it already is an embed code
        if re.match(r'<iframe', onedrive_url, re.IGNORECASE) is not None:
            return onedrive_url

        scheme, netloc, path, query_string, fragment = urlsplit(onedrive_url)
        query_params = parse_qs(query_string)

        # OneDrive for Business.  Dots are escaped so that only the real
        # "-my.sharepoint.com" host suffix matches.
        odb_regex = r'https?://[\w-]+-my\.sharepoint\.com/'
        if re.match(odb_regex, onedrive_url, re.IGNORECASE) is not None:
            query_params['action'] = ['embedview']
            new_query_string = urlencode(query_params, doseq=True)
            document_url = urlunsplit((scheme, netloc, path, new_query_string, fragment))
            return self.EMBED_CODE_TEMPLATE.format(document_url)

        # OneDrive (for consumers)
        onedrive_regex = r'https?://(onedrive\.)?live\.com'
        if re.match(onedrive_regex, onedrive_url, re.IGNORECASE) is not None:
            # Both the viewer and the redirect endpoints map to "embed".
            new_path = path.replace('view.aspx', 'embed').replace('redir', 'embed')
            query_params['em'] = ['2']
            new_query_string = urlencode(query_params, doseq=True)
            document_url = urlunsplit((scheme, netloc, new_path, new_query_string, fragment))
            return self.EMBED_CODE_TEMPLATE.format(document_url)

        # Not a recognised OneDrive URL.
        return None

示例15: normalize_url

def normalize_url(url, domain_canonical=None):
    """
    Ensure we have a valid url - raise exception if not.

    If given, we convert the domain to a domain_canonical

    A URL without a scheme is prefixed with ``http://``. The domain is
    lower-cased, and an empty path is replaced by ``/``.

    :param url: URL to normalize; surrounding whitespace is stripped.
    :param domain_canonical: optional replacement for the domain part.
    :return: the normalized URL string.
    :raises reqfilter.Error: when the scheme is neither ``http`` nor
        ``https``, or the domain is empty, fails ``regDomain`` or is longer
        than 255 characters.
    """
    url = url.strip()
    rgURL = list(urlparse.urlsplit(url))
    if rgURL[split.scheme] == '':
        url = r"http://%s" % url
        rgURL = list(urlparse.urlsplit(url))
    # Invalid protocol
    if rgURL[split.scheme] != "http" and rgURL[split.scheme] != "https":
        raise reqfilter.Error("Invalid protocol: %s" % rgURL[split.scheme])

    if domain_canonical is not None:
        rgURL[split.domain] = domain_canonical
    if rgURL[split.domain]:
        rgURL[split.domain] = rgURL[split.domain].lower()
    if not rgURL[split.domain] or not regDomain.search(rgURL[split.domain]) or len(rgURL[split.domain]) > 255:
        raise reqfilter.Error("Invalid URL: %s" % urlparse.urlunsplit(rgURL))

    # Always end naked domains with a trailing slash as canonical
    if rgURL[split.path] == '':
        rgURL[split.path] = '/'

    return urlparse.urlunsplit(rgURL)
