This article collects typical usage examples of the urlsplit function from Python's urllib2.urlparse module. If you have been wondering what urlsplit actually does, how to call it, and what real-world uses look like, the hand-picked code examples below should help.
15 code examples of urlsplit are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
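Before the project-sourced examples, here is a minimal standalone sketch of what urlsplit returns; the URL is made up for illustration. In Python 2, urllib2 imports the urlparse module, so urllib2.urlparse.urlsplit and urlparse.urlsplit are the same function:

from urllib2 import urlparse

# Split a URL into its five components; the query string is kept verbatim.
parts = urlparse.urlsplit('http://www.example.com/path/page?lang=en#top')
print parts.scheme    # -> http
print parts.netloc    # -> www.example.com
print parts.path      # -> /path/page
print parts.query     # -> lang=en
print parts.fragment  # -> top
# urlunsplit is the inverse; the round trip reproduces the original URL.
print urlparse.urlunsplit(parts)  # -> http://www.example.com/path/page?lang=en#top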
Example 1: validate_links
def validate_links(data):
    widgets = [Bar(), SimpleProgress()]
    pbar = ProgressBar(widgets=widgets, maxval=len(data)).start()
    for i, element in enumerate(data):
        url = element['url']
        if url == '':
            continue
        scheme = urlparse.urlsplit(url).scheme
        host = urlparse.urlsplit(url).netloc
        if scheme in ('http', 'https') and \
                url_status_cache.get(url) is not True:
            try:
                request = head(url, timeout=10)
                # some web sites cannot handle HEAD requests
                # (note the one-element tuple: a bare string here would be
                # a substring test, not a membership test)
                if request.status_code in (403, 405, 500) or \
                        host in ('mobil.morgenpost.de',):
                    request = get(url)
            except Timeout:
                stderr.write('Connection to <%s> timed out.\n' % url)
                exit(1)
            except ConnectionError as e:
                stderr.write('Connection to <%s> failed.\n' % url)
                stderr.write(str(e) + '\n')
                exit(1)
            if request.ok:
                url_status_cache.set(url, request.ok)
            else:
                stderr.write('<%s> is unreachable.\n' % url)
                exit(1)
        pbar.update(i + 1)
Example 2: _send_header
def _send_header(self, header_pieces, headers, body, is_request):
    if not self.headers_prepared:
        body_length = len(body)
        had_length = False
        had_host = False
        if is_request:
            resource = header_pieces[1]
            splitted = urlparse.urlsplit(resource)
            url = splitted.path
            if splitted.query:
                url += '?' + splitted.query
            header_line = '%s %s HTTP/%s\r\n' % (header_pieces[0], url, header_pieces[2])
        else:
            header_line = 'HTTP/%s %s %s\r\n' % header_pieces
        io_request = StringIO()
        io_request.write(header_line)
        for name, value in headers.iteritems():
            if name == 'content-length':
                io_request.write('%s: %s\r\n' % (name.title(), body_length))
                had_length = True
            else:
                io_request.write('%s: %s\r\n' % (name.title(), value))
            if name == 'host':
                had_host = True
        if not had_length and body_length > 0:
            io_request.write('%s: %s\r\n' % ('Content-Length', body_length))
        if not had_host and is_request:
            splitted = urlparse.urlsplit(resource)
            io_request.write('%s: %s\r\n' % ('Host', splitted.hostname))
        io_request.write('\r\n')
        self.buffer = io_request.getvalue()
        io_request.close()
        self.headers_prepared = True
        self.to_write = len(self.buffer)
        self.written = 0
    if not self.headers_sent:
        while self.to_write > 0:
            written = self.csock.send(self.buffer[self.written:])
            self.written += written
            self.to_write -= written
        self.headers_sent = True
Example 3: download_metadata
def download_metadata(target_directory):
    """
    Downloads XML files for PMCIDs on stdin into given directory.
    """
    stderr.write('Input PMCIDs, delimited by whitespace: ')
    pmcids = stdin.read().split()
    if len(pmcids) == 0:
        raise RuntimeError('No PMCIDs found.')
    # delete files from earlier invocations
    listing = listdir(target_directory)
    for filename in listing:
        file_path = path.join(target_directory, filename)
        stderr.write("Removing “%s” … " % file_path)
        remove(file_path)
        stderr.write("done.\n")
    # chunk function by nosklo, source:
    # <http://stackoverflow.com/questions/434287/what-is-the-most-pythonic-way-to-iterate-over-a-list-in-chunks#answer-434328>
    def chunker(seq, size):
        return (seq[pos:pos + size] for pos in xrange(0, len(seq), size))
    for i, chunk in enumerate(chunker(pmcids, 365)):
        url = _get_query_url_from_pmcids(chunk)
        yield {'url': url, 'completed': 0, 'total': 1}
        url_path = urlparse.urlsplit(url).path
        local_filename = path.join(target_directory,
                                   url_path.split('/')[-1] + str(i))
        with open(local_filename, 'wb') as local_file:
            content = _get_file_from_pmcids(chunk)
            local_file.write(content.read())
        yield {'url': url, 'completed': 1, 'total': 1}
Example 4: validateURL
def validateURL(cls, full_url, video_item=True):
    """Make sure the url passed is in a valid form and return a video parser object"""
    if not isinstance(full_url, str):
        raise TypeError("Argument must be a string")
    spliturl = urlparse.urlsplit(full_url)
    hostname = spliturl.hostname
    # print len(cls.parsers.keys())
    if not hostname:
        return None
    elif hostname.startswith("www."):
        # strip the "www." prefix by slicing; lstrip("www.") would be wrong
        # here, since lstrip removes any of the characters 'w' and '.' and
        # would mangle a host like "www.web.com" into "eb.com"
        hostname = hostname[len("www."):]
    if hostname not in cls.parsers:
        return None
    page_parser = cls.parsers[hostname].checkURL(full_url)
    if page_parser and video_item:
        youtube_video = VideoItem(page_parser)
    elif page_parser:
        youtube_video = page_parser
    else:
        youtube_video = None
    return youtube_video
Example 5: set_language_ex
def set_language_ex(request):
    next = request.POST.get('next', request.GET.get('next'))
    if not is_safe_url(url=next, host=request.get_host()):
        next = request.META.get('HTTP_REFERER')
        if not is_safe_url(url=next, host=request.get_host()):
            next = '/'
    # remove lang from query
    scheme, netloc, path, query, fragment = urlparse.urlsplit(next)
    parsed_query = urlparse.parse_qsl(query)
    altered = False
    for k, v in parsed_query[:]:
        if LANG_GET_KEY == k:
            parsed_query.remove((k, v))
            altered = True
    if altered:
        query = urllib.urlencode(parsed_query)
        next = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
    response = http.HttpResponseRedirect(next)
    if request.method == 'POST':
        lang_code = request.POST.get('language', None)
        if lang_code and check_for_language(lang_code):
            if hasattr(request, 'session'):
                request.session[LANGUAGE_SESSION_KEY] = lang_code
            else:
                response.set_cookie(settings.LANGUAGE_COOKIE_NAME, lang_code,
                                    max_age=settings.LANGUAGE_COOKIE_AGE,
                                    path=settings.LANGUAGE_COOKIE_PATH,
                                    domain=settings.LANGUAGE_COOKIE_DOMAIN)
    return response
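Example 5 above (like Examples 11 and 14 further down) follows a split-modify-reassemble pattern: urlsplit the URL, rewrite the query with parse_qsl/urlencode, then put it back together with urlunsplit. A condensed, self-contained sketch of that pattern; the URL and parameter name are made up:

import urllib
import urlparse

def drop_query_param(url, key):
    # Remove every occurrence of one query parameter, leaving the rest intact.
    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
    pairs = [(k, v) for k, v in urlparse.parse_qsl(query) if k != key]
    return urlparse.urlunsplit((scheme, netloc, path,
                                urllib.urlencode(pairs), fragment))

print drop_query_param('http://www.example.com/?lang=de&page=2', 'lang')
# -> http://www.example.com/?page=2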
Example 6: download_content_list
def download_content_list(detail_url, headers, timeout):
    """
    sample url: http://weixin.sogou.com/gzhjs?openid=oIWsFt86NKeSGd_BQKp1GcDkYpv0&ext=D4y5Z3wUwj5uk6W7Yk9BqC3LAaFqirWHT5QFje14y0dip_leVhZF6qjo9Mm_UUVg&cb=sogou.weixin_gzhcb&page=1&gzhArtKeyWord=&tsn=0&t=1459425446419&_=1459425446169
    openid is fixed,
    ext is fixed as well,
    and so is cb=sogou.weixin_gzhcb;
    the only fields that change are t and _, which look like timestamps
    taken when the page is opened
    """
    global start_flag
    total_records = 0
    context_lst = []
    _t = start_flag
    now = int(time.time() * 1000)
    url_netloc = urlparse.urlsplit(detail_url)
    cur_url = 'http://%s/gzhjs?%s' % (url_netloc.netloc, url_netloc.query)
    params = "cb=sogou.weixin_gzhcb&page=%s&gzhArtKeyWord=&tsn=0&t=%s&_=%s"
    query_url = cur_url + '&' + params
    for i in range(1, 11):
        target_url = query_url % (i, now, _t)
        print target_url
        resp = download_page(target_url, headers, timeout=DEFAULT_TIMEOUT)
        # strip the JSONP wrapper "sogou.weixin_gzhcb(...)" before parsing
        strip_text = resp.text.replace('sogou.weixin_gzhcb(', '')
        strip_text = strip_text[:len(strip_text) - 1]
        context_lst.extend(json.loads(strip_text).get('items', []))
        if not total_records:
            total_records = json.loads(strip_text).get('totalItems', 0)
        _t = _t + 1
        time.sleep(2)
    return context_lst
Example 7: submit
def submit(self, opener, res):
    """Submit the WAYF form with the IdP.

    :param opener: the urllib2 opener
    :param res: the response object
    """
    log.info("Submitting form to wayf")
    # Set IDP to correct IDP
    wayf_data = {}
    idp = self.idp
    data = self.data
    idps = {}
    for d in data["user_idp"]:
        if isinstance(data["user_idp"][d], dict):
            idps.update(data["user_idp"][d])
    if idp.get_idp() not in idps:
        raise WAYFException("Can't find IdP '%s' in WAYF's IdP list" % idp)
    wayf_data["user_idp"] = idps[idp.get_idp()]
    wayf_data["Select"] = "Select"
    if data["form"]["action"].startswith("?"):
        # strip query and fragment from the current URL before appending
        # the form's action
        urlsp = urlparse.urlsplit(res.url)
        url = urlparse.urlunsplit((urlsp[0], urlsp[1], urlsp[2], "", ""))
        url += data["form"]["action"]
    else:
        url = urlparse.urljoin(res.url, data["form"]["action"])
    data = urllib.urlencode(wayf_data)
    request = Request(url, data)
    log.debug("POST: %s" % request.get_full_url())
    response = opener.open(request)
    return request, response
Example 8: open_url
def open_url(url, **kwargs):
    """
    open_url(url, **kwargs) - open url and return a file-like object

    url - local file path or full url path. Allowed protocols are local
          file path, file, http and ftp
    kwargs - additional attributes according to protocol: 'mode' for local
          path and file protocols; 'proxy', 'data' and 'timeout'
          (Python >= 2.6) for http and ftp protocols

    Examples:
        open_url('/home/praetorian/secret.txt')
        open_url('file:///home/praetorian/secret.txt', mode='r')
        open_url('http://domain.tld/secret.txt', proxy='172.16.1.100:8000')
        open_url('ftp://domain.tld/secret.txt')
    """
    bits = urlparse.urlsplit(url)
    attrs = kwargs
    if bits.scheme in ('', 'file'):
        url = bits.netloc + bits.path
        opener = open
    elif bits.scheme in ('http', 'ftp'):
        handlers = []
        if 'proxy' in attrs:
            handlers.append(ProxyHandler({bits.scheme: attrs.pop('proxy')}))
        url = bits.geturl()
        opener = build_opener(*handlers).open
    else:
        raise URLError("Unsupported protocol '%s'" % bits.scheme)
    return opener(url, **attrs)
Example 9: _do_request
def _do_request(self, request_id, parameters={}):
    if request_id is None:
        # Generate a new request identifier using the class' default generator
        request_id = self.idgenerator.id()
    req_params = dict(parameters)
    req_params.update(dict(
        partner=self.partner,
        vendor=self.vendor,
        user=self.username,
        pwd=self.password,
    ))
    parmlist = self._build_parmlist(req_params)
    headers = {
        'Host': urlparse.urlsplit(self.url_base)[1],
        'X-VPS-REQUEST-ID': str(request_id),
        'X-VPS-CLIENT-TIMEOUT': str(self.timeout),  # Doc says to do this
        'X-VPS-Timeout': str(self.timeout),  # Example says to do this
        'X-VPS-INTEGRATION-PRODUCT': self.CLIENT_IDENTIFIER,
        'X-VPS-INTEGRATION-VERSION': self.API_VERSION,
        'X-VPS-VIT-OS-NAME': sys.platform,
        'Connection': 'close',
        'Content-Type': 'text/namevalue',
    }
    self.log.debug(u'Request Headers: %s' % headers)
    try_count = 0
    results = None
    while results is None and try_count < self.MAX_RETRY_COUNT:
        try:
            try_count += 1
            request = Request(
                url=self.url_base,
                data=parmlist.encode('utf-8'),
                headers=headers)
            response = urlopen(request)
            result_parmlist = response.read()
            response.close()
            self.log.debug(
                u'Result text: %s' % result_parmlist.decode('utf-8')
            )
            results = self._parse_parmlist(result_parmlist)
        except Exception, e:
            if try_count < self.MAX_RETRY_COUNT:
                self.log.warn(
                    u'API request attempt %s of %s failed - %%s' % (
                        try_count, self.MAX_RETRY_COUNT), e
                )
            else:
                self.log.exception(u'Final API request failed - %s', e)
                # re-raise with the original traceback
                raise
Example 10: version_matcher
def version_matcher(self, url):
    fname = os.path.basename(urlparse.urlsplit(url).path)
    # the dots are escaped so they match literal dots, not any character
    version_match = re.search(r"([0-9]{2}\.[0-9]{0,2}\.[0-9]{0,2})", fname)
    if version_match is None:
        raise ProcessorError("Something went wrong matching FMP update to full version.")
    else:
        return version_match.group(1)
Example 11: victimise
def victimise(victim, uri):
    raw_url = victim + uri
    scheme, netloc, path, raw_query, fragment = urlparse.urlsplit(raw_url)
    query = urlparse.parse_qs(raw_query)
    url = urlparse.urlunsplit((scheme, netloc, path,
                               urlencode(query, True), fragment))
    print url
    http_client.fetch(url, fetch, use_gzip=False)
Example 12: download_metadata
def download_metadata(target_directory):
    """
    Downloads XML files for DOIs on stdin into given directory.
    """
    stderr.write('Input DOIs, delimited by whitespace: ')
    dois = stdin.read().split()
    if len(dois) == 0:
        raise RuntimeError('No DOIs found.')
    stderr.write('Getting PubMed Central IDs for given DOIs … ')
    pmcids = _get_pmcids_from_dois(dois)
    if len(pmcids) == 0:
        raise RuntimeError('No PubMed Central IDs for given DOIs found.')
    stderr.write('found: %s\n' % ', '.join(pmcids))
    url = _get_query_url_from_pmcids(pmcids)
    yield {'url': url, 'completed': 0, 'total': 1}
    url_path = urlparse.urlsplit(url).path
    local_filename = path.join(target_directory,
                               url_path.split('/')[-1])
    with open(local_filename, 'wb') as local_file:
        content = _get_file_from_pmcids(pmcids)
        local_file.write(content.read())
    yield {'url': url, 'completed': 1, 'total': 1}
Example 13: login_proceed
def login_proceed(request):
    """View that handles the successful login.
    """
    template_name = '_user_login.html'
    # If the request came from the logout page, redirect to the home page
    referer_path = urlparse.urlsplit(request.META['HTTP_REFERER'])[2]
    if referer_path == reverse('auth_logout'):
        response = {
            'authentication': 'success',
            'redirect': reverse('home_page'),
        }
    elif referer_path == reverse('registration_activation_complete'):
        response = {
            'authentication': 'success',
            'redirect': reverse('view_profile'),
        }
    else:
        response = {
            'authentication': 'success',
            'markup': loader.render_to_string(template_name,
                                              RequestContext(request, {}))
        }
    json_response = json.dumps(response)
    return http.HttpResponse(json_response)
Example 14: make_requests_from_url
def make_requests_from_url(self, url):
    kw = self.macro.query(url)
    us = urlparse.urlsplit(url)
    qstr = dict(urlparse.parse_qsl(us.query))
    # _replace works because the split result is a namedtuple subclass
    base = urlparse.urlunsplit(us._replace(query=''))
    meta = {'keyword': kw}
    return FormRequest(base, formdata=qstr, method=self.start_method,
                       headers=self.headers, cookies=self.cookies,
                       dont_filter=True, meta=meta)
Example 15: testIndexRedirect
def testIndexRedirect(self):
    if settings.SET_URL_ROOT_HANDLER:
        response = self.client.get('/')
        self.assertEquals(response.status_code, 302)
        # Documentation says that we must get response.headers, but
        # instead we have an HttpResponseRedirect object here
        self.assertEquals(urlparse.urlsplit(response['Location'])[2],
                          '/' + settings.BLOG_URLCONF_ROOT)
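Several of the examples above index the split result positionally, e.g. urlsplit(...)[1] in Example 9 and urlsplit(...)[2] in Examples 13 and 15. The result of urlsplit is a tuple subclass, so indices 0 through 4 are equivalent to the named attributes; a quick sketch with a made-up URL:

import urlparse

parts = urlparse.urlsplit('http://www.example.com/blog/?p=1#c2')
assert parts[0] == parts.scheme    # 'http'
assert parts[1] == parts.netloc    # 'www.example.com'
assert parts[2] == parts.path      # '/blog/'
assert parts[3] == parts.query     # 'p=1'
assert parts[4] == parts.fragment  # 'c2'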