Python urlparse.urljoin函数代码示例

本文整理汇总了Python中urllib2.urlparse.urljoin函数的典型用法代码示例。如果您正苦于以下问题：Python urljoin函数的具体用法？Python urljoin怎么用？Python urljoin使用的例子？那么，这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了urljoin函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: parse_products

    def parse_products(self, response):
        print "parse_products", response.url
        sel = Selector(response)
        breadcrumb = sel.xpath('//div[contains(@class,"breadCrumb")]')
        categories = [span for span in breadcrumb.xpath(".//span[@itemprop='title']/text()").extract()[1:]]
        categories.append(breadcrumb.xpath(".//span/text()").extract()[-1])
        print categories
        
        for product in sel.xpath('//div[contains(@id,"quickLookItem")]'):
            # check if it is a multistore product
            if product.xpath('.//span[contains(@id, "numStoresQA")]'):
                print product.xpath(".//a/@href").extract()[0]
                url = product.xpath(".//a/@href").extract()[0]
                url = "/".join(url.split("/")[:-1])+"/prices"
                yield Request(urlparse.urljoin(response.url, url), callback=self.parse_multiple_store_product)
            else:
                # It is not a multistore product. Parse it.
                item = ShoppingdotcomItem()
                item["categories"] = categories
                item["product_name"] = product.xpath(".//span[contains(@id, 'nameQA')]/@title").extract()[0]
                if product.xpath(".//span[@class='placeholderImg']").extract():
                    item["image_urls"] = product.xpath(".//span[@class='placeholderImg']/text()").extract()
                else:
                    item["image_urls"] = product.xpath(".//div[@class='gridItemTop']//img/@src").extract()
                item["product_urls"] = [urlparse.urljoin(response.url, product.xpath(".//a/@href").extract()[0])]
                item["stores"] = product.xpath(".//a[@class='newMerchantName']/text()").extract()
                item["prices"] = [price.replace("\n","") for price in product.xpath(".//span[@class='productPrice']/a/text()").extract()]
                yield item

        # Check if Next page link is there then yeild request with next URL
        if sel.xpath("//a[@name='PLN']").extract():
            yield Request(urlparse.urljoin(response.url, sel.xpath("//a[@name='PLN']/@href").extract()[0]), self.parse_products)
            pass

开发者ID:Scorpio1987，项目名称:shoppingdotcom，代码行数:33，代码来源:shoppingdotcomspider.py

示例2: parse_start_url

 def parse_start_url(self, response):
     """Queue product-listing requests for the anchors on the start page.

     An anchor whose href ends in a "products" path segment is requested
     as is. An anchor whose href contains "xFA-" is rewritten (marker
     removed, everything from the first "~" dropped, "/products" appended)
     and requested as well. Both kinds are parsed by ``parse_products``.
     """
     print(response.url)
     page = Selector(response)

     for anchor in page.xpath("//a"):
         hrefs = anchor.xpath("@href").extract()
         href = hrefs[0] if hrefs else None
         if not href:
             # Anchor without a usable href: nothing to follow.
             continue
         if href.split("/")[-1] == "products":
             yield Request(urlparse.urljoin(response.url, href), callback=self.parse_products)
         if "xFA-" in href:
             listing = href.replace("xFA-", "").split("~")[0] + "/products"
             yield Request(urlparse.urljoin(response.url, listing), callback=self.parse_products)

开发者ID:Scorpio1987，项目名称:shoppingdotcom，代码行数:13，代码来源:shoppingdotcomspider.py

示例3: main

def main(argv=sys.argv):
    """Program entry point.

    Starting from the blog category page, fetches each page, prints every
    link ``buscar_links`` finds on it as an absolute URL, and continues with
    the URI that ``buscar_links`` returns alongside the links (presumably
    the next page) until that URI is empty.
    """
    url = ("http://www.vientonomade.com.ar/index.php?option=com_content&view=category&"
           "layout=blog&id=8&Itemid=10")
    fetcher = httplib2.Http()
    fetch_page = partial(obtener_pagina, fetcher)

    while url:
        next_uri, links = buscar_links(fetch_page(url))
        for link in links:
            try:
                print(urlparse.urljoin(url, link))
            except UnicodeEncodeError:
                # Best effort: a link that cannot be encoded for output is skipped.
                pass
        url = urlparse.urljoin(url, next_uri) if next_uri else None

开发者ID:D3f0，项目名称:vnomade，代码行数:15，代码来源:scrap.py

示例4: check_config

    def check_config():
        """
        Check crucial configuration details for existence and workability.

        Verifies that the 'url' and 'backend' parameters are present (via
        ``Config.check_params``), that a backend file named
        ``<backend>.py`` is among the available backends, and -- for every
        backend except 'github' -- that the root of the bugtracker's URL
        (scheme plus host) can be opened.

        The filename for the backend in the backends/ directory needs to be the
        same as the configuration argument specifying that backend. For
        instance, invoking the Launchpad backend uses 'lp', and so the filename
        is 'lp.py'.

        Raises:
            InvalidConfig: if the backend does not exist, the server answers
                with an HTTP error, or the server cannot be reached.
        """
        Config.check_params(['url', 'backend'])

        if Config.backend + ".py" not in Backend.get_all_backends():
            raise InvalidConfig('Backend "' + Config.backend + '" does not exist')

        # Only the site root is probed, not the full configured URL.
        url = urlparse.urlparse(Config.url)
        check_url = urlparse.urljoin(url.scheme + '://' + url.netloc, '')
        print("Checking URL: " + check_url)
        req = Request(check_url)

        if Config.backend != 'github':
            try:
                response = urlopen(req)
            except HTTPError as e:
                raise InvalidConfig('The server could not fulfill the request '
                                    + str(e.msg) + '(' + str(e.code) + ')')
            except URLError as e:
                raise InvalidConfig('We failed to reach a server. ' + str(e.reason))
            else:
                # Only reachability matters; release the connection right away
                # instead of leaving it open until garbage collection.
                response.close()

开发者ID:davidziman，项目名称:Bicho，代码行数:31，代码来源:config.py

示例5: transform

def transform(row, table):
    """Return *row* as a dict with an absolute "link" and a separate "state".

    The row's "link" is resolved against https://pt.wikipedia.org, and its
    "name" is split by the first match of ``regexp_city_state`` into the new
    "name" and "state" values. ``table`` is not used.
    """
    fields = row._asdict()
    fields['link'] = urlparse.urljoin('https://pt.wikipedia.org', fields['link'])
    first_match = regexp_city_state.findall(fields['name'])[0]
    city, state = first_match
    fields['name'] = city
    fields['state'] = state
    return fields

开发者ID:abelthf，项目名称:rows，代码行数:7，代码来源:brazilian_cities_wikipedia.py

示例6: bot_send_video

def bot_send_video(gesture, video_url, video_preview_img, to_mid="u2ef38a8c1f3f1c2c63bdf9c0a629023c"):
    """Post a video message event to the LINE trial-bot events endpoint.

    The video link sent is ``gesture.video.url`` resolved against the
    eldertranslator site root; ``video_preview_img`` is sent as the preview
    image and ``to_mid`` as the single recipient.

    Returns the ``requests`` response object of the POST.
    """
    endpoint = 'https://trialbot-api.line.me/v1/events'
    site_root = 'https://eldertranslator.herokuapp.com/'

    headers = {
        'Content-type': 'application/json; charset=UTF-8',
        'X-Line-ChannelID': settings.CHANNEL_ID,
        'X-Line-ChannelSecret': settings.CHANNEL_SECRET,
        'X-Line-Trusted-User-With-ACL': settings.CHANNEL_MID,
    }

    # NOTE(review): the video_url argument is overwritten here, so whatever
    # the caller passed is never used -- confirm this is intended.
    video_url = urlparse.urljoin(site_root, gesture.video.url)

    body = {
        'to': [to_mid],
        'toChannel': 1383378250,
        'eventType': "138311608800106203",
        'content': {
            "contentType": 3,
            "toType": 1,
            "originalContentUrl": video_url,
            "previewImageUrl": video_preview_img
        },
    }

    # NOTE(review): verify=False turns off TLS certificate verification.
    return requests.post(endpoint, data=json.dumps(body), headers=headers, verify=False)

开发者ID:rasca0027，项目名称:ElderTranslator，代码行数:30，代码来源:line.py

示例7: parse

    def parse(self, response):
        """Download the delinquent-parcel archive and request data per parcel.

        The archive linked from the response page is saved as
        ``delinquent.zip`` and unpacked into ``delinquent/``. The first CSV
        found there is read: its header row locates the columns of interest
        by name prefix, and every following row yields a ``scrapy.Request``
        (handled by ``get_tax_eligibility``) that carries a partially filled
        ``ReapItem`` in ``request.meta['item']``.
        """
        archive_hrefs = Selector(response).xpath(
            '//*[@id="box1"]/td[1]/li/font/i/a/@href').extract()
        archive_url = urlparse.urljoin(response.url, archive_hrefs[0])
        urllib.urlretrieve(archive_url, 'delinquent.zip')
        unzip('delinquent.zip', 'delinquent')

        csv_path = glob.glob('delinquent/*.csv')[0]
        with open(csv_path, 'rb') as csvfile:
            rows = csv.reader(csvfile, delimiter=',')
            # Header row: strip double quotes and surrounding whitespace, then
            # remember the position of each column that is needed below.
            for position, heading in enumerate(next(rows)):
                heading = re.sub('["]', "", heading).strip()
                if heading.startswith("PARCELID"):
                    parcel_id_col = position
                if heading.startswith("OWNERNAME1"):
                    owner_col = position
                if heading.startswith("PARCELLOCATION"):
                    location_col = position
                if heading.startswith("CLS"):
                    class_col = position
                if heading.startswith("ASMTBLDG"):
                    building_col = position
            # Data rows: one item and one follow-up request per parcel.
            for row in rows:
                item = ReapItem()
                item['parcel_id'] = re.sub('["]', "", row[parcel_id_col]).strip()
                item['parcel_location'] = row[location_col].strip()
                item['parcel_class'] = row[class_col].strip()
                item['building_value'] = row[building_col].strip()
                lookup_url = "http://mctreas.org/master.cfm?parid={0}&taxyr={1}&own1={2}".format(
                    item['parcel_id'], str(YEAR), row[owner_col])
                request = scrapy.Request(lookup_url, callback=self.get_tax_eligibility)
                request.meta['item'] = item
                yield request

开发者ID:AndrewADev，项目名称:scrapers，代码行数:32，代码来源:reap_spider.py

示例8: parse

def parse(url, body, **kwargs):
    """Turn a xici.net page into a minimal GBK-encoded HTML document.

    ``body`` is decoded as GBK (undecodable bytes are ignored) and scanned
    for the first line starting with ``var docData``. The JSON object on
    that line supplies the title, page URL, date and floor content, which
    are rendered into a small HTML page and returned GBK-encoded. When no
    such line exists, ``'<html/>'`` is returned. ``url`` is not used.
    """
    for text_line in body.decode('gbk', errors='ignore').splitlines():
        if text_line.lstrip().startswith('var docData'):
            # The JSON object runs from the first '{' to the last '}' of the line.
            start = text_line.find('{')
            end = text_line.rfind('}')
            result = json.loads(text_line[start:end + 1])['result']
            doc = result['docinfo'][0]['foolrinfo']
            doc['title'] = result['sDocTitle']
            doc['url'] = urlparse.urljoin('http://www.xici.net', result['strPageUrl'])
            doc['date'] = '20' + doc['LongDate']
            doc['content'] = html.fromstring(doc['floorcontent']).text_content()

            page = Template('''
                <html>
                <head>
                    <meta content="text/html; charset=utf-8" http-equiv="content-type">
                    <title>{{doc['title']}}</title>
                </head>
                <body>
                    <a id="title" href="{{doc['url']}}">{{doc['title']}}</a>
                    <p id="date">{{doc['date']}}</p>
                    <div id="content">{{doc['content']}}</div>
                </body>
                </html>''')

            return page.render(doc=doc).encode('gbk', errors='ignore')

    # No line carried the docData payload.
    return '<html/>'

开发者ID:UncleJim，项目名称:project，代码行数:27，代码来源:xici_plugin.py

示例9: submit

    def submit(self, opener, res):
        """Submit the login form to the COSign IdP.

        The parsed form is read from ``self.data`` and the credentials from
        ``self.cm``; the username, password, "service" and "ref" values are
        url-encoded and sent as the request body.

        :param opener: the urllib2 opener used to send the request
        :param res: the response object of the page holding the form; its
            URL is the base for resolving the form action
        :returns: a ``(request, response)`` tuple

        """
        fields = {}
        credentials = self.cm
        form = self.data
        action_url = urlparse.urljoin(res.url, form["form"]["action"])
        log.info("Form Authentication from: %s" % action_url)

        fields[self.username_field] = credentials.get_username()
        fields[self.password_field] = credentials.get_password()
        for hidden in ("service", "ref"):
            fields[hidden] = form[hidden]["value"]

        request = Request(action_url, data=urllib.urlencode(fields))
        log.info("Submitting login form")
        log.debug("POST: %s" % request.get_full_url())
        return request, opener.open(request)

开发者ID:russell，项目名称:sibboleth，代码行数:25，代码来源:forms.py

示例10: submit

    def submit(self, opener, res):
        """Submit the WAYF form, selecting the configured IdP.

        The parsed form is read from ``self.data`` and the IdP name from
        ``self.idp.get_idp()``. The selection is url-encoded and appended to
        the form action as a query string.

        :param opener: the urllib2 opener used to send the request
        :param res: the response object of the page holding the form; its
            URL is the base for resolving the form action
        :returns: a ``(request, response)`` tuple
        :raises WAYFException: if the IdP is not in the form's origin list

        """
        log.info('Submitting form to wayf')
        form = self.data
        idp_name = self.idp.get_idp()
        if idp_name not in form['origin']:
            raise WAYFException(
                "Can't find IdP '{0}' in WAYF's IdP list".format(idp_name))

        # Point the form at the requested IdP and copy its hidden fields.
        selection = {}
        selection['origin'] = form['origin'][idp_name]
        for hidden in ('shire', 'providerId', 'target', 'time'):
            selection[hidden] = form[hidden]['value']
        selection['cache'] = 'false'
        selection['action'] = 'selection'

        action_url = urlparse.urljoin(res.url, form['form']['action'])
        query = urllib.urlencode(selection)
        request = Request(action_url + '?' + query)
        log.debug("POST: %s" % request.get_full_url())
        return request, opener.open(request)

开发者ID:grith，项目名称:sibboleth，代码行数:30，代码来源:forms.py

示例11: install_artifacts

def install_artifacts(artifacts, dirstruct, installdir, basestaticurl):
    """
    Save every artifact named in ``dirstruct`` below ``installdir``.

    ``dirstruct`` maps a directory (relative to ``installdir``) to the
    artifact names that belong in it; missing directories are created.
    A name found in ``artifacts`` is saved from that entry; any other name
    is wrapped in an ``Artifact`` pointing at ``basestaticurl`` + name.

    Returns the list of absolute destination paths, in the order saved.
    ``basestaticurl`` must end with "/".
    """
    assert basestaticurl.endswith("/"), "Basestaticurl should end with /"
    saved_paths = []
    for subdir, names in dirstruct.items():
        target_dir = os.path.join(installdir, subdir)
        if os.path.exists(target_dir):
            assert os.path.isdir(target_dir)
        else:
            log.warn(msg="Making install directory %s" % target_dir)
            os.makedirs(target_dir)
        for name in names:
            target_path = os.path.abspath(os.path.join(target_dir, name))
            if name not in artifacts:
                # Not a known artifact: fetch it from the static collection.
                artifact = Artifact(name, urlparse.urljoin(basestaticurl, name))
            else:
                artifact = artifacts[name]
            artifact.save(target_path)
            saved_paths.append(target_path)
    return saved_paths

开发者ID:wgaggioli，项目名称:jenkinsapi，代码行数:25，代码来源:api.py

示例12: notify

    def notify(cls, alert, *args, **kwargs):
        """Send an alert notification to a HipChat room.

        Recognised keyword arguments: ``room`` (required for the request),
        ``token``, ``repeat``, ``notify``, ``color``, ``message``,
        ``message_format``, ``link``, ``link_text`` and ``zmon_host``.
        Missing token / host values fall back to ``cls._config``.

        Any exception raised while building or sending the request is
        logged and recorded on the current span; it is not re-raised.

        Returns the ``repeat`` keyword argument (0 when absent).
        """
        current_span = extract_span_from_kwargs(**kwargs)

        url = cls._config.get('notifications.hipchat.url')
        token = kwargs.get('token', cls._config.get('notifications.hipchat.token'))
        repeat = kwargs.get('repeat', 0)
        notify = kwargs.get('notify', False)
        alert_def = alert['alert_def']
        message_format = kwargs.get('message_format', 'html')

        current_span.set_tag('alert_id', alert_def['id'])

        entity = alert.get('entity')
        is_changed = alert.get('alert_changed', False)
        is_alert = alert.get('is_alert', False)

        current_span.set_tag('entity', entity['id'])
        current_span.set_tag('alert_changed', bool(is_changed))
        current_span.set_tag('is_alert', is_alert)

        current_span.log_kv({'room': kwargs.get('room')})

        # Green when the alert is not active; otherwise the caller's color
        # (red by default).
        if alert and not alert.get('is_alert'):
            color = 'green'
        else:
            color = kwargs.get('color', 'red')

        message_text = cls._get_subject(alert, custom_message=kwargs.get('message'))

        if kwargs.get('link', False):
            zmon_host = kwargs.get('zmon_host', cls._config.get('zmon.host'))
            alert_id = alert['alert_def']['id']
            if zmon_host:
                alert_url = urlparse.urljoin(zmon_host, '/#/alert-details/{}'.format(alert_id))
            else:
                alert_url = ''
            link_text = kwargs.get('link_text', 'go to alert')
            if message_format == 'html':
                link_suffix = ' -- <a href="{}" target="_blank">{}</a>'.format(alert_url, link_text)
            else:
                link_suffix = ' -- {} - {}'.format(link_text, alert_url)
            message_text += link_suffix

        message = {
            'message': message_text,
            'color': color,
            'notify': notify,
            'message_format': message_format
        }

        try:
            # Built inside the try so a missing 'room' is handled like any
            # other send failure.
            endpoint = '{}/v2/room/{}/notification'.format(url, urllib.quote(kwargs['room']))
            # NOTE(review): this log line contains the auth token.
            logger.info(
                'Sending to: ' + endpoint + '?auth_token={}'.format(token) + ' ' + json.dumps(message))
            r = requests.post(
                endpoint,
                json=message, params={'auth_token': token}, headers={'Content-type': 'application/json'})
            r.raise_for_status()
        except Exception:
            current_span.set_tag('error', True)
            current_span.log_kv({'exception': traceback.format_exc()})
            logger.exception('Hipchat write failed!')

        return repeat

开发者ID:drummerwolli，项目名称:zmon-worker，代码行数:58，代码来源:hipchat.py

示例13: show

 def show(self):
     """Fill the IdP list in sorted order and display the window.

     The IdP names are the keys returned by ``list_idps`` for the SLCS
     login URL. The entry equal to ``self.settings.idp`` is made active.
     """
     login_url = urlparse.urljoin(self.settings.slcs, 'login')
     for name in sorted(list_idps(login_url).keys()):
         self.idps.append_text(name)
         if name == self.settings.idp:
             # The entry just appended is the last row of the model.
             self.idps.set_active(len(self.idps.get_model()) - 1)
     self.window.show_all()

开发者ID:grith，项目名称:slick.gui，代码行数:9，代码来源:main.py

示例14: relative_to_full_url

def relative_to_full_url(original_url, url):
    """
    Resolve ``url`` against ``original_url``.

    A ``url`` that already names a host (``http://host/...`` or
    ``//host/...``) is returned unchanged. Anything else is joined onto
    ``original_url``; the query string and fragment of ``url`` are kept,
    and URLs with a non-hierarchical scheme such as ``mailto:`` come back
    untouched.
    """
    try:
        from urllib2 import urlparse  # Python 2
    except ImportError:
        from urllib import parse as urlparse  # Python 3
    if not urlparse.urlparse(url).netloc:
        # Join the whole reference, not just its path: joining only the path
        # dropped "?query" / "#fragment" and turned "mailto:a@b" into a
        # page path below original_url.
        url = urlparse.urljoin(original_url, url)
    return url

开发者ID:callowayproject，项目名称:django-vintage，代码行数:9，代码来源:archiveurl.py

示例15: urlIterator

def urlIterator(startUrl, nextCssSelector):
    '''Yield the url of each page for as long as a next one is found.

    Starts with ``startUrl``. After each yield, the elements matching
    ``nextCssSelector`` on the current page are fetched; the first one that
    is (or contains) an ``<a>`` with a non-empty ``href`` gives the next url.
    Iteration stops when no such link exists.

    Fetching is slow because the page's DOM has to be parsed to find the
    next url.
    '''
    url = startUrl
    while url:
        yield url
        currentUrl = url
        url = None

        for possibleNext in getElementsFromUrl(currentUrl, nextCssSelector):
            if possibleNext.tag == 'a':
                anchor = possibleNext
            else:
                anchor = possibleNext.find('a')
            if anchor is None:
                continue
            href = anchor.get('href')
            if not href:
                # urljoin() returns its base for an empty href, which would
                # make this generator yield the same page forever.
                continue
            # A relative href is relative to the page it was found on, not
            # to the page the iteration started from.
            url = urlparse.urljoin(currentUrl, href)
            break

开发者ID:niroyb，项目名称:ScrapMoodle，代码行数:21，代码来源:scraptools.py

注：本文中的urllib2.urlparse.urljoin函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。