

Python urlparse.urlparse Function Code Examples

This article collects typical usage examples of the urlparse.urlparse function in Python. If you have been wondering what exactly the urlparse function does, how to call it, or what it looks like in real code, the hand-picked examples below should help.


The sections below present 15 code examples of the urlparse function, sorted by popularity by default.
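Before diving into the examples, here is a minimal sketch of the two calls that nearly every snippet below combines: urlparse.urlparse, which splits a URL into its components (scheme, netloc, path, query, ...), and urlparse.parse_qs, which turns the query string into a dict of value lists. This is Python 2, matching the examples; the sample URL is invented for illustration.

import urlparse

url = 'http://item.example.com/item.htm?id=12345&cm_id='
parts = urlparse.urlparse(url)
print parts.netloc    # 'item.example.com'
print parts.path      # '/item.htm'

# The second argument (keep_blank_values=True) preserves empty values
# such as cm_id above; parse_qs maps each key to a list of values.
query = urlparse.parse_qs(parts.query, True)
print query['id'][0]  # '12345'

In Python 3 the same functions live in the urllib.parse module.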

Example 1: searchcrawler

def searchcrawler(url,keyword=''):
    """
    Taobao (tb) search-page crawler
    """
    html=get_html(url)
    #print html
    if html:
        soup = BeautifulSoup(html,fromEncoding='gbk')
        items_row = soup.findAll('div',{'class':'row item icon-datalink'})
        if items_row:
            print '=======================row search row=========================='
            #print items
            for item in items_row:
                item_info = item.find('div',{'class':'col title'}).h3.a
                item_url = item_info['href']
                url_info = urlparse.urlparse(item_url)
                item_id = urlparse.parse_qs(url_info.query,True)['id'][0]
                print item_url
                print item_id
                judge_site(item_url,keyword)
        items_col = soup.findAll('div',{'class':'col item icon-datalink'})
        if items_col:
            print '=======================row search col=========================='
            #print items
            for item in items_col:
                item_info = item.find('div',{'class':'item-box'}).h3.a
                item_url = item_info['href']
                url_info = urlparse.urlparse(item_url)
                item_id = urlparse.parse_qs(url_info.query,True)['id'][0]
                print item_url
                print item_id
                judge_site(item_url,keyword)
Developer: chu888chu888, Project: Crawler-python-tbcrawler, Lines: 32, Source: crawler.py

Example 2: rendered_wall_posts

def rendered_wall_posts( wall_posts ):
	for wall_post in wall_posts:
		title = ''
		desc = ''
		site_image = ''
		article_title = ''
		urls = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', wall_post.data['post_content'])
		for url in urls: 
			parse_obj = urlparse.urlparse(url)
			site = parse_obj.netloc
			path = parse_obj.path
			conn = httplib.HTTPConnection(site)
			conn.request('HEAD',path)
			response = conn.getresponse()
			conn.close()
			ctype = response.getheader('Content-Type')
			if response.status < 400 and ctype.startswith('image'):
				wall_post.data['post_content'] = wall_post.data['post_content']+"<br/><a href='"+url+"' target='_blank'><img width='300' src='"+url+"'/></a>"
			else:
				og = opengraph.OpenGraph(url)
				if not len(og.items()) == 2:
					for x,y in og.items():
						if x == 'type' and y == 'video':
							for k,l in og.items():
								if k == 'site_name' and l == 'YouTube':
							
									url_data = urlparse.urlparse(url)
									query = urlparse.parse_qs(url_data.query)
									video = query["v"][0]
									wall_post.data['post_content'] = wall_post.data['post_content'].replace(url,"")+"<br/><iframe width='300' height='200' src='//www.youtube.com/embed/"+video+"' frameborder='0' allowfullscreen></iframe>"
								elif k == 'site_name' and l == 'Vimeo':
									url_data = urlparse.urlparse(url)
									video = url_data.path
									wall_post.data['post_content'] = wall_post.data['post_content'].replace(url,"")+"<br/><iframe src='//player.vimeo.com/video"+video+"' width='300' height='200' frameborder='0' webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe> <p></p>"
						elif x == 'type' and y == 'article':
							for k,l in og.items():
								if k == 'title':
									article_title = l
								elif k == 'site_name':
									title = l
								elif k=='description':
									desc = l
								elif k=='image':
									site_image = l
							wall_post.data['post_content'] = wall_post.data['post_content'] +"<br/><table><tr><td><img width='50' src='"+site_image+"'/></td><td><a href='"+url+"' target='_blank'>"+article_title+"</a><br/>"+title+"</td></tr></table>"
						elif x=='type':
							for k,l in og.items():
								if k == 'site_name':
									title = l
								elif k=='description':
									desc = l
								elif k=='image':
									site_image = l
							wall_post.data['post_content'] = wall_post.data['post_content'].replace(url, "<table><tr><td><img width='50' src='"+site_image+"'/></td><td><a href='"+url+"' target='_blank'>"+title+"</a><br/>"+desc+"</td></tr></table>")
				else:
					wall_post.data['post_content'] = wall_post.data['post_content'].replace(url, "<a href='"+url+"' target='_blank'>"+url+"</a>")	
	return wall_posts	
Developer: dithua, Project: collato, Lines: 57, Source: views.py

Example 3: searchcrawler

def searchcrawler(url):
    
    html=get_html(url)
#     print url
    if html:
        soup = BeautifulSoup(html,fromEncoding='gbk')
        items_row = soup.findAll('div',{'class':'item-box st-itembox'})
        if items_row:
            print '=======================row search row=========================='
            for item in items_row:
#                 print item
                item_info = item.find('h3',{'class':'summary'}).a
                item_url = item_info['href']
#                 print item_url
                
                
                sid_info = item.find('div',{'class':'col seller feature-dsi-tgr'}).a
                print sid_info
                sid_item_url = sid_info['href']
                sid_url_info = urlparse.urlparse(sid_item_url)
                sid_id = urlparse.parse_qs(sid_url_info.query,True)['user_number_id'][0]
                print sid_id
                
                judge_site(item_url, sid_id)
                
#                 logging.warning(item_id)
#                 
#                 download_reply_by_id(item_id)
                
        items_col = soup.findAll('div',{'class':'product-item row icon-datalink'})       
        if items_col:
            
            print '=======================row search col=========================='
            #print items
            for item in items_col:
                item_info = item.find('div',{'class':'title'}).a
                item_url = item_info['href']
#                 url_info = urlparse.urlparse(item_url)
#                 item_id = urlparse.parse_qs(url_info.query,True)['id'][0]
                print item_url
#                 print item_id

                sid_info = item.find('div',{'class':'seller'}).a
                print sid_info
                sid_item_url = sid_info['href']
                sid_url_info = urlparse.urlparse(sid_item_url)
                sid_id = urlparse.parse_qs(sid_url_info.query,True)['user_number_id'][0]
                print sid_id
                
                judge_site(item_url, sid_id)
Developer: fubendong, Project: wangw, Lines: 50, Source: tb.py

Example 4: post

 def post(self):
   try:
     name = self.request.POST['name']
     topic = MicroTopic.all().filter('name =', name).get()
     if not topic:
       raise ReatiweError("Topic %s does not exists." % name)
     if self.request.POST['mode']:
       mode = self.request.POST['mode']
     else:
       mode = "subscribe"
     form_fields = { "hub.mode": mode,
                     "hub.callback": "%s/callback/%s" % (settings.SITE_URL, topic.name),
                     "hub.topic": topic.url,
                     "hub.verify": "sync",
                     "hub.verify_token": topic.name }
     result = 200
     url = self.request.POST['hub']
     req = urllib2.Request(url, urllib.urlencode(form_fields))
     o = urlparse.urlparse(url)
     # superfeedr support
     if o.username and o.password:
       base64string = base64.encodestring('%s:%s' % (o.username, o.password))[:-1]
       authheader =  "Basic %s" % base64string
       new_url = "%s://%s%s" % (o.scheme, o.hostname, o.path)
       req = urllib2.Request(new_url, urllib.urlencode(form_fields))
       req.add_header("Authorization", authheader)
     urllib2.urlopen(req)
   except DownloadError, e:
     logging.error('DownloadError: %s' % repr(e))
     pass
Developer: zh, Project: ReaTiWe, Lines: 30, Source: webhooks.py

Example 5: searchcrawler

def searchcrawler(url):
    
    html=get_html(url)
#     print url
    if html:
        soup = BeautifulSoup(html,fromEncoding='gbk')
        items_row = soup.findAll('div',{'class':'product-iWrap'})
        #items_row = soup.find('div',{'class':'item-box st-itembox'})
#         print items_row
        if items_row:
            print '=======================row search row=========================='
            for item in items_row:
#                 print item
                try:
                    item_info = item.find('p',{'class':'productTitle'}).a
                except:
                    item_info = item.find('div',{'class':'productTitle productTitle-spu'}).a
                
#                 print item_info
                item_url = item_info['href']
#                 print item_url
                
                url_info = urlparse.urlparse(item_url)
                item_id = urlparse.parse_qs(url_info.query,True)['id'][0]
                print item_id
                logging.warning(item_id)
                
#                 item_id = 16862466992
                download_reply_by_id(item_id)
Developer: fubendong, Project: test, Lines: 29, Source: phone_get.py

Example 6: gensitemap

def gensitemap(server, urlformat):
    '''
    Builds the sitemap index URL for the given file server.
    Connects to the second-level indexes and reads their modification date.

    @type server: dict-like
    @param server: server document exactly as it comes from MongoDB

    @rtype: tuple (str, datetime) or None
    @return: tuple with the URL and its modification date, or None if the
             URL cannot be obtained.
    '''
    subdomain = server["ip"].split(".")[0]
    serverno = int(subdomain[6:])
    url = urlformat % serverno
    domain = urlparse.urlparse(url)[1]
    con = httplib.HTTPConnection(domain)
    con.request("HEAD", url)
    response = con.getresponse()

    if response.status == 200:
        mtime = time.mktime(time.strptime(
            response.getheader("Last-Modified"),
            "%a, %d %b %Y %H:%M:%S %Z"))
        return (url, datetime.datetime.fromtimestamp(mtime))

    return None
Developer: kultus, Project: foofind-web, Lines: 27, Source: index.py

Example 7: startupagent

    def startupagent(self, sender, **kwargs):

        if not self.bind_web_address:
            _log.info('Web server not started.')
            return
        import urlparse
        parsed = urlparse.urlparse(self.bind_web_address)
        hostname = parsed.hostname
        port = parsed.port

        _log.info('Starting web server binding to {}:{}.' \
                   .format(hostname, port))
        self.registeredroutes.append((re.compile('^/discovery/$'), 'callable',
                                      self._get_discovery))
        self.registeredroutes.append((re.compile('^/discovery/allow$'),
                                      'callable',
                                      self._allow))
        self.registeredroutes.append((re.compile('^/$'), 'callable',
                                      self._redirect_index))
        port = int(port)
        vhome = os.environ.get('VOLTTRON_HOME')
        logdir = os.path.join(vhome, "log")
        if not os.path.exists(logdir):
            os.makedirs(logdir)

        self.appContainer = WebApplicationWrapper(self, hostname, port)
        svr = WSGIServer((hostname, port), self.appContainer)
        self._server_greenlet = gevent.spawn(svr.serve_forever)
Developer: schandrika, Project: volttron, Lines: 28, Source: web.py

Example 8: fps_ipn_handler

    def fps_ipn_handler(self, request):
        uri = request.build_absolute_uri()
        parsed_url = urlparse.urlparse(uri)
        resp = self.fps_connection.verify_signature(UrlEndPoint="%s://%s%s" % (parsed_url.scheme,
                                                                  parsed_url.netloc,
                                                                  parsed_url.path),
                                                    HttpParameters=request.body)
        if not resp.VerifySignatureResult.VerificationStatus == "Success":
            return HttpResponseForbidden()

        data = dict(map(lambda x: x.split("="), request.body.split("&")))
        for (key, val) in data.items():
            data[key] = urllib.unquote_plus(val)
        if AmazonFPSResponse.objects.filter(transactionId=data["transactionId"]).count():
            resp = AmazonFPSResponse.objects.get(transactionId=data["transactionId"])
        else:
            resp = AmazonFPSResponse()
        for (key, val) in data.items():
            attr_exists = hasattr(resp, key)
            if attr_exists and not callable(getattr(resp, key, None)):
                if key == "transactionDate":
                    val = datetime.datetime(*time.localtime(float(val))[:6])
                setattr(resp, key, val)
        resp.save()
        if resp.statusCode == "Success":
            transaction_was_successful.send(sender=self.__class__,
                                            type=data["operation"],
                                            response=resp)
        else:
            if not "Pending" in resp.statusCode:
                transaction_was_unsuccessful.send(sender=self.__class__,
                                                  type=data["operation"],
                                                  response=resp)
        # Return a HttpResponse to prevent django from complaining
        return HttpResponse(resp.statusCode)
Developer: BrajeshKhare, Project: merchant, Lines: 35, Source: amazon_fps_integration.py

Example 9: startupagent

    def startupagent(self, sender, **kwargs):

        if not self.bind_web_address:
            _log.info('Web server not started.')
            return
        import urlparse
        parsed = urlparse.urlparse(self.bind_web_address)
        hostname = parsed.hostname
        port = parsed.port

        _log.info('Starting web server binding to {}:{}.' \
                   .format(hostname, port))
        self.registeredroutes.append((re.compile('^/discovery/$'), 'callable',
                                      self._get_discovery))
        self.registeredroutes.append((re.compile('^/discovery/allow$'),
                                      'callable',
                                      self._allow))
        self.registeredroutes.append((re.compile('^/$'), 'callable',
                                      self._redirect_index))
        port = int(port)
        vhome = os.environ.get('VOLTTRON_HOME')
        logdir = os.path.join(vhome, "log")
        if not os.path.exists(logdir):
            os.makedirs(logdir)
        with open(os.path.join(logdir, 'web.access.log'), 'wb') as accesslog:
            with open(os.path.join(logdir, 'web.error.log'), 'wb') as errlog:
                server = pywsgi.WSGIServer((hostname, port), self.app_routing,
                                       log=accesslog, error_log=errlog)
                server.serve_forever()
Developer: cbs-iiith, Project: volttron, Lines: 29, Source: web.py

Example 10: judge_site

def judge_site(url,keyword=''):
    """
    Determine whether an item is on Taobao (tb) or Tmall (tm)
    """
    url_info = urlparse.urlparse(url)
    urlkey = urlparse.parse_qs(url_info.query,True)
    iid = int(urlkey['id'][0])
    #print 'url_info:',url_info[1]
    try:
        if url_info[1] == 'detail.tmall.com':
            print 'it is a tm item'
            if check_item_update_time(iid,'tm'):
                return
            data = getTmallItemInfo(iid,keyword)
        elif urlkey.get('cm_id'):
            print 'it is a tm item'
            if check_item_update_time(iid,'tm'):
                return
            data = getTmallItemInfo(iid,keyword)
        else:
            print 'it is a tb item'
            if check_item_update_time(iid,'tb'):
                return
            data = getTaobaoItemInfo(iid,keyword)
    except Exception, e:
        traceback.print_exc()  # print_exc() already writes the traceback; no extra print needed
        return
Developer: chu888chu888, Project: Crawler-python-tbcrawler, Lines: 27, Source: crawler.py

Example 11: judge_site

def judge_site(url, sid_id):
    """
    Determine whether an item is on Taobao (tb) or Tmall (tm)
    """
    url_info = urlparse.urlparse(url)
    urlkey = urlparse.parse_qs(url_info.query,True)
    iid = int(urlkey['id'][0])
    print iid
#     print 'url_info:',url_info[1]
    try:
        if url_info[1] == 'detail.tmall.com':
            print 'it is a tm item'
            
#             data = download_tm_reply_by_id(iid)
        elif urlkey.get('cm_id'):
            print 'it is a tm item cm_id'
            
#             data = download_tm_reply_by_id(iid)
        else:
            print 'it is a tb item'
            
            data = download_tb_reply_by_id(iid, sid_id)
    except Exception, e:
        traceback.print_exc()
        return
Developer: fubendong, Project: wangw, Lines: 25, Source: tb.py

Example 12: is_local_service

def is_local_service(name):
    """
    Determine if a service definition describes a service running on
    the local node. This is true if the service URL is for localhost,
    matches the machine's name, or ec2 public name
    """
    if name is None:
        return False
    if "://" in name:
        url = urlparse.urlparse(name)
        if ":" in url.netloc:
            name = url.netloc.split(":")[0]
        else:
            name = url.netloc
    elif ":" in name:
        name = name.split(":")[0]

    if name == "localhost":
        return True

    if '.' in name:
        name = name.split('.')[0]
    node = platform.node()
    if '.' in node:
        node = node.split('.')[0]

    if name == node:
        return True
    pn = public_name()
    if pn is not None and pn.split(".")[0] == name:
        return True
    return False
Developer: bbockelm, Project: globus-toolkit, Lines: 32, Source: __init__.py

Example 13: wait_for_servers

def wait_for_servers(urls, timeout):
    import time, urlparse, httplib
    from ssl import SSLError
    
    for u in urls:
        parsed = urlparse.urlparse(u.lower(), "https")
        netloc = parsed.hostname
        if parsed.port: netloc = "%s:%s" % (netloc, parsed.port)
        if parsed.scheme == "http":
            cnxn = httplib.HTTPConnection(netloc)
        elif parsed.scheme == "https":
            cnxn = httplib.HTTPSConnection(netloc)
        else:
            raise Exception("Don't know how to handle scheme %s" % parsed.scheme)
        i = 0
        while i < timeout:
            try:
                cnxn.connect()
            except SSLError:
                break
            except Exception as e:
                if "Connection refused" in str(e):
                    time.sleep(1)
                    i = i + 1  # count this attempt toward the timeout
                elif "SSL" in str(e):
                    break
                else:
                    raise
            else:
                break
Developer: CarolinaFernandez, Project: ocf-expedient-ro, Lines: 30, Source: utils.py

Example 14: _extracturls

 def _extracturls(self):
     #print "Extract URLs"
     urls = []
     htmlsrc, charset, parenturl = self.htmlSrcTuple
     if htmlsrc != None:
         resulturls = []
         urlExtractor = ExtractLinks(resulturls)
         try:
             if charset == None:
                 urlExtractor.feed(htmlsrc)
             else:
                 urlExtractor.feed(htmlsrc.decode(charset))
         except HTMLParser.HTMLParseError:
             pass
         try:
             urlExtractor.reset() # I think close needs special treatment .close()
         except HTMLParser.HTMLParseError:
             urlExtractor.reset()
         #this piece of code forms the URIs to full URLs by joining the
         #parenturl with the network location free URLs extracted
         for i in xrange(len(resulturls)): #replacing range() for performance reasons
             urlres = urlparse.urlparse(resulturls[i], "http")
             if urlres.netloc == "":
                 resulturls[i] = urlparse.urljoin(parenturl, resulturls[i])
         urls.extend(resulturls) # extend once, after the loop, so URLs are not duplicated
     return urls
Developer: dpritsos, Project: Synergy-Crawler, Lines: 26, Source: linkextractors.py

Example 15: getParams

def getParams(path):
    query = urlparse.urlparse(path).query
    queryDict = dict([x.split('=') for x in query.split('&')])

    width = queryDict['WIDTH']
    height = queryDict['HEIGHT']
    bbox = queryDict['BBOX']
    return Params(int(width), int(height), map(float, bbox.split(',')))
Developer: gumik, Project: google-maps-wms, Lines: 8, Source: google_maps_wms.py


Note: the urlparse.urlparse function examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective developers; copyright in the source code belongs to the original authors, and any use or redistribution should follow the corresponding project's license. Please do not reproduce without permission.