当前位置: 首页>>代码示例>>Python>>正文


Python MozillaCookieJar.load方法代码示例

本文整理汇总了Python中cookielib.MozillaCookieJar.load方法的典型用法代码示例。如果您正苦于以下问题:Python MozillaCookieJar.load方法的具体用法?Python MozillaCookieJar.load怎么用?Python MozillaCookieJar.load使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在cookielib.MozillaCookieJar的用法示例。


在下文中一共展示了MozillaCookieJar.load方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: GetWithCookie

# 需要导入模块: from cookielib import MozillaCookieJar [as 别名]
# 或者: from cookielib.MozillaCookieJar import load [as 别名]
	def GetWithCookie( url, cookie_name, data = '', retry = 3):
		global PATH_TMP, ACGINDEX_UA

		try:
			cj = MozillaCookieJar( PATH_TMP + cookie_name )

			try :
				cj.load( PATH_TMP + cookie_name )
			except:
				pass # 还没有cookie只好拉倒咯

			ckproc = urllib2.HTTPCookieProcessor( cj )

			AmagamiSS = urllib2.build_opener( ckproc )
			AmagamiSS.addheaders = [ ACGINDEX_UA ]

			if data != '':
				request = urllib2.Request( url = url, data = data )
				res = AmagamiSS.open( request )
				cj.save() # 只有在post时才保存新获得的cookie
			else:
				res = AmagamiSS.open( url )

			return Haruka.GetContent( res )

		except:
			# 这里有3次重新连接的机会,3次都超时就跳过
			if retry > 0 : 
				return Haruka.GetWithCookie( url, cookie_name, data , retry-1 )
			else:
				return False
开发者ID:chitosai,项目名称:acgindex,代码行数:33,代码来源:utility.py

示例2: _get_cookie_headers

# 需要导入模块: from cookielib import MozillaCookieJar [as 别名]
# 或者: from cookielib.MozillaCookieJar import load [as 别名]
 def _get_cookie_headers(cls):
     jar = MozillaCookieJar(config.netflix.cookies_path)
     jar.load()
     cookies = []
     for line in jar:
         cookies.append('='.join((line.name, line.value)))
     return cookies
开发者ID:mrowl,项目名称:filmdata,代码行数:9,代码来源:netflix.py

示例3: __init__

# 需要导入模块: from cookielib import MozillaCookieJar [as 别名]
# 或者: from cookielib.MozillaCookieJar import load [as 别名]
class RDWorker:
    """
    Worker class to perform Real-Debrid related actions:
    - format login info so they can be used by Real-Debrid
    - login
    - unrestricting links
    - keeping cookies
    """

    _endpoint = 'http://www.real-debrid.com/ajax/%s'

    def __init__(self, cookie_file):
        self._cookie_file = cookie_file
        self.cookies = MozillaCookieJar(self._cookie_file)

    def login(self, username, password_hash):
        """
        Log into Real-Debrid. password_hash must be a MD5-hash of the password string.
        :param username:
        :param password_hash:
        :return: :raise:
        """
        if path.isfile(self._cookie_file):
            self.cookies.load(self._cookie_file)

            for cookie in self.cookies:
                if cookie.name == 'auth' and not cookie.is_expired():
                    return  # no need for a new cookie

        # request a new cookie if no valid cookie is found or if it's expired
        opener = build_opener(HTTPCookieProcessor(self.cookies))
        try:
            response = opener.open(self._endpoint % 'login.php?%s' % urlencode({'user': username, 'pass': password_hash}))
            resp = load(response)
            opener.close()

            if resp['error'] == 0:
                self.cookies.save(self._cookie_file)
            else:
                raise LoginError(resp['message'].encode('utf-8'), resp['error'])
        except Exception as e:
            raise Exception('Login failed: %s' % str(e))

    def unrestrict(self, link, password=''):
        """
        Unrestrict a download URL. Returns tuple of the unrestricted URL and the filename.
        :param link: url to unrestrict
        :param password: password to use for the unrestriction
        :return: :raise:
        """
        opener = build_opener(HTTPCookieProcessor(self.cookies))
        response = opener.open(self._endpoint % 'unrestrict.php?%s' % urlencode({'link': link, 'password': password}))
        resp = load(response)
        opener.close()

        if resp['error'] == 0:
            info = resp['generated_links'][0]
            return info[2], info[0].replace('/', '_')
        else:
            raise UnrestrictionError(resp['message'].encode('utf-8'), resp['error'])
开发者ID:MrMitch,项目名称:realdebrid-CLI,代码行数:62,代码来源:RDWorker.py

示例4: __init__

# 需要导入模块: from cookielib import MozillaCookieJar [as 别名]
# 或者: from cookielib.MozillaCookieJar import load [as 别名]
    def __init__(self, mobile, password=None, status='0',
        cachefile='Fetion.cache', cookiesfile=''):
        '''登录状态:
        在线:400 隐身:0 忙碌:600 离开:100
        '''
        if cachefile:
            self.cache = Cache(cachefile)

        if not cookiesfile:
            cookiesfile = '%s.cookies' % mobile
            
        # try:
            # with open(cookiesfile, 'rb') as f:
                # cookie_processor = load(f)
        # except:
            # cookie_processor = HTTPCookieProcessor(CookieJar())            
        cookiejar = MozillaCookieJar(filename=cookiesfile)
        try:
          f=open(cookiesfile)
        except IOError:
          f=open(cookiesfile,'w')  
          f.write(MozillaCookieJar.header)
        finally:
          f.close()                  
        cookiejar.load(filename=cookiesfile)  
        cookie_processor = HTTPCookieProcessor(cookiejar)        
        self.opener = build_opener(cookie_processor,
            HTTPHandler)
        self.mobile, self.password = mobile, password
        if not self.alive():
            if self._login(): cookiejar.save()

        #dump(cookie_processor, open(cookiesfile, 'wb'))        
        self.changestatus(status)
开发者ID:falconchen,项目名称:PyWapFetion,代码行数:36,代码来源:Fetion.py

示例5: __init__

# 需要导入模块: from cookielib import MozillaCookieJar [as 别名]
# 或者: from cookielib.MozillaCookieJar import load [as 别名]
    def __init__(self, mobile, password=None, status='0',
        cachefile='Fetion.cache', cookiesfile=''):
        '''登录状态:
        在线:400 隐身:0 忙碌:600 离开:100
        '''
        if cachefile:
            self.cache = Cache(cachefile)

        if not cookiesfile:
            cookiesfile = '%s.cookies' % mobile

        cookiejar = MozillaCookieJar(filename=cookiesfile)
        if not os.path.isfile(cookiesfile):
            open(cookiesfile, 'w').write(MozillaCookieJar.header)

        cookiejar.load(filename=cookiesfile)

        cookie_processor = HTTPCookieProcessor(cookiejar)

        self.opener = build_opener(cookie_processor,
            HTTPHandler)
        self.mobile, self.password = mobile, password
        if not self.alive():
            self._login()
            cookiejar.save()

        self.changestatus(status)
开发者ID:GitHublong,项目名称:PyWapFetion,代码行数:29,代码来源:Fetion.py

示例6: WebBrowser

# 需要导入模块: from cookielib import MozillaCookieJar [as 别名]
# 或者: from cookielib.MozillaCookieJar import load [as 别名]
class WebBrowser(object):
    '''mantiene en memoria las cookies, emulando un navegador
       *actualmente no ejecuta javascript'''
    def __init__(self, uAgent=None, headers=None):
        '''uAgent es el agente de usuario'''
        self.cookie_j = MozillaCookieJar()
        if uAgent is None:
            uAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36'
        self.opener = build_opener(HTTPCookieProcessor(self.cookie_j))
        self.user_agent = uAgent
        self.opener.addheaders = [('User-Agent', self.user_agent)]
        # self.session = requests.Session()
        # self.session.headers.update({ 'User-Agent': uAgent })
        # self.session.max_redirects = 20
        self.timeout = 25
        socket.setdefaulttimeout(self.timeout)

    def newtree(f):
        return lambda *a, **k: etree.parse(f(*a, **k), parser=etree.HTMLParser())

    @newtree
    def fetch(self, url, data=None, headers=None, method='POST'):
        '''obtiene los datos de una pagina web, ingresada en url
           para enviar datos por post, pasar codificados por data'''
        if headers:
            self.opener.addheaders = headers

        if not (data == None or type(data) == str):
            data = urllib.urlencode(data)

        if method == 'POST':
            # self.last_seen = self.session.post(url, data=data)
            self.last_seen = self.opener.open(url, data)
        elif method == 'GET':
            #self.last_seen = self.session.get(url + '?' + data)
            if data is None:
                self.last_seen = self.opener.open(url)
            else:
                self.last_seen = self.opener.open(url + '?' + data)
        else:
            raise Exception
        return self.last_seen

    def geturl(self):
        return self.last_seen.geturl()

    def save_cookies(self, path):
        '''guarda los cookies en memoria al disco'''
        '''path es el directorio'''
        self.cookie_j.save(path, ignore_discard=True, ignore_expires=True)

    def load_cookies(self, path):
        '''carga cookies del disco a la memoria'''
        '''path es el directorio'''
        self.cookie_j.load(path, ignore_discard=True, ignore_expires=True)

    def print_cookies(self):
        for cookie in self.cookie_j:
            print cookie.name, cookie.value
开发者ID:itolosa,项目名称:transantiago_api,代码行数:61,代码来源:basebrowser.py

示例7: main

# 需要导入模块: from cookielib import MozillaCookieJar [as 别名]
# 或者: from cookielib.MozillaCookieJar import load [as 别名]
def main(*args):

    # Populate our options, -h/--help is already there for you.
    usage = "usage: %prog [options] URL"
    optp = optparse.OptionParser(usage=usage)
    optp.add_option("-d", "--storedir", dest="store_dir",
                     help="the directory to store the certificate/key and \
                     config file",
                     metavar="DIR",
                     default=path.join(homedir, ".shibboleth"))
    optp.add_option('-v', '--verbose', dest='verbose', action='count',
            help="Increase verbosity (specify multiple times for more)")
    # Parse the arguments (defaults to parsing sys.argv).
    opts, args = optp.parse_args()

    # Here would be a good place to check what came in on the command line and
    # call optp.error("Useful message") to exit if all it not well.

    log_level = logging.WARNING  # default
    if opts.verbose == 1:
        log_level = logging.INFO
    elif opts.verbose >= 2:
        log_level = logging.DEBUG

    # Set up basic configuration, out to stderr with a reasonable
    # default format.
    logging.basicConfig(level=log_level)

    if not path.exists(opts.store_dir):
        os.mkdir(opts.store_dir)

    if args:
        sp = args[0]

    # if the cookies file exists load it
    cookies_file = path.join(opts.store_dir, 'cookies.txt')
    cj = MozillaCookieJar(filename=cookies_file)
    if path.exists(cookies_file):
        cj.load()

    logout_urls = []
    for cookie in cj:
        if cookie.name.startswith('_shibsession_') or \
               cookie.name.startswith('_shibstate_'):
            logout_urls.append(
                "https://%s/Shibboleth.sso/Logout" % cookie.domain)

    logout_urls = list(set(logout_urls))

    opener = urllib2.build_opener(HTTPCookieProcessor(cookiejar=cj))
    for url in logout_urls:
        request = urllib2.Request(url)
        log.debug("GET: %s" % request.get_full_url())
        response = opener.open(request)

    cj.save()
开发者ID:grith,项目名称:sibboleth,代码行数:58,代码来源:shiblogout.py

示例8: Get

# 需要导入模块: from cookielib import MozillaCookieJar [as 别名]
# 或者: from cookielib.MozillaCookieJar import load [as 别名]
def Get( url, data = '', refer = 'http://www.pixiv.net/', retry = 3 ):
    global ABS_PATH

    cj = MozillaCookieJar( ABS_PATH + 'pixiv.cookie.txt' )

    try :
        cj.load( ABS_PATH + 'pixiv.cookie.txt' )
    except:
        pass # 还没有cookie只好拉倒咯

    ckproc = urllib2.HTTPCookieProcessor( cj )

    opener = urllib2.build_opener( ckproc )
    opener.addheaders = [
        ('Accept', '*/*'),
        ('Accept-Language', 'zh-CN,zh;q=0.8'),
        ('Accept-Charset', 'UTF-8,*;q=0.5'),
        ('Accept-Encoding', 'gzip,deflate'),
        ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31'),
        ('Referer', refer)
    ]

    # 防止海外访问weibo变英文版
    if 'weibo.com' in url:
        opener.addheaders = [('Cookie', 'lang=zh-cn; SUB=Af3TZPWScES9bnItTjr2Ahd5zd6Niw2rzxab0hB4mX3uLwL2MikEk1FZIrAi5RvgAfCWhPyBL4jbuHRggucLT4hUQowTTAZ0ta7TYSBaNttSmZr6c7UIFYgtxRirRyJ6Ww%3D%3D; UV5PAGE=usr512_114; UV5=usrmdins311164')]

    debug('Network: url - ' + url)

    try:
        # 发出请求
        if data != '':
            debug('Network: post')
            debug(data)
            request = urllib2.Request( url = url, data = data )
            res = opener.open( request, timeout = 15 )
            cj.save() # 只有在post时才保存新获得的cookie
        else:
            debug('Network: get')
            res = opener.open( url, timeout = 15 )

        debug('Network: Status Code - ' + str(res.getcode()))

        return GetContent( res )

    except Exception, e:
        # 自动重试,每张图最多3次
        if retry > 0:
            return Get( url, data, refer, retry-1 )
        else:
            log(e, 'Error: unable to get %s' % url)
            return False
开发者ID:Akkariin,项目名称:PixivRss,代码行数:53,代码来源:utility.py

示例9: __init__

# 需要导入模块: from cookielib import MozillaCookieJar [as 别名]
# 或者: from cookielib.MozillaCookieJar import load [as 别名]
class NRK:
    def __init__(self):
        policy = DefaultCookiePolicy(
            rfc2965=True, strict_ns_domain=DefaultCookiePolicy.DomainStrict)
        self.cj = MozillaCookieJar(".cookies", policy)

        try:
            self.cj.load()
        except IOError, e:
            if e.errno != 2:
                raise e
            # else: Ignore "File not found"    
        self.opener = build_opener(HTTPCookieProcessor(self.cj))
        self.init()
        #self.login()
        self.setspeed()
开发者ID:stain,项目名称:scrapbook,代码行数:18,代码来源:nrk.py

示例10: check_kilnauth_token

# 需要导入模块: from cookielib import MozillaCookieJar [as 别名]
# 或者: from cookielib.MozillaCookieJar import load [as 别名]
def check_kilnauth_token(ui, url):
    cookiepath = _get_path('hgcookies')
    if (not os.path.exists(cookiepath)) or (not os.path.isdir(cookiepath)):
        return ''
    cookiepath = os.path.join(cookiepath, md5(get_username(get_dest(ui))).hexdigest())

    try:
        if not os.path.exists(cookiepath):
            return ''
        cj = MozillaCookieJar(cookiepath)
    except IOError:
        return ''

    domain = get_domain(url)

    cj.load(ignore_discard=True, ignore_expires=True)
    for cookie in cj:
        if domain == cookie.domain:
            if cookie.name == 'fbToken':
                return cookie.value
开发者ID:szechyjs,项目名称:dotfiles,代码行数:22,代码来源:kiln.py

示例11: Json_RPC

# 需要导入模块: from cookielib import MozillaCookieJar [as 别名]
# 或者: from cookielib.MozillaCookieJar import load [as 别名]
class Json_RPC(object):
    def __init__(self):
        #self.cookie_jar=CookieJar()
        self.cookie_jar=MozillaCookieJar()
        self.opener=urllib2.build_opener(
                urllib2.HTTPCookieProcessor(self.cookie_jar),
                #urllib2.HTTPHandler(debuglevel=1),
                #urllib2.HTTPSHandler(debuglevel=1),
                )

    def load_cookie(self,filename):
        ''' Load Cookie from file '''
        self.cookie_jar.load(filename,ignore_discard=True)

    def save_cookie(self,filename):
        ''' Save Cookie to file '''
        self.cookie_jar.save(filename,ignore_discard=True)

    def json_rpc(self,url,method="GET",**kwargs):
        '''
        Performs a json rpc to url and return python-native result

        will extract dict or list from result

        Example:
        try{callback({'result':0,'data':[]});}catch(e){}
        will be transcode to
        {"result":0,"data":[]}

        See also: http_rpc

        '''
        ret=self.http_rpc(url,method,**kwargs)
        ret=sub(r'try{(.*)}catch\(.*\){.*};?',r'\1',ret)
        ret=(search(r'{.+}',ret) or search(r'\[.+\]',ret)).group()
        #ret=sub(r"'",r'"',ret)
        ret=loads(ret)
        return ret

    def http_rpc(self,url,method="GET",**kwargs):
        '''
        Perfoms a http rpc to url and return raw result

        url          base url to rpc
        method       'GET' or 'POST'
        query        query string passing by a dict
        data         post data passing by a dict
        file         post files passing by a list of 3-tuple: key, filename, data
                     ( this indicates multipart/form-data )
        
        '''
        kwe=Entity(kwargs)

        if method not in ['GET','POST']:
            raise RPCError("Method not in GET or POST")

        if kwe.query:
            url+="?"+urlencode(kwe.query)

        if method=='GET':
            request=Request(url)
        elif kwe.file:
            content_type,data=multipart_encode(kwe.data,kwe.file)
            request=Request(url,data)
            request.add_header('Content-Type', content_type)
        elif kwe.data:
            data=urlencode(kwe.data)
            request=Request(url,data)
        else:
            raise RPCError("POST with no data")

        request.add_header('User-Agent',
            "Mozilla/5.0 (Ubuntu; X11; Linux x86_64; rv:8.0) Gecko/20100101 Firefox/8.0"
            )
        request.add_header('Accept-Charset',"UTF-8")

        response=self.opener.open(request)
        ret=response.read()
        response.close()

        #print "\033[33m"+str(self.cookie_jar)+"\033[0m"

        # FIXME: An Ugly hack to Tencent server's charset indicator using BOM header
        if ret.startswith('\xef\xbb\xbf'):
            ret=ret[3:]

        return ret
开发者ID:Tydus,项目名称:qqdown,代码行数:89,代码来源:json_rpc.py

示例12: LSession

# 需要导入模块: from cookielib import MozillaCookieJar [as 别名]
# 或者: from cookielib.MozillaCookieJar import load [as 别名]
class LSession():
    def __init__(self,cookiefile = None, proxy = None, timeout = 10, retime = 30,sleept = 3):
        self.timeout=timeout
        self.retime=retime
        self.sleept=sleept
        #proxy '1.234.77.96:80'
        if cookiefile == None:
            self.cookiejar = CookieJar()
        else:
            self.cookiejar = MozillaCookieJar(filename=cookiefile)
            #self.cookiejar =cookielib.LWPCookieJar(filename=cookiefile)
            if not os.path.isfile(cookiefile):
                open(cookiefile, 'w').write(MozillaCookieJar.header)
                #open(cookiefile, 'w').write('#abc\n')
                pass
            self.cookiejar.load(filename=cookiefile,ignore_discard=True)
            #print "ck:",self.cookiejar 
        self.cookie_processor = HTTPCookieProcessor(self.cookiejar)
        self.opener=build_opener(urllib2.HTTPRedirectHandler(),self.cookie_processor)
        if proxy : self.opener.add_handler(ProxyHandler({"http" : proxy}))
        #for posting a file
        try:
            import MultipartPostHandler #for posting a file,need installed
            self.opener.add_handler(MultipartPostHandler.MultipartPostHandler())
        except NameError as e:print e
            
        self.response=None
        self.request=None
        self.header=[]
    def add_header(self,k,v) : self.header.append((k,v))

    def build_request(self,url,params=None):
        self.request=Request(url,params)
        if not self.response is None:self.request.add_header('Referer',self.url())
        #self.request.add_header('User-Agent',
        #                        'Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 \
        #                        (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25')
        #NokiaE63/UC Browser7.9.0.102/28/355/UCWEB
        #self.request.add_header('User-Agent','NokiaE63/UC Browser7.9.0.102/28/355/UCWEB')
        self.request.add_header('User-Agent','Opera/9.80 (J2ME/MIDP; Opera Mini/1.0/886; U; en) Presto/2.4.15')
        while  self.header :
            _k,_v = self.header.pop()
            self.request.add_header(_k,_v)
        #Mobile/7B405
        #self.request.add_header('User-Agent','Mobile/7B405')
        return self.request

    def __del__(self) : self.save_cookie()

    def urlopen(self,req):
        retime=self.retime
        while retime > 0:
            try:
                return self.opener.open(req,timeout=self.timeout)
            except Exception as e:
                retime -= 1
                traceback.print_exc(file=sys.stdout)
                print 'Wait and retry...%d'%(self.retime-retime)
                sleep(self.sleept)

    def savefile(self,filename,url):
        self.response=self.urlopen(self.build_request(url))
        CHUNK = 50 * 1024
        with open(filename, 'wb') as fp:
            while True:
                chunk = self.response.read(CHUNK)
                if not chunk: break
                fp.write(chunk)
    def post(self,url,post_data):
        self.response=self.urlopen(self.build_request(url,urlencode(post_data)))
        return  self.response
    def post_raw(self,url,post_data):
        self.response=self.urlopen(self.build_request(url,post_data))
        return  self.response

    def post_file(self,url,params):
        self.response=self.urlopen(self.build_request(url, params))
        return  self.response
    def get(self,url):
        self.response=self.urlopen(self.build_request(url))
        #import urllib
        #print  urllib.urlopen('http://mrozekma.com/302test.php').geturl()
        # import requests
        # r=requests.get(url)
        # print r.content
        return  self.response
    def text(self,dec='gbk',enc='utf') :
        return self.response.read().decode(dec).encode(enc)
    def url(self) : return self.response.url
    def logout(self) : self.cookiejar.clear()
    def Verify_proxy(self) :
        pass
    def show_cookie(self):
        #print self.cookiejar
        for i in self.cookiejar:
            print i
    def save_cookie(self):
        # if  hasattr(self.cookiejar,'save'):#in case non cookiejar
        #     self.cookiejar.save(ignore_discard=True, ignore_expires=False)
        try: 
#.........这里部分代码省略.........
开发者ID:khalidhsu,项目名称:qqmsgbak,代码行数:103,代码来源:lsession.py

示例13: __init__

# 需要导入模块: from cookielib import MozillaCookieJar [as 别名]
# 或者: from cookielib.MozillaCookieJar import load [as 别名]
class SimpleCrawler:

  USER_AGENT = 'SimpleCrawler/0.1'
  HEADERS = {
    'User-Agent': USER_AGENT,
    'Accept-Encoding': 'gzip',
    'Connection': 'keep-alive'
    }
  CONTENT_TYPE_PAT = re.compile(r'([^\s;]+)(.*charset=([^\s;]+))?', re.I)
  
  def __init__(self, starturl, index_html='', maxlevel=1,
               cookie_file=None, acldb=None, urldb=None, default_charset=None,
               delay=0, timeout=300, debug=0):
    (proto, self.hostport, _x, _y, _z) = urlsplit(starturl)
    assert proto == 'http'
    #Thread.__init__(self)
    self.debug = debug
    self.index_html = index_html
    if cookie_file:
      self.cookiejar = MozillaCookieJar(cookie_file)
      self.cookiejar.load()
    else:
      self.cookiejar = None
    self.robotstxt = RobotFileParser()
    self.robotstxt.set_url(urljoin(starturl, '/robots.txt'))
    self.robotstxt.read()
    self.conn = None
    self.urldb = urldb
    self.acldb = acldb
    self.curlevel = 0
    self.delay = delay
    self.timeout = timeout
    self.default_charset = default_charset
    if starturl.endswith('/'):
      starturl += self.index_html
    self.urls = [(starturl, maxlevel)]
    self.crawled = {}                   # 1:injected, 2:crawled
    return

  def accept_url(self, url):
    if url.endswith('/'):
      url += self.index_html
    if self.acldb and not self.acldb.allowed(url):
      return None
    return url
  
  def inject_url(self, url):
    if (not self.curlevel) or (not url) or (url in self.crawled): return False
    if not self.robotstxt.can_fetch(self.USER_AGENT, url):
      if self.debug:
        print >>stderr, 'DISALLOW: %r' % url
      return None
    if self.debug:
      print >>stderr, 'INJECT: %r' % url
    self.crawled[url] = 1
    self.urls.append((url, self.curlevel-1))
    return True

  def get1(self, url, maxretry=3, maxredirect=3):
    if self.debug:
      print >>stderr, 'GET: %r' % url
    # loop
    for rtry in range(maxredirect):
      # forge urllib2.Request object.
      req = Request(url)
      # add cookie headers if necessary.
      if self.cookiejar:
        self.cookiejar.add_cookie_header(req)
        headers = req.unredirected_hdrs
        headers.update(self.HEADERS)
      else:
        headers = self.HEADERS
      # get response.
      for ctry in range(maxretry):
        try:
          if not self.conn:
            print >>stderr, 'Making connection: %r...' % (self.hostport,)
            self.conn = HTTPConnection(self.hostport)
          self.conn.request('GET', req.get_selector().replace(' ',''), '', headers)
	  self.conn.sock.settimeout(self.timeout)
          resp = self.conn.getresponse()
          break
        except BadStatusLine, x:
          # connection closed unexpectedly
          print >>stderr, 'Connection closed unexpectedly.'
          # it restarts the connection...
          self.conn.close()
          self.conn = None
        except socket.error, x:
          # connection closed unexpectedly
          print >>stderr, 'Socket error:', x
          self.conn.close()
          self.conn = None
      else:
开发者ID:dreamfrog,项目名称:jophiel,代码行数:96,代码来源:textcrawler.py

示例14: GetDayData

# 需要导入模块: from cookielib import MozillaCookieJar [as 别名]
# 或者: from cookielib.MozillaCookieJar import load [as 别名]
class BasisRetr:
	"""The main entry points, once a BasisRetr object has been created, are: 1) GetDayData()-- download metrics, activity, sleep data for a single day from the basis website and save it, 2) GetActivityCsvForMonth()-- download activity summaries for an entire month, and 3) GetSleepCsvForMonth()--download sleep summaries for an entire month."""
	LOGIN_URL = 'https://app.mybasis.com/login'
	UID_URL = 'https://app.mybasis.com/api/v1/user/me.json'
	METRICS_URL = 'https://app.mybasis.com/api/v1/chart/{userid}.json?interval=60&units=s&start_date={date}&start_offset=0&end_offset=0&summary=true&bodystates=true&heartrate=true&steps=true&calories=true&gsr=true&skin_temp=true&air_temp=true'
	ACTIVITIES_URL ='https://app.mybasis.com/api/v2/users/me/days/{date}/activities?expand=activities&type=run,walk,bike,sleep'
	SLEEP_URL = 'https://app.mybasis.com/api/v2/users/me/days/{date}/activities?expand=activities&type=sleep'
	SLEEP_EVENTS_URL = 'https://app.mybasis.com/api/v2/users/me/days/{date}/activities?type=sleep&event.type=toss_and_turn&expand=activities.stages,activities.events'

	DATE_FORMAT = "%04d-%02d-%02d"
	
	# save-to filename.  date is prefix, format is suffix
	MO_ACTIVITY_FNAME_TEMPLATE = "{yr:04d}-{mo:02d}_basis_activities_summary.csv"
	MO_SLEEP_FNAME_TEMPLATE = "{yr:04d}-{mo:02d}_basis_sleep_summary.csv"
	# day sleep and activity filenames (for month summaries)
	DAY_ACTIVITY_FNAME_TEMPLATE = "{yr:04d}-{mo:02d}-{dy:02d}_basis_activities.json"
	DAY_SLEEP_FNAME_TEMPLATE = "{yr:04d}-{mo:02d}-{dy:02d}_basis_sleep.json"
	DAY_JSON_FNAME_TEMPLATE = "{date}_basis_{typ}.json"
	METRICS_FNAME_TEMPLATE = "{date}_basis_metrics.{ext}"
	SLEEP_FNAME_TEMPLATE= "{date}_basis_sleep.{format}"
	
	def __init__(self, loadconfig = None):
		# create config info
		self.cfg = Config(cfg_items = CFG_ITEMS)
		if loadconfig:
			self.cfg.Load()
		else:
			# if config file doesn't exist, save the defaults loaded above
			self.cfg.Save() #saves 
		# url opener for website retrieves
		opener = urllib2.build_opener()
		self.cj = MozillaCookieJar(self.cfg.cookie_filename)#BasisRetr.COOKIE_FILENAME)
		self.session_cookie = None
		if os.path.exists(self.cfg.cookie_filename):#BasisRetr.COOKIE_FILENAME):
			self.cj.load()
			self.CheckSessionCookie() # set session cookie if it exists and hasn't expired
		# need to use build_opener to submit cookies and post form data
		self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))

	def GetDayData(self, yr, mo, day, typ, save_csv, override_cache = False, act_metr= True):
		"""Main entry method for getting a day's worth of data, formatting, then saving it.  typ is the type of data: metrics, activities, or sleep.  Data is always saved in json format, but if save_csv is True, save to csv as well as json. override_cache ignores any already downloaded json.  act_metr, if True, saves sleep and activity state along with metrics."""
		date = BasisRetr.DATE_FORMAT % (yr, mo, day)
		ydate = self.GetYesterdayDateAsString(yr, mo, day)
		
		self.Status("Checking Login")
		self.CheckLogin() # ensure we're logged in
		self.Status("getting {} for {}".format(typ,date))
		# figure out which data to get
		data = None
		# filename 
		cfname = "{date}_basis_{typ}.csv".format(date=date, typ=typ)
				
		# if needed, download json data from website and save to file
		if typ == 'metrics':
			mjdata = self.RetrieveJsonOrCached(date, 'metrics', override_cache)
			### MOVE THIS ERROR CHECKING INTO THE ABOVE METHOD
			if type(mjdata) == str or mjdata == None: # simple error checking
				self.Status('OnGetDayData: Metrics json conversion failed.')
				print mjdata[:500]
				return
			# also load up actities
		if typ == 'activities' or act_metr:
			ajdata = self.RetrieveJsonOrCached(date, 'activities', override_cache)
			if type(ajdata) == str or ajdata == None: # simple error checking
				self.Status('OnGetDayData: Activities json conversion failed.')
				print ajdata[:500]
				return
		if typ == 'sleep' or act_metr:
			sjdata = self.RetrieveJsonOrCached(date, 'sleep', override_cache)
			if type(sjdata) == str or sjdata == None: # simple error checking
				self.Status('OnGetDayData: Sleep json conversion failed.')
				print sjdata[:500]
				return
			if act_metr: # add yesterday's sleep data
				sjdata2= self.RetrieveJsonOrCached(ydate, 'sleep')
		
		# Next, turn the list of python objects into a csv file.
		# If asked to (via act_metr), collect sleep and activity type, then add them to each timestamp.
		cdata = None
		if save_csv:
			if typ == 'activities' or act_metr:
				act_list = self.JsonActivitiesToList(ajdata)
				cdata = self.CreateCSVFromList(self.cfg.csv_activity_colnames, act_list)
			if typ == 'sleep' or act_metr:
				sleep_evts_list = self.JsonSleepEventsToList(sjdata)
				cdata = self.CreateCSVFromList(self.cfg.csv_sleep_evt_colnames, sleep_evts_list)
				if act_metr:
					# prepend yesterday's sleep events as they may start before midnight.
					sleep_evts_list[:0] = self.JsonSleepEventsToList(sjdata2)
			if typ == 'metrics':
				metrics_list = self.JsonMetricsToList(mjdata)
				if act_metr: # add activities to metrics               
					self.AddActivityTypeToMetrics(metrics_list, act_list, sleep_evts_list)
					header = self.cfg.csv_metrics_colnames + self.cfg.csv_activity_type_colnames
				else:
					header = self.cfg.csv_metrics_colnames
				cdata = self.CreateCSVFromList(header, metrics_list)
		
		# If we were able to make a csv file, save it.
		if cdata:
#.........这里部分代码省略.........
开发者ID:jshrader,项目名称:student_sleep,代码行数:103,代码来源:basis_retr.py

示例15: AOJClient

# 需要导入模块: from cookielib import MozillaCookieJar [as 别名]
# 或者: from cookielib.MozillaCookieJar import load [as 别名]
class AOJClient(object):
    def __init__(self, cookie_file_path='aoj-cookie.txt'):
        self.cookie_file_path = cookie_file_path
        self.cookiejar = MozillaCookieJar()
        if os.path.isfile(cookie_file_path):
            self.cookiejar.load(cookie_file_path)

        self.opener = urllib2.build_opener(
                urllib2.HTTPRedirectHandler(),
                urllib2.HTTPHandler(),
                urllib2.HTTPSHandler(),
                urllib2.HTTPCookieProcessor(self.cookiejar))

    def get_csrf_token(self, url):
        request = urllib2.Request(url=url)
        response = self.opener.open(request)
        data = response.read()
        return REGEXP_CSRF.findall(data)[0]

    def refresh_session(self):
        print 'Not Logged In!'
        context = {'csrfmiddlewaretoken': self.get_csrf_token(LOGIN_URL),
                   'username': raw_input('Username: '), 'password': getpass.getpass('Password: ')}

        request = urllib2.Request(url=SITE_PREFIX+'accounts/login/',
                                  data=urllib.urlencode(context))
        self.opener.open(request)
        self.cookiejar.save(self.cookie_file_path)

    def check_problem_exist(self, problem_name):
        try:
            request = urllib2.Request(url=PROB_PREFIX+'read/'+problem_name)
            response = self.opener.open(request)
        except urllib2.HTTPError as err:
            if err.code == 404: # Not Found
                raise AOJProblemNotExist
            else:
                raise

    def detect_language(self, source_file):
        if '.' in source_file:
            selected_language = source_file[source_file.rfind('.')+1:]
        else:
            selected_language = ''
        
        while selected_language not in LANGUAGES:
            selected_language = raw_input('Please select your langauge: (' + '/'.join(LANGUAGES) + ') ? ').strip().lower()

        return selected_language

    def submit(self, submission):
        self.check_problem_exist(submission.problem)
        context = {}
        context['language'] = self.detect_language(submission.source)
        context['csrfmiddlewaretoken'] = self.get_csrf_token(url=PROB_PREFIX+'submit/'+submission.problem)

        try:
            with open(submission.source) as f:
                context['source'] = f.read()
        except IOError:
            raise AOJFileNotExist()

        def try_submit(first=True):
            if not first:
                self.refresh_session()
            request = urllib2.Request(url=PROB_PREFIX+'submit/'+submission.problem,
                                  data=urllib.urlencode(context))
            response = self.opener.open(request)

            if not response.geturl().lower().startswith(LOGIN_URL):
                print 'Submission Complete!'
                return
            try_submit(first=False)
        try_submit()

    def get_submission_list(self, problem_name):
        self.check_problem_exist(problem_name)
        request = urllib2.Request(url=SITE_PREFIX+'judge/submission/recent/?problem='+problem_name)
        response = self.opener.open(request)

        try:
            import lxml.html
        except ImportError:
            print 'lxml library is needed for parsing HTML'
            return

        html = lxml.html.fromstring(unicode(response.read().decode('utf8')))
        context = {}
        fields = ('id', 'problem', 'user', 'language', 'length', 'state', 'stats', 'submitted_on')
        length = {'id': 9, 'problem': 15, 'user': 15, 'language': 5, 'length': 7, 'state': 15, 'stats': 7, 'submitted_on': 15}
        template = u'%(id)s %(problem)s %(user)s %(language)s %(length)s %(state)s %(stats)s %(submitted_on)s'

        def width(string):
            return sum(1+(unicodedata.east_asian_width(c) in 'WF') for c in string)

        for tr in html.cssselect('table.submission_list tr'):
            for field in fields:
                element = tr.find_class(field)
                if element:
                    context[field] = unicode(element[0].text_content().strip())
#.........这里部分代码省略.........
开发者ID:wurikiji,项目名称:algospot,代码行数:103,代码来源:submit.py


注:本文中的cookielib.MozillaCookieJar.load方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。