本文整理汇总了 Python 中 http.cookiejar.MozillaCookieJar.load 方法的典型用法代码示例。如果您正苦于以下问题:Python MozillaCookieJar.load 方法的具体用法?Python MozillaCookieJar.load 怎么用?Python MozillaCookieJar.load 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 http.cookiejar.MozillaCookieJar 的用法示例。
在下文中一共展示了 MozillaCookieJar.load 方法的 14 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: __init__
# 需要导入模块: from http.cookiejar import MozillaCookieJar [as 别名]
# 或者: from http.cookiejar.MozillaCookieJar import load [as 别名]
def __init__(self, mobile, password=None, status='0',
             cachefile='Fetion.cache', cookiesfile=''):
    '''Prepare a cookie-backed opener and log in when the session is dead.

    Login status codes:
    online: 400, invisible: 0, busy: 600, away: 100

    mobile: account identifier; also used to build the default cookie
        file name ('<mobile>.cookies') when cookiesfile is empty.
    password: stored on the instance unchanged.
    status: handed to self.changestatus() once the session is ready.
    cachefile: when truthy, self.cache is set to Cache(cachefile);
        otherwise self.cache is left unset.
    cookiesfile: path of the Mozilla-format cookie file; it is created
        when it does not exist yet.
    '''
    if cachefile:
        self.cache = Cache(cachefile)

    if not cookiesfile:
        cookiesfile = '%s.cookies' % mobile

    cookiejar = MozillaCookieJar(filename=cookiesfile)
    if not os.path.isfile(cookiesfile):
        # Saving the still-empty jar produces a valid header-only file.
        # This replaces writing MozillaCookieJar.header by hand, which
        # left the file handle unclosed and relied on an attribute that
        # is not part of the documented http.cookiejar API.
        cookiejar.save()
    cookiejar.load(filename=cookiesfile)

    cookie_processor = HTTPCookieProcessor(cookiejar)
    self.opener = build_opener(cookie_processor,
                               HTTPHandler)
    self.mobile, self.password = mobile, password

    if not self.alive():
        self._login()
        # Persist the cookies obtained by the fresh login.
        cookiejar.save()

    self.changestatus(status)
示例2: have_cookie_login
# 需要导入模块: from http.cookiejar import MozillaCookieJar [as 别名]
# 或者: from http.cookiejar.MozillaCookieJar import load [as 别名]
def have_cookie_login(self):
    """Try to log in with the saved cookie file, falling back if it is stale.

    Loads self.cookiesFile (keeping session-only and expired cookies),
    installs an opener built from it, and fetches self.userSetUrl.  When
    the page does not contain 'page-setting-user', the cookie file is
    deleted and self.have_not_cookie_login() is called.
    """
    # NOTE(review): `search` and `remove` are bare names here; presumably
    # re.search and os.remove imported by name elsewhere -- confirm.
    print('Test cookies...')
    cookie = MozillaCookieJar()
    cookie.load(self.cookiesFile, ignore_discard=True, ignore_expires=True)
    self.build_opener(cookie, self.use_proxy)

    page = self.get_page_data(self.userSetUrl)
    if not search('page-setting-user', page):
        print('This cookies has been invalid.')
        remove(self.cookiesFile)
        self.have_not_cookie_login()
示例3: ScholarQuerier
# 需要导入模块: from http.cookiejar import MozillaCookieJar [as 别名]
# 或者: from http.cookiejar.MozillaCookieJar import load [as 别名]
class ScholarQuerier(object):
    """
    ScholarQuerier instances can conduct a search on Google Scholar
    with subsequent parsing of the resulting HTML content.  The
    articles found are collected in the articles member, a list of
    ScholarArticle instances.
    """

    # Default URLs for visiting and submitting Settings pane, as of 3/14
    GET_SETTINGS_URL = ScholarConf.SCHOLAR_SITE + '/scholar_settings?' \
        + 'sciifh=1&hl=en&as_sdt=0,5'

    SET_SETTINGS_URL = ScholarConf.SCHOLAR_SITE + '/scholar_setprefs?' \
        + 'q=' \
        + '&scisig=%(scisig)s' \
        + '&inststart=0' \
        + '&as_sdt=1,5' \
        + '&as_sdtp=' \
        + '&num=%(num)s' \
        + '&scis=%(scis)s' \
        + '%(scisf)s' \
        + '&hl=en&lang=all&instq=&inst=569367360547434339&save='

    # Older URLs:
    # ScholarConf.SCHOLAR_SITE + '/scholar?q=%s&hl=en&btnG=Search&as_sdt=2001&as_sdtp=on

    class Parser(ScholarArticleParser120726):
        """Article parser that hands every parsed article to its querier."""

        def __init__(self, querier):
            ScholarArticleParser120726.__init__(self)
            self.querier = querier

        def handle_article(self, art):
            # NOTE(review): add_article() is not defined in this excerpt;
            # presumably provided by the full ScholarQuerier -- confirm.
            self.querier.add_article(art)

    def __init__(self):
        """Create an empty querier and load saved cookies when available."""
        self.articles = []
        self.query = None
        self.cjar = MozillaCookieJar()

        # If we have a cookie file, load it:
        if ScholarConf.COOKIE_JAR_FILE and \
           os.path.exists(ScholarConf.COOKIE_JAR_FILE):
            try:
                self.cjar.load(ScholarConf.COOKIE_JAR_FILE,
                               ignore_discard=True)
                ScholarUtils.log('info', 'loaded cookies file')
            except Exception as msg:
                # "except Exception, msg" is Python 2 only syntax and does
                # not compile on Python 3; the "as" form works on both.
                ScholarUtils.log('warn', 'could not load cookies file: %s' % msg)
                self.cjar = MozillaCookieJar()  # Just to be safe

        self.opener = build_opener(HTTPCookieProcessor(self.cjar))
        self.settings = None  # Last settings object, if any
示例4: Session
# 需要导入模块: from http.cookiejar import MozillaCookieJar [as 别名]
# 或者: from http.cookiejar.MozillaCookieJar import load [as 别名]
class Session(object):
    """HTTP session whose cookies are kept in a file under data_path."""

    def __init__(self, app_name, app_version, data_path, **kwargs):
        """Load the cookie jar from data_path and build the URL opener.

        Raises Exception when data_path is empty or not a directory.
        Remaining keyword arguments are forwarded to the next __init__
        in the MRO.
        """
        self.app_name = app_name
        self.app_version = app_version

        if not data_path or not os.path.isdir(data_path):
            raise Exception('invalid data_path: %s' % data_path)

        self.cookie_jar = MozillaCookieJar(os.path.join(data_path, default.COOKIES_FILENAME))
        try:
            self.cookie_jar.load()
        except EnvironmentError:
            # Best effort: a missing or unreadable cookie file is not fatal.
            pass

        self.opener = build_opener(
            HTTPRedirectHandler(),
            HTTPCookieProcessor(self.cookie_jar))

        super(Session, self).__init__(**kwargs)

    def open(self, request, default_charset=None):
        """Send request and return the response, decoded when possible.

        Adds the User-Agent and X-Sputnik-System headers, then opens the
        request.  When a charset is known (from the response headers or
        default_charset) the response is wrapped in a codecs reader for
        that charset; otherwise the raw response object is returned.
        """
        request.add_header('User-Agent', util.user_agent(self.app_name, self.app_version))
        system_string = json.dumps(util.system_info(self.app_name, self.app_version))
        request.add_header('X-Sputnik-System', system_string)

        r = self.opener.open(request)

        if hasattr(r.headers, 'get_content_charset'):  # py3
            charset = r.headers.get_content_charset() or default_charset
        elif hasattr(r.headers, 'getparam'):  # py2
            charset = r.headers.getparam('charset') or default_charset
        else:
            charset = default_charset

        if charset is None:
            return r
        return codecs.getreader(charset)(r)

    def __del__(self):
        """Write the cookies back to disk when the session is collected."""
        # __init__ may have raised before cookie_jar was assigned.
        if hasattr(self, 'cookie_jar'):
            self.cookie_jar.save()
示例5: Session
# 需要导入模块: from http.cookiejar import MozillaCookieJar [as 别名]
# 或者: from http.cookiejar.MozillaCookieJar import load [as 别名]
class Session(Base):
    """HTTP session whose cookies are kept in a file under data_path."""

    def __init__(self, data_path, **kwargs):
        """Load the cookie jar from data_path and build the URL opener.

        Raises Exception when validation.is_data_path() rejects data_path.
        Remaining keyword arguments are forwarded to the base initializer.
        """
        if not validation.is_data_path(data_path):
            raise Exception('invalid data_path: %s' % data_path)

        self.cookie_jar = MozillaCookieJar(os.path.join(data_path, default.COOKIES_FILENAME))
        try:
            self.cookie_jar.load()
        except EnvironmentError:
            # Best effort: a missing or unreadable cookie file is not fatal.
            pass

        self.opener = build_opener(
            HTTPRedirectHandler(),
            HTTPCookieProcessor(self.cookie_jar))

        super(Session, self).__init__(**kwargs)

    def open(self, request, default_charset=None):
        """Send request and return the response, decoded when possible.

        Adds the User-Agent header and, when set, the X-Sputnik-Name and
        X-Sputnik-Version headers.  When a charset is known (from the
        response headers or default_charset) the response is wrapped in a
        codecs reader for that charset; otherwise the raw response object
        is returned.
        """
        # NOTE(review): self.s is not assigned in this class; presumably
        # provided by Base -- confirm.
        request.add_header('User-Agent', self.s.user_agent())
        if self.s.name:
            request.add_header('X-Sputnik-Name', self.s.name)
        if self.s.version:
            request.add_header('X-Sputnik-Version', self.s.version)

        r = self.opener.open(request)

        if hasattr(r.headers, 'get_content_charset'):  # py3
            charset = r.headers.get_content_charset() or default_charset
        elif hasattr(r.headers, 'getparam'):  # py2
            charset = r.headers.getparam('charset') or default_charset
        else:
            charset = default_charset

        if charset is None:
            return r
        return codecs.getreader(charset)(r)

    def __del__(self):
        """Write the cookies back to disk when the session is collected."""
        # __init__ may have raised before cookie_jar was assigned.
        if hasattr(self, 'cookie_jar'):
            self.cookie_jar.save()
示例6: ScholarQuerier
# 需要导入模块: from http.cookiejar import MozillaCookieJar [as 别名]
# 或者: from http.cookiejar.MozillaCookieJar import load [as 别名]
class ScholarQuerier(object):
"""
ScholarQuerier instances can conduct a search on Google Scholar
with subsequent parsing of the resulting HTML content. The
articles found are collected in the articles member, a list of
ScholarArticle instances.
"""
# Default URLs for visiting and submitting Settings pane, as of 3/14
GET_SETTINGS_URL = ScholarConf.SCHOLAR_SITE + '/scholar_settings?' \
+ 'sciifh=1&hl=en&as_sdt=0,5'
SET_SETTINGS_URL = ScholarConf.SCHOLAR_SITE + '/scholar_setprefs?' \
+ 'q=' \
+ '&scisig=%(scisig)s' \
+ '&inststart=0' \
+ '&as_sdt=1,5' \
+ '&as_sdtp=' \
+ '&num=%(num)s' \
+ '&scis=%(scis)s' \
+ '%(scisf)s' \
+ '&hl=en&lang=all&instq=&inst=569367360547434339&save='
# Older URLs:
# ScholarConf.SCHOLAR_SITE + '/scholar?q=%s&hl=en&btnG=Search&as_sdt=2001&as_sdtp=on
class Parser(ScholarArticleParser120726):
    """Article parser that hands every parsed article to its querier."""

    def __init__(self, querier):
        ScholarArticleParser120726.__init__(self)
        self.querier = querier

    def handle_article(self, art):
        # Forward the parsed article to the owning querier.
        self.querier.add_article(art)
def __init__(self):
    """Create an empty querier and load saved cookies when available."""
    self.articles = []
    self.query = None
    self.cjar = MozillaCookieJar()

    # If we have a cookie file, load it:
    if ScholarConf.COOKIE_JAR_FILE and \
       os.path.exists(ScholarConf.COOKIE_JAR_FILE):
        try:
            self.cjar.load(ScholarConf.COOKIE_JAR_FILE,
                           ignore_discard=True)
            ScholarUtils.log('info', 'loaded cookies file')
        except Exception as msg:
            ScholarUtils.log('warn', 'could not load cookies file: %s' % msg)
            # Start over with a fresh jar after the failed load.
            self.cjar = MozillaCookieJar()  # Just to be safe

    self.opener = build_opener(HTTPCookieProcessor(self.cjar))
    self.settings = None  # Last settings object, if any
def apply_settings(self, settings):
"""
Applies settings as provided by a ScholarSettings instance.
"""
if settings is None or not settings.is_configured():
return True
self.settings = settings
# This is a bit of work. We need to actually retrieve the
# contents of the Settings pane HTML in order to extract
# hidden fields before we can compose the query for updating
# the settings.
html = self._get_http_response(url=self.GET_SETTINGS_URL,
log_msg='dump of settings form HTML',
err_msg='requesting settings failed')
if html is None:
return False
# Now parse the required stuff out of the form. We require the
# "scisig" token to make the upload of our settings acceptable
# to Google.
soup = BeautifulSoup(html)
tag = soup.find(name='form', attrs={'id': 'gs_settings_form'})
if tag is None:
ScholarUtils.log('info', 'parsing settings failed: no form')
return False
tag = tag.find('input', attrs={'type':'hidden', 'name':'scisig'})
if tag is None:
ScholarUtils.log('info', 'parsing settings failed: scisig')
return False
urlargs = {'scisig': tag['value'],
'num': settings.per_page_results,
'scis': 'no',
'scisf': ''}
if settings.citform != 0:
urlargs['scis'] = 'yes'
urlargs['scisf'] = '&scisf=%d' % settings.citform
html = self._get_http_response(url=self.SET_SETTINGS_URL % urlargs,
log_msg='dump of settings result HTML',
err_msg='applying setttings failed')
#.........这里部分代码省略.........
示例7: MozillaCookieJar
# 需要导入模块: from http.cookiejar import MozillaCookieJar [as 别名]
# 或者: from http.cookiejar.MozillaCookieJar import load [as 别名]
"http://security.stackexchange.com/users/8857/b-con",
"http://stackoverflow.com/users/1361836/b-con"
]
logging.basicConfig(filename="/tmp/site-ping.log",
                    datefmt="%m-%d %H:%M",
                    level=logging.DEBUG)

# Extract the cookies from Firefox. The script to do so is co-located.
path = os.path.dirname(os.path.realpath(__file__))
p = subprocess.call(path + "/extract-cookies.sh")

# Load the cookies.
cj = MozillaCookieJar("/tmp/firefox-cookies.txt")
try:
    cj.load()
except OSError as ex:
    # The original caught "FileNotFoundErr", which is not a defined name,
    # so a failed load raised NameError instead of being logged.  OSError
    # covers FileNotFoundError as well as http.cookiejar.LoadError
    # (raised for a malformed cookie file).
    logging.error(ex)
    quit(1)
# Use the cookies to visit each of the URLs.
for url in urls:
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
response = opener.open(url)
html = response.read().decode("utf-8")
response.close()
# The "votes" tab only appears on the user profile when you're logged in.
match = re.search("tab=votes", html)
if match:
示例8: __init__
# 需要导入模块: from http.cookiejar import MozillaCookieJar [as 别名]
# 或者: from http.cookiejar.MozillaCookieJar import load [as 别名]
class PowerSchool:
    """This class manages cookies for accessing PowerSchool, as well as
    providing facilities for retrieving pages."""

    def __init__(self, host=DEFAULT_HOST, cookiejar=None, debug=False):
        """Params:
            host: the protocol, hostname, and port (without a trailing slash)
                that is the root of the PowerSchool url.
            cookiejar: An http.cookiejar.CookieJar or subclass. If a FileCookieJar,
                cookies will be saved after every request.
            debug: sets verbose mode"""
        self.DEBUG = debug
        self.host = host
        self.setCookieJar(cookiejar)

    def setCookieJar(self, cookiejar):
        """Changes the CookieJar used to manage the session.
        Existing cookies will not be transferred.

        cookiejar may also be a file path (str); a MozillaCookieJar is
        then created for it and loaded when the file already exists.
        Returns: the old CookieJar"""
        tmpcookies = getattr(self, "cookies", None)
        if isinstance(cookiejar, str):
            self.cookies = MozillaCookieJar(cookiejar)
            if os.path.exists(cookiejar):
                self.cookies.load(ignore_discard=True)
        else:
            self.cookies = cookiejar
        self.opener = build_opener(HTTPCookieProcessor(self.cookies))
        return tmpcookies

    def _get_page(self, url, data=None):
        """Open url (sending data url-encoded when given) and return the response."""
        start = time.time()
        page = (self.opener.open(url, urlencode(data).encode()) if data else
                self.opener.open(url))
        if self.DEBUG:
            print("Request time: {}".format(time.time()-start))
        # Only file-backed jars have save(); plain CookieJar objects do not.
        if hasattr(self.cookies, "save"):
            self.cookies.save(ignore_discard=True)
        return page

    def _read_page(self, url, data=None):
        """Fetch url, remember the decoded body as the last page and return it."""
        self.__last_page = self._get_page(url, data).read().decode()
        if self.DEBUG:
            # The context manager closes the dump file even if write() fails.
            with open("/tmp/pschool-debug-temp.html", "w") as fd:
                fd.write(self.__last_page)
        return self.__last_page

    def _get_url(self, url):
        """Join host and url, inserting the leading slash when url lacks one."""
        return self.host + (url if url.startswith("/") else "/"+url)

    def _check_for_logout(self):
        """Raise LoggedOut when the last page read is the sign-in page."""
        if "Student and Parent Sign In" in self.__last_page:
            raise LoggedOut()

    def login(self, username, password):
        """Login to a PowerSchool session using the supplied credentials.

        Raises InvalidCredentials when the page returned after submitting
        the form is still the sign-in page."""
        data = self._read_page(self._get_url("/public/"))
        # Collect the name/value pairs of the <input> tags on the sign-in page.
        form = dict(re.findall(r'<input .+?name="(.+?)".+?value="(.*?)".+?>',
                               data, re.MULTILINE|re.IGNORECASE))
        form["account"] = username
        form["ldappassword"] = password
        pskey = form["contextData"].encode()
        password = password.encode()
        b64pw = b64encode(md5(password).digest()).decode().rstrip("=")
        # hmac.new() requires an explicit digestmod on Python 3.8+; MD5 was
        # the implicit default that the two-argument calls relied on.
        form["pw"] = hmac.new(pskey, b64pw.encode(), md5).hexdigest()
        form["dbpw"] = hmac.new(pskey, password.lower(), md5).hexdigest()
        self._read_page(self._get_url("/guardian/home.html"), form)
        try:
            self._check_for_logout()
        except LoggedOut:
            raise InvalidCredentials

    def get(self, page="Main", args=(), **kwargs):
        """Retrieves data for and constructs the supplied Page class.

        page may be a page class or the name of one looked up on the
        `pages` module.  Raises TypeError for an unknown page and
        LoggedOut when the session is no longer signed in."""
        if isinstance(page, str):
            page = getattr(pages, page, None)
        if not page:
            raise TypeError("Invalid page")
        data = self._read_page(self._get_url(page.get_url(*args, **kwargs)))
        self._check_for_logout()
        return page(data, self, (args, kwargs))
示例9: ScholarQuerier
# 需要导入模块: from http.cookiejar import MozillaCookieJar [as 别名]
# 或者: from http.cookiejar.MozillaCookieJar import load [as 别名]
class ScholarQuerier(object):
"""
ScholarQuerier instances can conduct a search on Google Scholar
with subsequent parsing of the resulting HTML content. The
articles found are collected in the articles member, a list of
ScholarArticle instances.
"""
SCHOLAR_QUERY_URL = ScholarConf.SCHOLAR_SITE + '/scholar?' \
+ 'as_q=%(words)s' \
+ '&as_epq=%(phrase)s' \
+ '&as_oq=%(words_some)s' \
+ '&as_eq=%(words_none)s' \
+ '&as_occt=%(scope)s' \
+ '&as_sauthors=%(authors)s' \
+ '&as_publication=%(pub)s' \
+ '&as_ylo=%(ylo)s' \
+ '&as_yhi=%(yhi)s' \
+ '&btnG=&hl=en&as_sdt=0,5&num=%(num)s'
# Default URLs for visiting and submitting Settings pane, as of 3/14
GET_SETTINGS_URL = ScholarConf.SCHOLAR_SITE + '/scholar_settings?' \
+ 'sciifh=1&hl=en&as_sdt=0,5'
SET_SETTINGS_URL = ScholarConf.SCHOLAR_SITE + '/scholar_setprefs?' \
+ 'q=' \
+ '&scisig=%(scisig)s' \
+ '&inststart=0' \
+ '&as_sdt=1,5' \
+ '&as_sdtp=' \
+ '&num=%(num)s' \
+ '&scis=%(scis)s' \
+ '%(scisf)s' \
+ '&hl=en&lang=all&instq=&inst=569367360547434339&save='
# Older URLs:
# ScholarConf.SCHOLAR_SITE + '/scholar?q=%s&hl=en&btnG=Search&as_sdt=2001&as_sdtp=on
class Parser(ScholarArticleParser120726):
    """Article parser that hands every parsed article to its querier."""

    def __init__(self, querier):
        ScholarArticleParser120726.__init__(self)
        self.querier = querier

    def handle_article(self, art):
        # Forward the parsed article to the owning querier.
        self.querier.add_article(art)
def __init__(self):
    """Create an empty querier and load saved cookies when available."""
    self.articles = []
    self.query = None
    self.cjar = MozillaCookieJar()

    # If we have a cookie file, load it:
    if ScholarConf.COOKIE_JAR_FILE and \
       os.path.exists(ScholarConf.COOKIE_JAR_FILE):
        try:
            self.cjar.load(ScholarConf.COOKIE_JAR_FILE,
                           ignore_discard=True)
            ScholarUtils.log('debug', 'loaded cookies file')
        except Exception as msg:
            ScholarUtils.log('warn', 'could not load cookies file: %s' % msg)
            # Start over with a fresh jar after the failed load.
            self.cjar = MozillaCookieJar()  # Just to be safe

    self.opener = build_opener(HTTPCookieProcessor(self.cjar))
    self.settings = None  # Last settings object, if any
def apply_settings(self, settings):
"""
Applies settings as provided by a ScholarSettings instance.
"""
if settings is None or not settings.is_configured():
return True
self.settings = settings
# This is a bit of work. We need to actually retrieve the
# contents of the Settings pane HTML in order to extract
# hidden fields before we can compose the query for updating
# the settings.
try:
req = Request(url=self.GET_SETTINGS_URL,
headers={'User-Agent': ScholarConf.USER_AGENT})
hdl = self.opener.open(req)
html = hdl.read()
except Exception as err:
ScholarUtils.log('debug', 'requesting settings failed: %s' % err)
return False
# Now parse the required stuff out of the form. We require the
# "scisig" token to make the upload of our settings acceptable
# to Google.
soup = BeautifulSoup(html)
tag = soup.find(name='form', attrs={'id': 'gs_settings_form'})
if tag is None:
ScholarUtils.log('debug', 'parsing settings failed: no form')
return False
tag = tag.find('input', attrs={'type':'hidden', 'name':'scisig'})
if tag is None:
ScholarUtils.log('debug', 'parsing settings failed: scisig')
#.........这里部分代码省略.........
示例10: BaseClient
# 需要导入模块: from http.cookiejar import MozillaCookieJar [as 别名]
# 或者: from http.cookiejar.MozillaCookieJar import load [as 别名]
class BaseClient(object):
"""
Базовый класс для работы с удалённым API
"""
username = None
password = None
url = 'http://localhost:8000/api/'
headers = {
"Content-type": "application/json",
"Accept": "application/json",
"Accept-Encoding": "gzip, deflate",
}
timeout = 10000
cookiejar = None
print_info = False
code_page = 'utf-8'
use_basic_auth = False
def __init__(self, cookie_filename=None, **kwargs):
    """Apply keyword overrides and optionally attach a cookie jar.

    Every keyword argument is set as an instance attribute of the same
    name.  When cookie_filename is truthy it is passed to
    self.set_cookiejar().
    """
    for key, val in kwargs.items():
        setattr(self, key, val)
    if cookie_filename:
        self.set_cookiejar(cookie_filename)
def set_cookiejar(self, name):
    """Attach a MozillaCookieJar stored in the file `name`.

    The jar is loaded from the file; when loading raises IOError the
    (empty) jar is saved instead, which creates the file.
    """
    self.cookiejar = MozillaCookieJar(name)
    try:
        self.cookiejar.load()
    except IOError:
        self.cookiejar.save()
def get_request(self, data):
    """
    Return a new request object.

    data is sent url-encoded as the 'jsonData' form field.  The request
    carries a copy of self.headers, plus a Basic 'Authorization' header
    when use_basic_auth is set and both username and password are
    non-empty.
    """
    params = urlencode({'jsonData': data}).encode('ascii')

    # Copy so that the shared headers mapping is never mutated.
    headers = dict(self.headers)

    if self.use_basic_auth and self.username and self.password:
        credentials = '%s:%s' % (self.username, self.password)
        # str.encode() yields the same bytes as the former six.PY3 /
        # Python 2 branches, so the version switch is unnecessary.
        token = base64.b64encode(credentials.encode('utf-8'))
        headers['Authorization'] = b'Basic ' + token

    return Request(url=self.url, data=params, headers=headers)
def get_opener(self):
    """
    Return a new opener with the necessary processors.

    A cookie processor bound to self.cookiejar is installed when a
    cookie jar is set; otherwise the default opener is returned.
    """
    handlers = ()
    if self.cookiejar is not None:
        handlers += (HTTPCookieProcessor(self.cookiejar),)
    return build_opener(*handlers)
def get_response(self, request):
    """
    Open the request with a new opener and return the response.

    Uses self.timeout for the call and saves the cookie jar, when one is
    set, after a successful request.  Errors raised by the opener
    (including IOError) propagate to the caller unchanged.
    """
    opener = self.get_opener()
    # The former "except IOError as e: raise e" only re-raised the same
    # exception, so the call is made directly.
    response = opener.open(request, timeout=self.timeout)
    if self.cookiejar is not None:
        self.cookiejar.save()
    return response
def get_result(self, data):
"""
Запрашивает данные из API
"""
if self.print_info:
print('Kwargs: %s' % data.get('kwargs', {}))
jsondata = json.dumps(data)
request = self.get_request(jsondata)
#.........这里部分代码省略.........
示例11: ScholarQuerier
# 需要导入模块: from http.cookiejar import MozillaCookieJar [as 别名]
# 或者: from http.cookiejar.MozillaCookieJar import load [as 别名]
class ScholarQuerier(object):
"""
ScholarQuerier instances can conduct a search on Google Scholar
with subsequent parsing of the resulting HTML content. The
articles found are collected in the articles member, a list of
ScholarArticle instances.
"""
# Default URLs for visiting and submitting Settings pane, as of 3/14
GET_SETTINGS_URL = ScholarConf.SCHOLAR_SITE + "/scholar_settings?" + "sciifh=1&hl=en&as_sdt=0,5"
SET_SETTINGS_URL = (
ScholarConf.SCHOLAR_SITE
+ "/scholar_setprefs?"
+ "q="
+ "&scisig=%(scisig)s"
+ "&inststart=0"
+ "&as_sdt=1,5"
+ "&as_sdtp="
+ "&num=%(num)s"
+ "&scis=%(scis)s"
+ "%(scisf)s"
+ "&hl=en&lang=all&instq=&inst=569367360547434339&save="
)
# Older URLs:
# ScholarConf.SCHOLAR_SITE + '/scholar?q=%s&hl=en&btnG=Search&as_sdt=2001&as_sdtp=on
class Parser(ScholarArticleParser120726):
    """Article parser that reports results back to its querier."""

    def __init__(self, querier):
        ScholarArticleParser120726.__init__(self)
        self.querier = querier

    def handle_num_results(self, num_results):
        # Record the result count on the active query, when there is one.
        if self.querier is not None and self.querier.query is not None:
            self.querier.query["num_results"] = num_results

    def handle_article(self, art):
        # Forward the parsed article to the owning querier.
        self.querier.add_article(art)
def __init__(self):
    """Create an empty querier and load saved cookies when available."""
    self.articles = []
    self.query = None
    self.cjar = MozillaCookieJar()

    # If we have a cookie file, load it:
    if ScholarConf.COOKIE_JAR_FILE and os.path.exists(ScholarConf.COOKIE_JAR_FILE):
        try:
            self.cjar.load(ScholarConf.COOKIE_JAR_FILE, ignore_discard=True)
            ScholarUtils.log("info", "loaded cookies file")
        except Exception as msg:
            ScholarUtils.log("warn", "could not load cookies file: %s" % msg)
            # Start over with a fresh jar after the failed load.
            self.cjar = MozillaCookieJar()  # Just to be safe

    self.opener = build_opener(HTTPCookieProcessor(self.cjar))
    self.settings = None  # Last settings object, if any
def apply_settings(self, settings):
"""
Applies settings as provided by a ScholarSettings instance.
"""
if settings is None or not settings.is_configured():
return True
self.settings = settings
# This is a bit of work. We need to actually retrieve the
# contents of the Settings pane HTML in order to extract
# hidden fields before we can compose the query for updating
# the settings.
html = self._get_http_response(
url=self.GET_SETTINGS_URL, log_msg="dump of settings form HTML", err_msg="requesting settings failed"
)
if html is None:
return False
# Now parse the required stuff out of the form. We require the
# "scisig" token to make the upload of our settings acceptable
# to Google.
soup = BeautifulSoup(html)
tag = soup.find(name="form", attrs={"id": "gs_settings_form"})
if tag is None:
ScholarUtils.log("info", "parsing settings failed: no form")
return False
tag = tag.find("input", attrs={"type": "hidden", "name": "scisig"})
if tag is None:
ScholarUtils.log("info", "parsing settings failed: scisig")
return False
urlargs = {"scisig": tag["value"], "num": settings.per_page_results, "scis": "no", "scisf": ""}
if settings.citform != 0:
urlargs["scis"] = "yes"
urlargs["scisf"] = "&scisf=%d" % settings.citform
html = self._get_http_response(
url=self.SET_SETTINGS_URL % urlargs,
#.........这里部分代码省略.........
示例12: ScholarQuerier
# 需要导入模块: from http.cookiejar import MozillaCookieJar [as 别名]
# 或者: from http.cookiejar.MozillaCookieJar import load [as 别名]
class ScholarQuerier(object):
    """
    ScholarQuerier instances can conduct a search on Google Scholar
    with subsequent parsing of the resulting HTML content.  The
    articles found are collected in the articles member, a list of
    ScholarArticle instances.
    """

    class Parser(ScholarArticleParser):
        """Article parser that hands every parsed article to its querier."""

        def __init__(self, querier):
            ScholarArticleParser.__init__(self)
            self.querier = querier

        def handle_article(self, art):
            self.querier.add_article(art)

    def __init__(self):
        """Create an empty querier and load saved cookies when available."""
        self.articles = []
        self.query = None
        self.cjar = MozillaCookieJar()

        # If we have a cookie file, load it:
        if ScholarConf.COOKIE_JAR_FILE and \
           os.path.exists(ScholarConf.COOKIE_JAR_FILE):
            try:
                self.cjar.load(ScholarConf.COOKIE_JAR_FILE, ignore_discard=True)
                ScholarUtils.log('info', 'loaded cookies file')
            except Exception as msg:
                ScholarUtils.log('warn', 'could not load cookies file: %s' % msg)
                self.cjar = MozillaCookieJar()  # Just to be safe

        self.opener = build_opener(HTTPCookieProcessor(self.cjar))
        self.settings = None  # Last settings object, if any

    def send_query(self, query):
        """
        This method initiates a search query (a ScholarQuery instance)
        with subsequent parsing of the response.
        """
        self.clear_articles()
        self.query = query

        html = self._get_http_response(url=query.get_url(),
                                       log_msg='dump of query response HTML',
                                       err_msg='results retrieval failed')
        if html is None:
            return

        self.parse(html)

    def parse(self, html):
        """
        This method allows parsing of provided HTML content.
        """
        parser = self.Parser(self)
        parser.parse(html)

    def add_article(self, art):
        """Append art to the list of collected articles."""
        self.articles.append(art)

    def clear_articles(self):
        """Clears any existing articles stored from previous queries."""
        self.articles = []

    def _get_http_response(self, url, log_msg=None, err_msg=None):
        """
        Helper method, sends HTTP request and returns response payload.

        Returns the raw body on success.  Any exception is logged at
        'info' level, prefixed with err_msg, and None is returned.
        """
        if log_msg is None:
            log_msg = 'HTTP response data follow'
        if err_msg is None:
            err_msg = 'request failed'
        try:
            ScholarUtils.log('info', 'requesting %s' % unquote(url))

            req = Request(url=url, headers={'User-Agent': ScholarConf.USER_AGENT})
            hdl = self.opener.open(req)
            html = hdl.read()

            ScholarUtils.log('debug', log_msg)
            ScholarUtils.log('debug', '>>>>' + '-'*68)
            ScholarUtils.log('debug', 'url: %s' % hdl.geturl())
            ScholarUtils.log('debug', 'result: %s' % hdl.getcode())
            ScholarUtils.log('debug', 'headers:\n' + str(hdl.info()))
            # NOTE(review): a body that is not valid UTF-8 raises here and
            # is then reported as a failed request by the handler below.
            ScholarUtils.log('debug', 'data:\n' + html.decode('utf-8'))  # For Python 3
            ScholarUtils.log('debug', '<<<<' + '-'*68)

            return html
        except Exception as err:
            ScholarUtils.log('info', err_msg + ': %s' % err)
            return None
示例13: put
# 需要导入模块: from http.cookiejar import MozillaCookieJar [as 别名]
# 或者: from http.cookiejar.MozillaCookieJar import load [as 别名]
def put(self, job_dict, args):
if not isinstance(job_dict, dict):
raise Exception("Jobs must be submitted as dictionaries")
# Make this a DotDict to make accessing keys cleaner
job = DotDict(job_dict)
# URL is the only thing required in each datum
if not "url" in job:
raise Exception("No url specified")
# Add an http prefix onto our URL, if its not
# explicitly defined as HTTP/HTTPS
if job.url[:4] != "http":
job.url = "http://" + job.url
# Other options can be inherited from those specified
# on the command line. Do some sanity checking here, too
# Set our method (GET, POST, etc)
if not "method" in job:
job.method = args.method
# Read in our job delay...
try:
job.delay = (job.delay/1000.0
if 'delay' in job else args.delay/1000.0)
except ValueError:
raise Exception("Delay must be an integer")
# ... and set our query parameters
job.params = {}
job.orig_url = job.url
if "?" in job.url:
job.url, query_string = job.url.split("?", 1)
job.params = parse_qs(query_string)
# ... and our authentication (if any)
if "auth" in job:
job.auth = job.auth.split(":",1)
elif args.auth:
job.auth = args.auth.split(":",1)
else:
job.auth = None
job.auth = None
if "authtype" in job:
job.authtype = job.authtype
else:
job.authtype = args.authtype
if job.auth and len(job.auth) == 1:
raise Exception("Credentials must be in username:password format")
if job.authtype not in ("basic","digest"):
raise Exception("Auth type must be one of: basic, digest")
# ... and our job counter
try:
job.count = int(job.count) if 'count' in job else args.num
except ValueError:
raise Exception("Count must be an integer")
# ... and cookies!
try:
cj = MozillaCookieJar()
if "cookiejar" in job:
cj.load(job.cookiejar)
job.cookiejar = cj
elif args.cookiejar:
cj.load(args.cookiejar)
job.cookiejar = cj
else:
job.cookiejar = None
except Exception as e:
raise Exception("Unable to load cookie jar: {}".format(e))
# ... our insecure option
if not "insecure" in job:
job.insecure = args.insecure
else:
if not isinstance(job.insecure, bool):
raise Exception("Insecure flag must be a boolean")
# Fix up method case; RFCs 7230/1 state method is case sensitive,
# but all current recognized methods are upper case, soooo...
job.method = job.method.upper()
# Now turn our list of header key:value pairs into
# the dict that the requests module requires
header_list = []
# Coalesce headers from the command line and the job/url file, if any
if "headers" in job:
if not isinstance(job.headers, list):
raise Exception("Headers must be in list form")
header_list = job.headers + args.header
else:
header_list = args.header
#.........这里部分代码省略.........
示例14: Aurploader
# 需要导入模块: from http.cookiejar import MozillaCookieJar [as 别名]
# 或者: from http.cookiejar.MozillaCookieJar import load [as 别名]
class Aurploader(object):
"""
A user object for interactive actions.
"""
def __init__(
    self,
    cookiejar_path=None,
    cookiejar=None,
    token=None,
    categories=None
):
    """
    cookiejar_path: path of the cookie file; defaults to the value of
        get_default_cookiejar_path()
    cookiejar: a MozillaCookieJar object; when omitted a new jar is
        created and self.load_cookies() is called
    token: a user token for submitting form data
    categories: package categories
    """
    if cookiejar_path is None:
        cookiejar_path = get_default_cookiejar_path()
    self.cookiejar_path = cookiejar_path

    if cookiejar is None:
        self.cookiejar = MozillaCookieJar()
        # NOTE(review): load_cookies() is defined outside this excerpt;
        # presumably it fills the jar from cookiejar_path -- confirm.
        self.load_cookies()
    else:
        self.cookiejar = cookiejar

    # TODO
    # Find way to use this with URL opener. (urlopen accepts a capath arg)
    # CA_PATH = '/etc/ssl/certs'
    self.opener = build_opener(HTTPCookieProcessor(self.cookiejar))
    self.token = token
    self.categories = categories

    # self.rpc = AUR(ttl=0, clean=False)
    self.rpc = AUR()
def get_info(self, pkgname):
    """
    Get package information from the RPC interface.

    Returns the first item yielded by self.rpc.info(pkgname), or None
    when it yields nothing.
    """
    return next(iter(self.rpc.info(pkgname)), None)
def parse_pkgsubmit(self):
    """
    Parse the pkgsubmit page.

    This will return package categories along with hidden inputs such as the
    the token. If the returned values are empty then the user is not currently
    logged in, so it doubles as a login check.
    """
    parser = pkgsubmitParser()

    with self.opener.open(PKGSUBMIT_URL) as f:
        parser.feed(f.read().decode())

    # Keep the previous token when the page did not provide one.
    if parser.token:
        self.token = parser.token
    # NOTE(review): the excerpt lost its indentation; categories are
    # assumed to be updated unconditionally -- confirm against upstream.
    self.categories = parser.categories
def login(self, user=None, passwd=None, login_file=None, remember_me=True):
    """
    Log in to the AUR.

    user, passwd: credentials; each missing value is prompted for
        interactively.
    login_file: when given, both credentials are read from it with
        load_login_file(), replacing user and passwd.
    remember_me: when true, 'remember_me=1' is added to the login form.
    """
    if login_file is not None:
        user, passwd = load_login_file(login_file)

    if user is None or passwd is None:
        self.rpc.log("logging in to the AUR")

    if user is None:
        user = input('Username: ')

    if passwd is None:
        passwd = getpass()

    data = [
        ('user', user),
        ('passwd', passwd)
    ]

    if remember_me:
        data.append(('remember_me', '1'))

    data = urlencode(data).encode('UTF-8')

    # The response body is not needed; the context manager only makes
    # sure the connection is closed again.
    with self.opener.open(LOGIN_URL, data) as f:
        pass
# python3-AUR could be used to cache the data, but sometimes the data must be
# fresh, such as when confirming the upload.
#.........这里部分代码省略.........