This article collects typical usage examples of the Python method mechanize.Browser.set_handle_robots. If you are unsure what Browser.set_handle_robots does or how to use it, the curated code examples below may help. You can also read further about the class it belongs to, mechanize.Browser.
The following shows 15 code examples of Browser.set_handle_robots, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
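Before the individual examples, here is a minimal sketch of the common pattern (the URL and User-Agent string below are placeholder assumptions, not taken from any example): set_handle_robots(False) tells the mechanize Browser not to fetch or obey robots.txt, which would otherwise block scripted access to disallowed pages (mechanize raises RobotExclusionError in that case).
# Minimal sketch: disable robots.txt handling before opening a page
from mechanize import Browser

br = Browser()
br.set_handle_robots(False)                        # do not download or honour robots.txt
br.addheaders = [('User-agent', 'Mozilla/5.0')]    # placeholder User-Agent
response = br.open('http://example.com/')          # placeholder URL
print response.read()[:200]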
Example 1: find_first_article
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_handle_robots [as alias]
def find_first_article():
    mech = Browser()
    cj = cookielib.LWPCookieJar()
    mech.set_handle_equiv(True)
    # mech.set_handle_gzip(True)
    mech.set_handle_redirect(True)
    mech.set_handle_referer(True)
    mech.set_handle_robots(False)
    # mech.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
    mech.addheaders = [
        (
            "User-agent",
            "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1",
        )
    ]
    page = mech.open("https://bitcointalk.org/index.php?board=77.0")
    html = page.read()
    soup = BeautifulSoup(html)
    first_article_tag = soup.find("td", class_="windowbg")
    global startingpost
    startingpost = first_article_tag.span.a.get("href")
    print startingpost
Example 2: get_browser
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_handle_robots [as alias]
def get_browser():
    # Browser
    br = Browser()
    # Cookie Jar
    #cj = cookielib.LWPCookieJar()
    #br.set_cookiejar(cj)
    # Browser options
    br.set_handle_equiv(True)
    br.set_handle_gzip(True)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)
    # Follow refresh 0 but do not hang on refresh > 0
    #br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
    # Want debugging messages?
    #br.set_debug_http(True)
    #br.set_debug_redirects(True)
    #br.set_debug_responses(True)
    # User-Agent (this is cheating, ok?)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    return br
Example 3: on_task_start
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_handle_robots [as alias]
def on_task_start(self, task, config):
    try:
        from mechanize import Browser
    except ImportError:
        raise PluginError('mechanize required (python module), please install it.', log)
    userfield = config.get('userfield', 'username')
    passfield = config.get('passfield', 'password')
    url = config['url']
    username = config['username']
    password = config['password']
    br = Browser()
    br.set_handle_robots(False)
    try:
        br.open(url)
    except Exception as e:
        # TODO: improve error handling
        raise PluginError('Unable to post login form', log)
    #br.set_debug_redirects(True)
    #br.set_debug_responses(True)
    #br.set_debug_http(True)
    for form in br.forms():
        loginform = form
        try:
            loginform[userfield] = username
            loginform[passfield] = password
            break
        except Exception as e:
            pass
    else:
        received = os.path.join(task.manager.config_base, 'received')
        if not os.path.isdir(received):
            os.mkdir(received)
        filename = os.path.join(received, '%s.formlogin.html' % task.name)
        with open(filename, 'w') as f:
            f.write(br.response().get_data())
        log.critical('I have saved the login page content to %s for you to view' % filename)
        raise PluginError('Unable to find login fields', log)
    br.form = loginform
    br.submit()
    cookiejar = br._ua_handlers["_cookies"].cookiejar
    # Add cookiejar to our requests session
    task.requests.add_cookiejar(cookiejar)
    # Add handler to urllib2 default opener for backwards compatibility
    handler = urllib2.HTTPCookieProcessor(cookiejar)
    if urllib2._opener:
        log.debug('Adding HTTPCookieProcessor to default opener')
        urllib2._opener.add_handler(handler)
    else:
        log.debug('Creating new opener and installing it')
        urllib2.install_opener(urllib2.build_opener(handler))
Example 4: downloadBuild
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_handle_robots [as alias]
def downloadBuild(build_file, target_directory):
    """Download a build file from the SESI website and place it in the target
    directory.
    """
    print "Attempting to download build: {}".format(build_file)
    user, password = _getSESIAuthInfo()
    browser = Browser()
    browser.set_handle_robots(False)
    browser.open("https://www.sidefx.com/login/?next=/download/daily-builds/")
    browser.select_form(nr=0)
    browser.form['username'] = user
    browser.form['password'] = password
    browser.submit()
    browser.open('http://www.sidefx.com/download/daily-builds/')
    resp = browser.follow_link(text=build_file, nr=0)
    url = resp.geturl()
    url += 'get/'
    resp = browser.open(url)
    target_path = os.path.join(target_directory, build_file)
    print "Downloading to {}".format(target_path)
    with open(target_path, 'wb') as handle:
        handle.write(resp.read())
    print "Download complete"
    return target_path
Example 5: github_connect
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_handle_robots [as alias]
def github_connect(path=""):
    """Connect to the website"""
    br = Browser()
    br.set_handle_robots(False)
    br.addheaders = [('User-agent', 'Firefox')]
    br.open('https://github.com/%s' % path)
    return br
Example 6: __init__
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_handle_robots [as alias]
def __init__(self, config):
    self.login_url = 'http://%s.ogame.gameforge.com/' % config.country
    # http://s114-br.ogame.gameforge.com/game/index.php?page=overview
    self.index_url = 'http://s%s-%s.ogame.gameforge.com' % (config.universe, config.country) + '/game/index.php'
    headers = [('User-agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) \
        AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36')]
    # Authentication data
    self.username = config.username
    self.password = config.password
    self.universe = config.universe
    self.country = config.country
    self.logger = logging.getLogger('ogame-bot')
    # Setting up the browser
    self.cj = cookielib.LWPCookieJar()
    br = Browser()
    br.set_cookiejar(self.cj)
    br.set_handle_robots(False)
    br.addheaders = headers
    # self.path = os.path.dirname(os.path.realpath(__file__))
    # name of the cookies file
    # self.cookies_file_name = os.path.join(self.path, 'cookies.tmp')
    self.cookies_file_name = 'cookies.tmp'
    super(AuthenticationProvider, self).__init__(br, config)
Example 7: respond
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_handle_robots [as alias]
def respond(bot, event):
    matches = []
    for (ident, (regex, template)) in bot.commands_cache.iteritems():
        match = regex.search(event.message)
        if match:
            params = match.groupdict()
            params['nick'] = event.source
            heappush(
                matches, (match.start(0), template.safe_substitute(params))
            )
    if not matches:
        if event.message.find("http") != -1:
            br = Browser()
            try:
                br.set_handle_robots(False)
                br.open(event.message)
                bot.send_channel_action(bot.config.messages.urltitle, title=format.bold('\"' + br.title() + '\"'))
            except:
                return False
            return True
        else:
            return False
    bot.send_channel_action(matches[0][1])
    return True
Example 8: get_machines
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_handle_robots [as alias]
def get_machines(start, num_pages):
    mech = Browser()
    mech.set_handle_robots(False)
    mech.set_handle_equiv(False)
    mech.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    machines = []
    try:
        page_num = 0
        for page_num in range(start, num_pages + 1):
            print("page %d" % (page_num))
            url = "http://www.pinpedia.com/machine?page=%d" % page_num
            html_page = mech.open(url)
            html = html_page.read()
            machines += parse_page(html)
            time.sleep(0.1)
    except Exception as e:
        print e
    print("finished at page %s" % page_num)
    print("storing machines to machines.txt")
    with open('machines.txt', 'w') as fh:
        for machine in machines:
            fh.write(machine + "\n")
Example 9: sa_login
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_handle_robots [as alias]
def sa_login(sa_userName, sa_password):
    '''Login to spolecneaktivity.cz portal as sa_userName user.
    Temporary(?) no check
        - if online,
        - if not logged as other user
        - successfully logged in
    '''
    url_login = 'http://www.spolecneaktivity.cz'
    br = Browser()
    br.set_handle_robots(False)
    ok = False
    try:
        r = br.open(url_login)
        rru = r.read().upper()
        if "LOGIN" in rru and "HESLO" in rru:  # not logged in yet
            br.select_form(nr=0)
            br["userName"] = sa_userName
            br["password"] = sa_password
            r = br.submit()
            ok = True
    except:
        pass
    if not ok:
        # Czech message: "Login to spolecneaktivity.cz failed"
        print u"sa_parse.sa_login: Selhalo přihlášení do spolecneaktivity.cz"
    return br
Example 10: get_google_news_by_url
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_handle_robots [as alias]
def get_google_news_by_url(url):
    # Construct browser object
    browser = Browser()
    ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.81 Safari/537.36'
    browser.addheaders = [('User-Agent', ua), ('Accept', '*/*')]
    # Do not observe rules from robots.txt
    browser.set_handle_robots(False)
    # Create HTML document
    html = fromstring(browser.open(url).read())
    # get number of pages
    xpath_pages = '//a[@class="fl"]'
    page_num = len(html.xpath(xpath_pages)) + 1
    # get all pages url
    urls = generate_url_pages(url, page_num)
    print 'On ' + str(len(urls)) + ' pages:'
    df = [None] * page_num
    # iterate through all pages of this url
    for index, url in enumerate(urls):
        page_html = fromstring(browser.open(url).read())
        df[index] = get_google_news_in_page(page_html)
    return pd.concat(df, ignore_index=True)
Example 11: create
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_handle_robots [as alias]
def create():
    while 1:
        try:
            br = Browser()
            br.set_handle_robots(False)
            br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
            br.open('https://classic.netaddress.com/tpl/Subscribe/Step1?Locale=en&AdInfo=&Referer=http%3A%2F%2Fwww.netaddress.com%2F&T=1332304112864372')
            br.select_form(name='Step1')
            userid = randomname()
            br.form['usrUserId'] = userid
            pwd = randomname()
            br.form['newPasswd'] = pwd
            br.form['RPasswd'] = pwd
            br.form['usrFirst'] = randomname()
            br.form['usrLast'] = randomname()
            br.form['usrTimeZone'] = ['Africa/Abidjan']
            br.form['usrCn'] = ['AF']
            br.submit()
            print "Created " + userid + " with password " + pwd
            filo = open(filex, 'a')
            filo.write(userid + "@usa.net" + ":" + pwd + "\n")
            filo.close()
        except:
            print "error"
Example 12: main
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_handle_robots [as alias]
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--total-jobs', metavar='<total-jobs>', help='total number of jobs downloading documents', type=int)
    parser.add_argument('--job', metavar='<job>', help='job number between 1 and <total-jobs>', type=int)
    args = parser.parse_args()
    check_args(parser, args)
    br = Browser()
    br.set_handle_robots(False)
    # br.set_debug_responses(True)
    data = urlencode({'user': USERNAME, 'pass': getpass()})
    document_urls = [LOGIN_PREFIX + url.strip() + '&view=etext' for url in file(DOCUMENT_URLS_FILE)]
    start = args.job - 1
    step = args.total_jobs
    for url in iterview(document_urls[start::step]):
        try:
            get_document_pages(br, url, data)
        except Exception as e:
            print >> sys.stderr, '\n', (url, e)
Example 13: getRandomXKCDComic
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_handle_robots [as alias]
def getRandomXKCDComic(urlBase):
    br = Browser()
    br.addheaders = [('User-agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6; en-us) AppleWebKit/531.9 (KHTML, like Gecko) Version/4.0.3 Safari/531.9')]
    br.set_handle_robots(False)
    # XKCD Comics are enumerated in the following type by URL: http://www.xkcd.com/1, http://www.xkcd.com/2, ..., http://www.xkcd.com/n
    upperBound = 1
    lowerBound = 1
    # Multiply by two until address no longer exists
    while True:
        link = urlBase + str(upperBound) + "/"
        try:
            response = br.open(link)
        except:
            break
        lowerBound = upperBound
        upperBound = upperBound * 2
    # Binary Search for last Comic
    while True:
        pivot = (upperBound + lowerBound)/2
        link = urlBase + str(pivot) + "/"
        if lowerBound == upperBound or pivot == lowerBound:
            randomComicID = random.randint(1, pivot)
            randPageLink = urlBase + str(randomComicID) + "/"
            return br.open(randPageLink)
        try:
            response = br.open(link)
            lowerBound = pivot
        except:
            upperBound = pivot
Example 14: scrape
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_handle_robots [as alias]
def scrape(self):
    """
    Opens the html page and parses the pdf links.
    """
    browser = Browser()
    #-----------
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    values1 = {'name' : 'Michael Foord',
               'location' : 'Northampton',
               'language' : 'Python' }
    headers = { 'User-Agent' : user_agent }
    browser.set_handle_redirect(True)
    browser.set_handle_referer(True)
    browser.set_handle_robots(False)
    browser.addheaders = [('User-Agent', 'Firefox')]
    #-------------
    browser.set_handle_robots(False)
    html = browser.open(self.site)
    lines = html.read().splitlines()
    for line in lines:
        urls = re.findall('<a href="?\'?([^"\'>]*)', line)
        for url in urls:
            if '.pdf"' in url:
                self.pdf_urls.append(url)
Example 15: authenticate
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_handle_robots [as alias]
def authenticate(self):
    if self._client:
        return self._client
    try:
        client = Browser()
        client.set_handle_redirect(True)
        client.set_handle_robots(False)
        client.open('http://%s/cgi-bin/videoconfiguration.cgi' % self.camera.host)
        client.select_form('frmLOGON')
        client['LOGIN_ACCOUNT'] = self.camera.username
        client['LOGIN_PASSWORD'] = self.camera.password
        client.submit()
        try:
            client.select_form('frmLOGON')
        except FormNotFoundError:
            pass
        else:
            raise AccessDenied('Access denied for user `%s`' % self.camera.username)
    except AccessDenied:
        raise
    except Exception, e:
        raise ImproperlyConfigured(e.message)