This page collects typical usage examples of the Python method mechanize.Browser.viewing_html. If you have been wondering what Browser.viewing_html does, how to call it, or what real-world uses look like, the curated examples below should help. You can also explore the containing class, mechanize.Browser,
for further context.
The following presents 13 code examples of Browser.viewing_html, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Python code samples.
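Before the examples, a quick orientation: viewing_html() reports whether the response the Browser is currently viewing appears to be HTML (based on its content type), so the recurring pattern below is to call it as a sanity check right after open(), submit(), or follow_link(), before using HTML-only helpers such as title(), forms(), or links(). A minimal sketch of that pattern (the URL is a placeholder, not taken from any example below):

# Minimal usage sketch for Browser.viewing_html().
# http://example.org/ is a placeholder URL, not part of the collected examples.
from mechanize import Browser

br = Browser()
br.open("http://example.org/")
if br.viewing_html():
    # The response looks like HTML, so the HTML-only helpers are safe to use.
    print br.title()
else:
    print "Response is not HTML; skipping HTML-specific processing."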
Example 1: login_and_authorize
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import viewing_html [as alias]
def login_and_authorize(authorize_url, config):
    # Also needs: from mechanize import FormNotFoundError
    print "AUTHORIZING", authorize_url
    br = Browser()
    br.set_debug_redirects(True)
    br.open(authorize_url)
    print "FIRST PAGE", br.title(), br.geturl()
    br.select_form(nr=1)
    br['login_email'] = config['testing_user']
    br['login_password'] = config['testing_password']
    resp = br.submit()
    print "RESULT PAGE TITLE", br.title()
    print "RESULT URL", resp.geturl()
    assert br.viewing_html(), "Looks like it busted."
    try:
        br.select_form(nr=1)
        br.submit()
        assert br.viewing_html(), "Looks like it busted."
        assert "API Request Authorized" in br.title(), "Title Is Wrong (bad email/password?): %r at %r" % (br.title(), br.geturl())
    except FormNotFoundError:
        print "Looks like we're blessed."
Example 2: login_and_authorize
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import viewing_html [as alias]
def login_and_authorize(argv=None):
    # Also needs: import sys, json; from mechanize import FormNotFoundError
    if argv is None:
        argv = sys.argv
    authorize_url = argv[1]
    config = json.loads(argv[2])
    print "AUTHORIZING", authorize_url
    br = Browser()
    br.set_debug_redirects(True)
    br.open(authorize_url)
    print "FIRST PAGE", br.title(), br.geturl()
    br.select_form(nr=2)
    br["login_email"] = config[u"testing_user"]
    br["login_password"] = config[u"testing_password"]
    resp = br.submit()
    print "RESULT PAGE TITLE", br.title()
    print "RESULT URL", resp.geturl()
    assert br.viewing_html(), "Looks like it busted."
    try:
        br.select_form(nr=2)
        br.submit()
        assert br.viewing_html(), "Looks like it busted."
        assert "API Request Authorized" in br.title(), "Title Is Wrong (bad email/password?): %r at %r" % (br.title(), br.geturl())
    except FormNotFoundError:
        print "Looks like we're blessed."
    return "OK"
Example 3: main
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import viewing_html [as alias]
def main():
    br = Browser()
    br.open("http://www.ec.gc.ca/contracts-contrats/default.asp?lang=En&n=168B9233-11")
    # Follow links whose element text matches a regular expression.
    response1 = br.follow_link(text_regex=r"Reports")
    assert br.viewing_html()
    response2 = br.follow_link(text_regex=r"Quarter")
    assert br.viewing_html()
    html = response2.read()
    response2.close()
    parse(html)  # parse() is defined elsewhere in the original source
Example 4: begin_scraper
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import viewing_html [as alias]
def begin_scraper():
    # Also needs: from getpass import getpass
    br = Browser()
    br.addheaders = [('User-agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_8; rv:16:0) Gecko/20100101 Firefox/16.0')]
    br.set_handle_robots(False)
    br.open("https://wwws.mint.com/login.event")
    assert br.viewing_html()
    # Find the index of the login form so select_form(nr=...) picks the right one.
    formcount = 0
    for f in br.forms():
        if str(f.attrs["id"]) == "form-login":
            break
        formcount = formcount + 1
    br.select_form(nr=formcount)
    br["username"] = "[email protected]"  # Put your username here
    br["password"] = getpass()
    # Submit the user credentials to log in to Mint.
    response = br.submit()
    response = br.follow_link(text="Transactions")
    links_to_transactions = br.links(text_regex=r"Export all \d+ transactions")
    # Keep the last matching link.
    link = ""
    for f in links_to_transactions:
        link = f
    response2 = br.follow_link(link)
    text_file = open("transactions.csv", "w")
    text_file.write(response2.read())
    text_file.close()
Example 5: main
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import viewing_html [as alias]
def main():
    # Also needs: import optparse, urllib2; readitlater and configs are
    # project-local modules in the original source.
    # Command line options.
    parser = optparse.OptionParser('usage: %prog [options]')
    parser.add_option('--unread', dest='unread', action='store_true',
                      default=False, help='Only tag unread items')
    (opts, args) = parser.parse_args()
    # Get all items.
    api = readitlater.API(configs.RIL_APIKEY, configs.RIL_USERNAME,
                          configs.RIL_PASSWORD)
    items = api.get(state=('unread' if opts.unread else None))
    list = items['list']
    br = Browser()
    # Iterate over items that are missing a title.
    for k, v in list.items():
        if not v['title']:
            print u'Found: {0} ({1})'.format(v['title'], v['url'])
            try:
                doc = br.open(v['url'])
            except urllib2.HTTPError, e:
                print u'Error fetching page: {0}'.format(e.code)
                continue
            if not br.viewing_html():
                print u'Not an HTML file!'
            else:
                title = br.title()
                print u'New title: {0}'.format(title.decode('ascii', 'ignore'))
                # Send the new title to RIL.
                api.send(update_title=[{'url': v['url'], 'title': title}])
Example 6: get_mechanized_browser
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import viewing_html [as alias]
def get_mechanized_browser(self, url):
    mech = Browser()
    mech.set_handle_robots(False)
    mech.set_debug_redirects(self.debug)
    mech.set_debug_responses(self.debug)
    mech.set_debug_http(self.debug)
    mech.open(url)
    assert mech.viewing_html()
    return mech
Example 7: downloadAll
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import viewing_html [as alias]
def downloadAll(username, courseName):
    # Also needs: import re; from getpass import getpass;
    # from BeautifulSoup import BeautifulSoup. download() is defined
    # elsewhere in the original script.
    br = Browser()
    br.addheaders = [
        (
            "User-agent",
            "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6; en-us) AppleWebKit/531.9 (KHTML, like Gecko) Version/4.0.3 Safari/531.9",
        )
    ]
    br.set_handle_robots(False)
    br.open("https://myvideosu.stanford.edu/oce/currentquarter.aspx")
    assert br.viewing_html()
    br.select_form(name="login")
    br["username"] = username
    br["password"] = getpass()
    # Open the course page for the title you're looking for.
    print "Logging in to myvideosu.stanford.edu..."
    response = br.submit()
    print "Logged in, going to course link."
    response = br.follow_link(text=courseName)
    # Build up a list of lectures.
    print "Loading video links."
    links = []
    for link in br.links(text="WMP"):
        links.append(re.search(r"'(.*)'", link.url).group(1))
    link_file = open("links.txt", "w")
    # Reverse so we download the oldest ones first.
    links.reverse()
    print "Found %d links, getting video streams." % (len(links))
    videos = []
    for link in links:
        response = br.open(link)
        soup = BeautifulSoup(response.read())
        video = soup.find("object", id="WMPlayer")["data"]
        video = re.sub("http", "mms", video)  # switch to the streaming protocol
        video = video.replace(" ", "%20")  # escape spaces, they break URLs
        output_name = re.search(r"[a-z]+[0-9]+[a-z]?/[0-9]+", video).group(0).replace("/", "_")
        output_wmv = output_name + ".wmv"
        link_file.write(video + "\n")
        print video
        output_mp4 = output_name + ".mp4"
        videos.append((video, output_wmv, output_mp4))
    link_file.close()
    print "Downloading %d video streams." % (len(videos))
    for video in videos:
        download(video)
    print "Done!"
Example 8: login_and_authorize
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import viewing_html [as alias]
def login_and_authorize(self, authorize_url):
    from mechanize import Browser, FormNotFoundError
    import getpass
    print "AUTHORIZING", authorize_url
    br = Browser()
    br.set_debug_redirects(True)
    br.open(authorize_url)
    print "FIRST PAGE", br.title(), br.geturl()
    br.select_form(nr=1)
    br['login_email'] = raw_input('Enter your dropbox email: ')
    br['login_password'] = getpass.getpass('Enter your dropbox password: ')
    resp = br.submit()
    print "RESULT PAGE TITLE", br.title()
    print "RESULT URL", resp.geturl()
    assert br.viewing_html(), "Looks like it busted."
    try:
        br.select_form(nr=1)
        br.submit()
        assert br.viewing_html(), "Looks like it busted."
        assert "API Request Authorized" in br.title(), "Title Is Wrong (bad email/password?): %r at %r" % (br.title(), br.geturl())
    except FormNotFoundError:
        print "Looks like we're blessed."
Example 9: downloadAll
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import viewing_html [as alias]
def downloadAll(username, courseName):
    br = Browser()
    br.addheaders = [('User-agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6; en-us) AppleWebKit/531.9 (KHTML, like Gecko) Version/4.0.3 Safari/531.9')]
    br.set_handle_robots(False)
    br.open('https://myvideosu.stanford.edu/oce/currentquarter.aspx')
    assert br.viewing_html()
    br.select_form(name='login')
    br['username'] = username
    br['password'] = getpass()
    # Open the course page for the title you're looking for
    print 'Logging in to myvideosu.stanford.edu...'
    response = br.submit()
    print 'Logged in, going to course link.'
    response = br.follow_link(text=courseName)
    # Build up a list of lectures.
    print 'Loading video links.'
    links = []
    for link in br.links(text='WMP'):
        links.append(re.search(r"'(.*)'", link.url).group(1))
    # So we download the oldest ones first.
    links.reverse()
    print 'Found %d links, getting video streams.' % len(links)
    videos = []
    for link in links:
        response = br.open(link)
        soup = BeautifulSoup(response.read())
        video = soup.find('object', id='WMPlayer')['data']
        video = re.sub('http', 'mms', video)
        video = video.replace(' ', '%20')  # remove spaces, they break urls
        output_name = re.search(r'[a-z]+[0-9]+[a-z]?/[0-9]+', video).group(0).replace('/', '_')
        output_wmv = output_name + '.wmv'
        print video
        videos.append((video, output_wmv))
    print 'Downloading %d video streams.' % (len(videos))
    for video in videos:
        download(video)
    print 'Done!'
Example 10: Browser()
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import viewing_html [as alias]
import sys
sys.path.append('ClientCookie-1.0.3')
import ClientCookie
sys.path.append('ClientForm-0.1.17')
import ClientForm
import re
from mechanize import Browser

br = Browser()
br.set_handle_robots(False)  # set before open() so robots.txt does not block the first request
response1 = br.open("http://www.proxyfire.net/forum/login.php")
assert br.viewing_html()
print br.title()
print response1.geturl()
#print response1.info()  # headers
#print response1.read()  # body
f = br.forms()  # iterable of the forms on the page
# .links() optionally accepts the keyword args of .follow_/.find_link()
#for link in br.links():
#    print link
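As the closing comment of Example 10 notes, links() optionally accepts the same keyword arguments as follow_link()/find_link(). A small sketch of that filtering pattern combined with the usual viewing_html() check (the URL and the "Report" pattern are illustrative placeholders, not taken from the example):

# Hedged sketch: enumerate only the links whose text matches a regex.
from mechanize import Browser

br = Browser()
br.open("http://example.org/")
assert br.viewing_html()
for link in br.links(text_regex=r"Report"):
    print link.url, link.text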
Example 11: downloadAllLectures
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import viewing_html [as alias]
def downloadAllLectures(username, courseName, password, downloadSettings):
    # Helpers such as containsFormByName, assertLoginSuccessful and download
    # are defined elsewhere in the original script.
    br = Browser()
    br.addheaders = [('User-agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6; en-us) AppleWebKit/531.9 (KHTML, like Gecko) Version/4.0.3 Safari/531.9')]
    br.set_handle_robots(False)
    br.open("https://myvideosu.stanford.edu/oce/currentquarter.aspx")
    assert br.viewing_html()
    br.select_form(name="login")
    br["username"] = username
    br["password"] = password
    # Open the course page for the title you're looking for.
    print "Logging in to myvideosu.stanford.edu..."
    response = br.submit()
    # Check for two-factor authentication.
    if containsFormByName(br, "multifactor_send"):
        br.select_form(name="multifactor_send")
        br.submit()
        br.select_form(name="login")
        auth_code = raw_input("Please enter 2-Step Authentication code (text): ")
        br["otp"] = auth_code
        response = br.submit()
    # Assert that the login was successful.
    assertLoginSuccessful(br.forms())
    # Assert that the course exists.
    try:
        response = br.follow_link(text=courseName)
    except:
        print 'Course Read Error: "' + courseName + '" not found'
        return
    print "Logged in, going to course link."
    # Build up a list of lectures.
    print '\n=== Starting "' + courseName + '" ==='
    print "Loading video links."
    links = []
    for link in br.links(text="WMP"):
        links.append(re.search(r"'(.*)'", link.url).group(1))
    link_file = open('links.txt', 'w')
    if not downloadSettings["newestFirst"]:
        links.reverse()  # download the oldest ones first
    print "Found %d links, getting video streams." % (len(links))
    videos = []
    for link in links:
        try:
            response = br.open(link)
            soup = BeautifulSoup(response.read())
        except:
            print '\n'
            print "Error reading " + link
            print 'If this error is unexpected, try installing the html5lib parser for BeautifulSoup. Pages with notes stored on them have been known to crash an outdated parser.'
            print 'Instructions for installing html5lib are at "http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser".'
            print '\n'
            continue
        video = soup.find('object', id='WMPlayer')['data']
        video = re.sub("http", "mms", video)
        video = video.replace(' ', '%20')  # escape spaces, they break URLs
        # Derive output names for the .wmv and .mp4 files.
        output_name = re.search(r"[a-z]+[0-9]+[a-z]?/[0-9]+", video).group(0).replace("/", "_")
        output_wmv = output_name + ".wmv"
        link_file.write(video + '\n')
        output_mp4 = output_name + ".mp4"
        videos.append((video, output_wmv, output_mp4))
        print video
    link_file.close()
    print "Downloading %d video streams." % (len(videos))
    for video in videos:
        download(video, courseName, downloadSettings)
    print "Done!"
Example 12: begin_scraper
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import viewing_html [as alias]
def begin_scraper():
    # Also needs: import sys, os, re; from getpass import getpass;
    # from BeautifulSoup import BeautifulSoup. The _file_* helpers and the
    # module globals (current_folder, courses_to_download_local_txt) are
    # defined elsewhere in the original script.
    global first_class
    global num_vids_already_downloaded
    courses_to_download = []
    courses_file = "%s/%s" % (current_folder, courses_to_download_local_txt)
    if _file_exists(courses_file):
        courses_to_download = _file_to_list(courses_file)
    else:
        print "There is no courses_to_download.txt file.\nPlease create one and populate it with courses"
        sys.exit()
    if len(courses_to_download) == 0:
        print "There are no courses in your courses_to_download.txt file"
        sys.exit()
    course_to_download = _first_line_to_back_of_file(courses_to_download, courses_file)
    course_to_download = _remove_end_newline(course_to_download)
    if len(first_class) == 0:
        first_class = course_to_download
    elif first_class == course_to_download:
        return
    if "->" in course_to_download:
        index = course_to_download.find('->')
        course_to_download = course_to_download[0:index]
    br = Browser()
    br.addheaders = [('User-agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_8; rv:16:0) Gecko/20100101 Firefox/16.0')]
    br.set_handle_robots(False)
    br.open("https://myvideosu.stanford.edu/oce/currentquarter.aspx")
    assert br.viewing_html()
    br.select_form(name="login")
    br["username"] = "rhintz42"  # Put your username here
    br["password"] = getpass()
    # Open the course page for the title you're looking for.
    response = br.submit()
    response = br.follow_link(text=course_to_download)
    # Build up a list of lectures.
    links = []
    for link in br.links(text="WMP"):
        links.append(re.search(r"'(.*)'", link.url).group(1))
    link_file = open('links.txt', 'w')
    # Reverse so we download the oldest ones first.
    links.reverse()
    videos = []
    for link in links:
        response = br.open(link)
        soup = BeautifulSoup(response.read())
        video = soup.find('object', id='WMPlayer')['data']
        video = re.sub("http", "mms", video)
        video = video.replace(' ', '%20')  # escape spaces, they break URLs
        output_name = re.search(r"[a-z]+[0-9]+[a-z]?/[0-9]+", video).group(0).replace("/", "_")
        output_wmv = output_name + ".wmv"
        link_file.write(video + '\n')
        print video
        output_mp4 = output_name + ".mp4"
        videos.append((video, output_wmv, output_mp4))
    link_file.close()
    num_vids_already_downloaded = 0
    # This needs to be fixed to handle classes that have no videos.
    class_name = ''
    if len(videos) > 0:
        class_name = re.search('(.*)_', videos[0][2])
    else:
        return
    if not os.path.exists("%s/%s" % (current_folder, class_name.group(1))):
        os.makedirs("%s/%s" % (current_folder, class_name.group(1)))
    vid_urls_path = "%s/%s/%s" % (current_folder, class_name.group(1), "video_urls.txt")
    if not _file_exists(vid_urls_path):
        f = open(vid_urls_path, 'w')
        f.close()
    #.........the rest of this example's code is omitted here.........
Example 13: get_mechanized_browser
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import viewing_html [as alias]
def get_mechanized_browser(url):
    mech = Browser()
    mech.set_handle_robots(False)
    mech.open(url)
    assert mech.viewing_html()
    return mech