This article collects typical usage examples of the mechanize.Browser.links method in Python. If you are wondering how to use Browser.links, how to call it, or what real-world uses of it look like, the curated examples below should help. You can also explore further usage examples for the containing class, mechanize.Browser.
The following shows 15 code examples of the Browser.links method, sorted by popularity by default.
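Before the individual examples, here is a minimal sketch of the pattern they all share (this sketch is not one of the 15 examples; the URL and the regex filter are placeholders): open a page, then iterate over the Link objects that br.links() yields.
from mechanize import Browser

br = Browser()
br.set_handle_robots(False)                    # most examples below also skip robots.txt checks
br.open("http://example.com/")                 # placeholder URL
for link in br.links(url_regex=r"\.html$"):    # .links() accepts the same keyword filters as find_link()
    print(link.url)                            # each item is a mechanize.Link with .url, .text and .base_url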

Example 1: regex_find_links
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def regex_find_links(self, page, regex):
    br = Browser()
    br.set_handle_robots(False)
    try:
        br.open(page)
    except URLError:
        self.output_q.put('URL Not Found: %s' % page)
    # Number of matching links found
    num_links = 0
    # Max length of printed links
    max_len = 0
    #print page, regex
    for link in br.links():
        if regex.match(link.url):
            num_links += 1
            max_len = max(len(basename(link.url)), max_len)
            # Put the found links on the download queue
            self.download_q.put(link.url)
            # Put the output messages on the respective queue.
            # Implemented this way just in case this method is also threaded/multi-processed.
            # NOTE: Try to keep the number of output messages the same as the links
            self.output_q.put('Found Link: %s' % link.url)
    return num_links, max_len

Example 2: searchTitle
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def searchTitle(rawtitle):
    br = Browser()
    # Ignore robots.txt
    br.set_handle_robots(False)
    # Google demands a user-agent that isn't a robot
    br.addheaders = [('User-agent', 'Firefox')]
    br.open("http://www.google.com")
    br.select_form('f')
    s = 'imdb' + ' + ' + ' '.join(re.compile('[\.]').split(rawtitle))
    br.form['q'] = s
    br.submit()
    resp = None
    for link in br.links():
        siteMatch = re.compile('www.imdb.com/title/tt[0-9]*/$').search(link.url)
        if siteMatch:
            resp = br.follow_link(link)
            print link.url
            break
    soup = BeautifulSoup(resp.get_data())
    title = re.sub(' - IMDb', '', soup.find('title').string)
    title = re.sub('\([0-9]*\)', '', title)
    return title

Example 3: fetch
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def fetch():
    br = Browser()  # Create a browser
    map = {}
    # br.open(login_url)             # Open the login page
    # br.select_form(id="signform")  # Find the login form
    # br['username'] = username      # Set the form values
    # br['password'] = password
    # resp = br.submit()             # Submit the form
    br.open('http://www.verycd.com/sto/music/china/')
    nice_links = [l for l in br.links()
                  if 'topics' in l.url]
    if not nice_links:
        return None
    for link in nice_links:
        if link.url in map.keys():
            continue
        try:
            response = br.follow_link(link)
            map[link.url] = br.title()
        except Exception, e:
            print >> sys.stderr, e

Example 4: get_articlelist
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def get_articlelist():
    entry_page = "http://www.ausleisure.com.au/default.asp?PageID=2&n=Latest+News"
    br = Browser()
    br.open(entry_page)
    entries = []
    for l in br.links():
        if l.url.startswith("default.asp?PageID=2&ReleaseID=") and not l.text == '[IMG]':
            date = l.text[:10]
            title = l.text[15:]
            url = l.absolute_url
            data = get_article(url)
            entry = {
                'title': simple_escape(title),
                'link': simple_escape(url),
                'id': simple_escape(url),
                'updated': '%s-%s-%sT00:00:00Z' % (date[6:], date[3:5], date[:2]),
                'content': simple_escape(data),
            }
            entries.append(entry)
    return entries

Example 5: begin_scraper
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def begin_scraper():
    br = Browser()
    br.addheaders = [('User-agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_8; rv:16:0) Gecko/20100101 Firefox/16.0')]
    br.set_handle_robots(False)
    br.open("https://wwws.mint.com/login.event")
    assert br.viewing_html()
    formcount = 0
    for f in br.forms():
        if str(f.attrs["id"]) == "form-login":
            break
        formcount = formcount + 1
    br.select_form(nr=formcount)
    br["username"] = "[email protected]"  # Put your username here
    br["password"] = getpass()
    #import pdb; pdb.set_trace()
    # Submit the user credentials to login to mint
    response = br.submit()
    response = br.follow_link(text="Transactions")
    links_to_transactions = br.links(text_regex="Export all \d+ transactions")
    link = ""
    for f in links_to_transactions:
        link = f
    response2 = br.follow_link(link)
    text_file = open("transactions.csv", "w")
    text_file.write(response2.read())
    text_file.close()

Example 6: get_data
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def get_data():
    html = scraperwiki.scrape(edd_url)
    process_ex_dividend_data(html)
    br = Browser()
    br.set_handle_robots(False)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    br.open(edd_url)
    links = {}
    for link in br.links():
        if link.text in ['2', '3', '4']:
            links[link.text] = link.url
    for k, link in links.items():
        m = re.search(edd_pat, link)
        br = Browser()
        br.set_handle_robots(False)
        br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
        br.open(edd_url)
        br.select_form(nr=0)
        br.set_all_readonly(False)
        br["__EVENTTARGET"] = m.group(1)
        br["__EVENTARGUMENT"] = ''
        for c in br.controls:
            if c.type == 'submit':
                c.disabled = True
        response = br.submit()
        process_ex_dividend_data(response.read())

Example 7: downloadAll
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def downloadAll(url, pattern="", saveto="", overwrite=2, suffix=""):
    br = Browser()
    br.open(url)
    for link in br.links(url_regex=pattern):
        if link.url.startswith("http://"):
            # Absolute URL: download as-is
            download(link.url, "", saveto, overwrite, suffix)
        elif link.url.startswith("/"):
            # Root-relative URL: prepend the scheme and host taken from base_url
            download(link.base_url[:link.base_url.find("/", 8)] + link.url, "", saveto, overwrite, suffix)
        else:
            # Relative URL: resolve against the directory portion of base_url
            download(link.base_url[:link.base_url.rfind("/") + 1] + link.url, "", saveto, overwrite, suffix)
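A hypothetical invocation of the example above (the URL, pattern, and target directory are placeholders, and download() is the helper the snippet assumes is defined elsewhere):
downloadAll("http://example.com/reports/", pattern=r"\.pdf$", saveto="./downloads")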

Example 8: downloadAll
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def downloadAll(username, courseName):
    br = Browser()
    br.addheaders = [
        (
            "User-agent",
            "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6; en-us) AppleWebKit/531.9 (KHTML, like Gecko) Version/4.0.3 Safari/531.9",
        )
    ]
    br.set_handle_robots(False)
    br.open("https://myvideosu.stanford.edu/oce/currentquarter.aspx")
    assert br.viewing_html()
    br.select_form(name="login")
    br["username"] = username
    br["password"] = getpass()
    # Open the course page for the title you're looking for
    print "Logging in to myvideosu.stanford.edu..."
    response = br.submit()
    print "Logged in, going to course link."
    response = br.follow_link(text=courseName)
    # print response.read()
    # response = br.follow_link(text="HERE")
    # print response.read()
    # Build up a list of lectures
    print "Loading video links."
    links = []
    for link in br.links(text="WMP"):
        links.append(re.search(r"'(.*)'", link.url).group(1))
    link_file = open("links.txt", "w")
    # So we download the oldest ones first.
    links.reverse()
    print "Found %d links, getting video streams." % (len(links))
    videos = []
    for link in links:
        response = br.open(link)
        soup = BeautifulSoup(response.read())
        video = soup.find("object", id="WMPlayer")["data"]
        video = re.sub("http", "mms", video)
        video = video.replace(" ", "%20")  # remove spaces, they break urls
        output_name = re.search(r"[a-z]+[0-9]+[a-z]?/[0-9]+", video).group(0).replace("/", "_")  # + ".wmv"
        output_wmv = output_name + ".wmv"
        link_file.write(video + "\n")
        print video
        output_mp4 = output_name + ".mp4"
        videos.append((video, output_wmv, output_mp4))
    link_file.close()
    print "Downloading %d video streams." % (len(videos))
    for video in videos:
        download(video)
    print "Done!"

Example 9: read_all_result_page_links_for
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def read_all_result_page_links_for(mainurl):
    br = Browser()
    br.set_handle_robots(False)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    br.open(mainurl)
    nice_links = [l for l in br.links()
                  if 'company' in l.url]
    for link in nice_links:
        read_detail_page(link.url)

Example 10: upload_gallery
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def upload_gallery(url):
    from mechanize import Browser
    br = Browser()
    br.set_handle_robots(False)
    br.open(url)
    urls = set()
    # Collect the unique URLs of all links pointing at .jpg files
    for link in br.links(url_regex=".jpg$"):
        urls.add(link.url)
    upload_images(urls)

Example 11: get_soup
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def get_soup(movie):
    movie = '+'.join(movie.split())
    url = "http://www.imdb.com/find?ref_=nv_sr_fn&q=" + movie + "&s=all"
    br = Browser()
    br.open(url)
    try:
        link = list(br.links(url_regex=re.compile(r"/title/tt*")))[0]
    except:
        # No matching title link found on the results page
        return ""
    else:
        res = br.follow_link(link)
        soup = BeautifulSoup(res.read())
        return soup
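A hypothetical call to the example above (the title is a placeholder): the function returns a BeautifulSoup tree of the first matching IMDb title page, or an empty string when no link matches.
soup = get_soup("the matrix")
if soup:
    print(soup.find('title').string)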

Example 12: getMapping
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def getMapping():
    # get the mapping of cellphone numbers to IP addresses
    mapping = dict()
    br = Browser()
    br.open("http://mahan.webfactional.com/sms/freedns/sms/cellphonedomains/")
    # words to exclude irrelevant links
    words = ['Name', 'Last modified', 'Size', 'Description', 'Parent Directory']
    # get all the cellphone numbers with registered IP addresses
    # .links() optionally accepts the keyword args of .follow_/.find_link()
    for link in br.links():
        if link.text not in words:
            print link.text
            # get the corresponding IP address
            ipaddr = urllib2.urlopen('http://mahan.webfactional.com/sms/freedns/sms/cellphonedomains/' + link.text).read()
            mapping[link.text] = ipaddr
    print mapping
    log.msg("mapping: " + str(mapping))
    log.msg("links: " + str(br.links()))
    log.msg("set mapping 1: " + str(p2presolver.mapping))
    p2presolver.mapping = mapping
    log.msg("set mapping 2: " + str(p2presolver.mapping))

Example 13: main
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def main(page, regex, path):
    start_time = time.time()
    br = Browser()
    br.set_handle_robots(False)
    br.open(page)
    #br.open('http://storage.googleapis.com/books/ngrams/books/datasetsv2.html')
    eng_all = re.compile(regex)
    #eng_all = re.compile('.*googlebooks-eng-all.*20120701.*')
    #print page, regex, path
    n = 0
    maxlen = 0
    link_list = []
    for link in br.links():
        if eng_all.match(link.url):
            n += 1
            maxlen = max(len(os.path.basename(link.url)), maxlen)
            link_list.append(link.url)
            sys.stderr.write('Found Link: %s\n' % link.url)
    answer = raw_input("\n\nAre you sure you want to download the above %i file(s)? (Y/N): " % n)
    if answer == 'N' or answer == 'n':
        sys.exit(0)
    sys.stderr.write('\n\nDownloading files to: %s\n' % path)
    digits = len('%d' % n)
    disp_time = datetime.datetime.now
    for i, link in enumerate(link_list):
        download_start = time.time()
        file_name = os.path.basename(link)
        full_path = os.path.join(path, file_name)
        if os.path.exists(full_path):
            sys.stderr.write('%s exists, not downloading\n' % full_path)
            continue
        try:
            sys.stderr.write('[%s] Downloading(%-*i of %i): %*s' % (str(disp_time().time())[:8], digits, i + 1, n,
                                                                    maxlen + 2, file_name))
            br.retrieve(link, filename=full_path)
        except:
            sys.stderr.write('\n\nSomething happened, deleting last file: %s\n' % full_path)
            os.remove(full_path)
            sys.exit(0)
        sys.stderr.write(' of size %s MB in %5.2f min\n' % ("{:7.2f}".format(float(os.stat(full_path).st_size) / 1000000),
                                                            (time.time() - download_start) / 60))
        br.clear_history()
    sys.stderr.write('\ndownloaded %i files to %s directory in %15f seconds\n' % (n, path, time.time() - start_time))

Example 14: test_no_external_login
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def test_no_external_login(self):
    """websession - openid, oauth1 or oauth2 external login option in log in page"""
    base_url = CFG_SITE_SECURE_URL + '/youraccount'
    login_url = base_url + '/login'
    browser = Browser()
    response = browser.open(login_url)
    # Check all the links and see if any of them is of class openid (external login button)
    for link in browser.links():
        for value in link.attrs:
            if value[0] == 'class':
                if value[1] == 'openid_url':
                    self.fail("Openid external login in login page: %s" % link.attrs)
    return

Example 15: downloadAll
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import links [as alias]
def downloadAll(username, courseName):
    br = Browser()
    br.addheaders = [('User-agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6; en-us) AppleWebKit/531.9 (KHTML, like Gecko) Version/4.0.3 Safari/531.9')]
    br.set_handle_robots(False)
    br.open('https://myvideosu.stanford.edu/oce/currentquarter.aspx')
    assert br.viewing_html()
    br.select_form(name='login')
    br['username'] = username
    br['password'] = getpass()
    # Open the course page for the title you're looking for
    print 'Logging in to myvideosu.stanford.edu...'
    response = br.submit()
    print 'Logged in, going to course link.'
    response = br.follow_link(text=courseName)
    # Build up a list of lectures.
    print 'Loading video links.'
    links = []
    for link in br.links(text='WMP'):
        links.append(re.search(r"'(.*)'", link.url).group(1))
    # So we download the oldest ones first.
    links.reverse()
    print 'Found %d links, getting video streams.' % len(links)
    videos = []
    for link in links:
        response = br.open(link)
        soup = BeautifulSoup(response.read())
        video = soup.find('object', id='WMPlayer')['data']
        video = re.sub('http', 'mms', video)
        video = video.replace(' ', '%20')  # remove spaces, they break urls
        output_name = re.search(r'[a-z]+[0-9]+[a-z]?/[0-9]+', video).group(0).replace('/', '_')
        output_wmv = output_name + '.wmv'
        print video
        videos.append((video, output_wmv))
    print 'Downloading %d video streams.' % (len(videos))
    for video in videos:
        download(video)
    print 'Done!'