This article collects typical usage examples of Python's mechanize.Browser class. If you are wondering what the Browser class is for, how to use it, or simply want to see it in real code, the hand-picked class examples below may help.
The following shows 15 code examples of the Browser class, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python examples.
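Before the individual examples, here is a minimal sketch of the workflow most of them share: create a Browser, optionally disable robots.txt handling, open a page, fill in a form, and read the response. The URL and form field names below are placeholders, not taken from any of the projects quoted here.

from mechanize import Browser

br = Browser()
br.set_handle_robots(False)          # many of the examples below do this to avoid robots.txt blocking
br.open("http://example.com/login")  # placeholder URL
br.select_form(nr=0)                 # select the first form on the page
br["username"] = "user"              # placeholder field names
br["password"] = "secret"
response = br.submit()
html = response.read()               # or br.response().get_data()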
Example 1: lookup_offers_isbn
def lookup_offers_isbn(item_id):
    offers = []
    br = Browser()
    res = br.open("http://books.half.ebay.com/ws/web/HalfISBNSearch?isbn=%s" % item_id)
    soup = BeautifulSoup(res.read())
    ratings = soup.findAll('span', {'class': 'Header'})
    for r in ratings:
        rating = r.text
        prices = r.parent.parent.parent.findNextSibling('table').findAll('tr')[1:]
        linktext = r.parent.parent.parent.findNextSiblings('table')[1].find(text=re.compile('View all.*'))
        if linktext:
            all = linktext.parent['href']
            # get link
            res2 = br.open(all)
            soup = BeautifulSoup(res2.read())
            rating2 = soup.findAll('span', {'class': 'Header'})
            prices = rating2[0].parent.parent.parent.parent.findAll('table')[3].findAll('tr')[1:]
        for row in prices:
            m = re.search("itemid=(\d+)", row.find('a', href=re.compile("itemid=\d+"))['href'])
            itemid = m.group(1)
            seller = row.find('a', {'class': 'SellerDisplayLink'}).text
            price = row.find('span', {'class': 'ItemPrice'}).text
            price = string.replace(price, ",", "")
            if price.startswith("$"):
                price = price[1:]
            offers.append({'rating': rating, 'seller': seller, 'listing_id': itemid, 'price': str(price)})
            print rating, seller, itemid, price
    return offers
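A hedged usage sketch for Example 1 (the ISBN is arbitrary, and the Half.com endpoint may no longer respond):

offers = lookup_offers_isbn("0316769487")  # arbitrary ISBN, for illustration only
for offer in offers:
    print offer['seller'], offer['price'], offer['rating']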
Example 2: down_image
def down_image(self, img):
    print "down image from " + img
    down_br = Browser()
    down_cj = CookieJar()
    down_br.set_cookiejar(down_cj)
    fn = tempfile.mktemp(suffix='.png')
    return down_br.retrieve(img, filename=fn)[0]
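For context, Browser.retrieve() returns a (filename, headers) tuple, which is why the snippet indexes [0]. A standalone sketch with a placeholder URL:

from mechanize import Browser
import tempfile

br = Browser()
fn = tempfile.mktemp(suffix='.png')  # temporary target path
local_path, headers = br.retrieve("http://example.com/logo.png", filename=fn)  # placeholder URL
print local_path  # path of the downloaded file on disk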
Example 3: on_task_start
def on_task_start(self, task, config):
    try:
        from mechanize import Browser
    except ImportError:
        raise PluginError('mechanize required (python module), please install it.', log)

    userfield = config.get('userfield', 'username')
    passfield = config.get('passfield', 'password')

    url = config['url']
    username = config['username']
    password = config['password']

    br = Browser()
    br.set_handle_robots(False)
    try:
        br.open(url)
    except Exception as e:
        # TODO: improve error handling
        raise PluginError('Unable to post login form', log)

    #br.set_debug_redirects(True)
    #br.set_debug_responses(True)
    #br.set_debug_http(True)

    # Try each form on the page until one accepts both the user and password fields
    for form in br.forms():
        loginform = form
        try:
            loginform[userfield] = username
            loginform[passfield] = password
            break
        except Exception as e:
            pass
    else:
        # No form matched: save the page so the user can inspect it
        received = os.path.join(task.manager.config_base, 'received')
        if not os.path.isdir(received):
            os.mkdir(received)
        filename = os.path.join(received, '%s.formlogin.html' % task.name)
        with open(filename, 'w') as f:
            f.write(br.response().get_data())
        log.critical('I have saved the login page content to %s for you to view' % filename)
        raise PluginError('Unable to find login fields', log)

    br.form = loginform
    br.submit()

    cookiejar = br._ua_handlers["_cookies"].cookiejar

    # Add cookiejar to our requests session
    task.requests.add_cookiejar(cookiejar)

    # Add handler to urllib2 default opener for backwards compatibility
    handler = urllib2.HTTPCookieProcessor(cookiejar)
    if urllib2._opener:
        log.debug('Adding HTTPCookieProcessor to default opener')
        urllib2._opener.add_handler(handler)
    else:
        log.debug('Creating new opener and installing it')
        urllib2.install_opener(urllib2.build_opener(handler))
Example 4: login_to_kaggle
def login_to_kaggle(self):
    """Login to the Kaggle website.

    Parameters:
    -----------
    None

    Returns:
    --------
    browser: Browser
        a mechanize Browser object to be used for further access to the site
    """
    if self.verbose:
        print("Logging in to Kaggle..."),
    br = Browser()
    cj = cookielib.LWPCookieJar()
    br.set_cookiejar(cj)
    br.open(self.kag_login_url)
    br.select_form(nr=0)
    br['UserName'] = self.kag_username
    br['Password'] = self.kag_password
    br.submit(nr=0)
    if br.title() == "Login | Kaggle":
        raise KaggleError("Unable to login Kaggle with username %s (response title: %s)" % (self.kag_username, br.title()))
    if self.verbose:
        print("done!")
    return br
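A possible way to use the browser returned by Example 4 afterwards; the kag_data_url attribute and the output filename are hypothetical, not part of the snippet above:

br = self.login_to_kaggle()
response = br.open(self.kag_data_url)  # hypothetical attribute holding a competition file URL
with open("data.csv", "wb") as f:
    f.write(response.read())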
Example 5: GetXboxLiveFriends
def GetXboxLiveFriends(self):
    """Return a list of tuples (gamer_tag, gamer_presence)."""
    br = Browser()
    br.open('http://live.xbox.com/en-US/profile/Friends.aspx')
    br.select_form(name='f1')
    br['login'] = self.login
    br['passwd'] = self.passwd
    br.submit()  # Submit login form.
    br.select_form(name='fmHF')
    response = br.submit()  # Submit redirect form.
    friend_list = response.read()
    response.close()
    soup = BeautifulSoup(friend_list)
    friend_table = soup.find('table', {'class': FRIEND_TABLE_CLASS})
    if friend_table is None:
        raise XboxLiveError('Parsing failure.')
    friends = []
    for row in friend_table.contents[1:]:  # Skip header row.
        gamer_tag = row.find('td', {'class': GAMER_TAG_CLASS})
        gamer_tag = str(gamer_tag.find('a').contents[0])
        gamer_presence = row.find('td', {'class': GAMER_PRESENCE_CLASS})
        gamer_presence = str(gamer_presence.find('h4').contents[0])
        friends.append((gamer_tag, gamer_presence))
    return friends
Example 6: newBrowser
def newBrowser(self):
    # Create new browsers all the time because its data structures grow
    # unboundedly (texas#135)
    br = Browser()
    br.add_password(self.hostname, self.username, self.password)
    br.set_handle_robots(None)
    return br
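add_password() registers credentials for HTTP authentication, so that later open() calls to that URL can answer 401 challenges automatically. A standalone sketch with placeholder values:

from mechanize import Browser

br = Browser()
br.add_password("http://example.com/protected", "alice", "s3cret")  # placeholder URL and credentials
br.set_handle_robots(False)
page = br.open("http://example.com/protected/index.html")  # authenticates if the server asks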
Example 7: fetch_laws_page_from_year
def fetch_laws_page_from_year(year, temporaryDirectory):
    lawsDirectory = os.path.join(temporaryDirectory, 'all_laws')
    if not os.path.exists(lawsDirectory):
        os.makedirs(lawsDirectory)
        print('The laws directory did not exist so I created it')
        print(lawsDirectory)

    fileToWriteLawsListIn = os.path.join(lawsDirectory, year + '.html')
    print('File to write in is ' + fileToWriteLawsListIn)

    lawWasNotDownloaded = not os.path.isfile(fileToWriteLawsListIn)
    if lawWasNotDownloaded:
        startDownload = int(round(time.time() * 1000))
        print('Getting laws from year ' + year)
        url = get_ugly_url_for_laws(year)
        browser = Browser()
        browser.open(url)
        html = browser.response().get_data()
        with open(fileToWriteLawsListIn, 'a') as f:
            f.write(html)
        endDownload = int(round(time.time() * 1000))
        print('Finished downloading laws for year ' + year + '. It took only '
              + str(endDownload - startDownload) + ' milliseconds')
    else:
        print('This year was already fetched ' + year
              + '. Skipping to the next year')
def __init__(self):
Browser.__init__(self)
self.set_handle_robots(False)
self.addheaders = [(
'Accept',
'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
)]
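Because the class in Example 8 subclasses Browser directly, instances behave like any other mechanize browser; a sketch assuming the enclosing class is named CustomBrowser (the snippet does not show its real name):

browser = CustomBrowser()                       # hypothetical class name
response = browser.open("http://example.com/")  # placeholder URL; the Accept header set in __init__ is sent
html = response.read()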
Example 9: mrsc
def mrsc(gid):
    mech = Browser()
    url = "http://espn.go.com/ncf/playbyplay?gameId=" + gid + "&period=0"
    #print url
    page = mech.open(url)
    html = page.read()
    print url
    if html.count('Play-by-play not currently available.') == 0:
        soup = BeautifulSoup(html)
        table = soup.findAll("table")[-1]
        rows = table.findAll('tr')[::-1]
        c = 0
        toret = ''
        keepgoing = True
        # slice out the text after the last styled header block, then pull the anchor name that follows
        cup = html[::-1][:html[::-1].find(' left; font: 700 14px/25px Helvetica,Arial,sans-serif;" colspan="3"><div style="margin-right: 6px;"'[::-1])][::-1]
        cup = cup[cup.find('a name="') + len('a name="'):]
        cup = cup[:cup.find('"')]
        while c < 7 and keepgoing and c < len(rows):
            cols = rows[c].findAll('td')
            #print rows[c]
            if len(cols) > 2:
                #if str(cols[2]) != '<td>&nbsp;</td>' and str(cols[3]) != '<td>&nbsp;</td>':
                toret = str(' '.join(cols[0].findAll(text=True))) + '. ' + str(' '.join(cols[1].findAll(text=True)))
                keepgoing = False
            c = c + 1
        toret = toret.replace('&nbsp;', ' ').strip()
        if toret != '': toret = toret + ' '
        poss = ''
        if cup != '' and len(cup) < 30: poss = cup
    else:
        toret = ''
        poss = ''
    return [toret, poss]
Example 10: getLastEntries
def getLastEntries(self, url, lastDate):
    """ get all entries from an HTML table list if it is newer
    than prevEntry. Format is from graz FF site """
    mech = Browser()
    mech.set_handle_robots(False)
    try:
        page = mech.open(url)
    except urllib2.HTTPError:
        if url == None:
            url = "(empty url)"
        self.logger.error("Could not read url " + url)
        return []
    html = page.read()
    soup = BeautifulSoup(html)
    link = soup.findAll('a')
    if len(link) == 0:
        logger.error('No links in the page: %s', url)
        return []

    returnLinks = []
    for l in link:
        try:
            date = datetime.strptime(l.string, "topo-%Y-%m-%d-%H:%M.tsv.gz")
        except ValueError:
            continue
        if date > lastDate:
            returnLinks.append(url + l.string)
        else:
            break
    return returnLinks
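The filename filter in Example 10 relies on datetime.strptime raising ValueError for link texts that do not match the dump naming pattern. A small illustration with a made-up filename that matches the format string used in the code:

from datetime import datetime

name = "topo-2014-03-01-12:30.tsv.gz"  # made-up example filename
date = datetime.strptime(name, "topo-%Y-%m-%d-%H:%M.tsv.gz")
# date is datetime(2014, 3, 1, 12, 30); a non-matching string raises ValueError and is skipped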
Example 11: main
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--total-jobs', metavar='<total-jobs>', help='total number of jobs downloading documents', type=int)
    parser.add_argument('--job', metavar='<job>', help='job number between 1 and <total-jobs>', type=int)
    args = parser.parse_args()

    check_args(parser, args)

    br = Browser()
    br.set_handle_robots(False)
    # br.set_debug_responses(True)

    data = urlencode({'user': USERNAME, 'pass': getpass()})

    document_urls = [LOGIN_PREFIX + url.strip() + '&view=etext' for url in file(DOCUMENT_URLS_FILE)]

    start = args.job - 1
    step = args.total_jobs

    for url in iterview(document_urls[start::step]):
        try:
            get_document_pages(br, url, data)
        except Exception as e:
            print >> sys.stderr, '\n', (url, e)
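The start::step slice is what splits the work between jobs: job N of M processes documents N-1, N-1+M, N-1+2M, and so on. A small illustration with made-up URLs:

document_urls = ['u0', 'u1', 'u2', 'u3', 'u4', 'u5', 'u6']
# job 2 of 3: start = 2 - 1 = 1, step = 3
print document_urls[1::3]  # ['u1', 'u4'] -- only this job downloads these documents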
Example 12: gen_path
def gen_path(request):
    x = json.loads(request.POST['data'])  # fetches data
    print x
    adj_mat = []  # creates empty adjacency matrix
    i1 = j1 = 0
    num_cities = len(x)
    for i in x:
        tmp_mat = []
        for j in x:
            if i != j:
                API_KEY = "AIzaSyDBOSr6_XxvISPGX54P9bPnooE3RUpRTp0"
                orig_coord = x[i]
                dest_coord = x[j]
                br = Browser()  # creates mechanize instance
                br.set_handle_robots(False)
                # print "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&key={2}".format(orig_coord, dest_coord, API_KEY)
                result = br.open("https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&key={2}".format(orig_coord, dest_coord, API_KEY)).read()  # makes a call to the Google Maps Distance Matrix API
                json_result = json.loads(result)
                tmp_mat.append(int(json_result['rows'][0]['elements'][0]['distance']['value']))
            else:
                tmp_mat.append(0)
        adj_mat.append(tmp_mat)
    obj = ArpanDaErCode()
    ans = ""
    ans = ArpanDaErCode.solve(obj, adj_mat, num_cities)  # gets sequence from model
    print ans
    ret = {'data': [str(ii) for ii in ans]}
    return HttpResponse(str(json.dumps(ret)))  # returns the sequence in JSON format for the JS to handle
Example 13: scrap_query
def scrap_query(query, bang=None):
    r = ddg_query('imbd ' + query, bang=bang)

    if 'redirect' in dir(r) and 'primary' in dir(r.redirect):
        url = r.redirect.primary
    else:
        logger.info('Could not find imdb searchpage from DuckDuckGo bang')
        return None

    br = Browser()
    br.set_handle_robots(False)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 6.2;\
        WOW64) AppleWebKit/537.11 (KHTML, like Gecko)\
        Chrome/23.0.1271.97 Safari/537.11')]
    r = br.open(url)
    soup = BeautifulSoup(r)

    for link in soup.find_all('a'):
        href = link.get('href', '')
        match = re.search(r"imdb\.com/.*tt(?P<number>[^/]*)", href)
        if match:
            imdb_id = check_imdb(match.group('number'))
            return imdb_id
    return None
Example 14: __init__
def __init__(self, options):
    (self.configfile, self.config, self.moduleconfig) = self.initialize_config(options)

    # If we have a particular log level for this module, use that,
    # otherwise use the global log level. If that isn't defined
    # either, use the INFO loglevel.
    if 'log' in self.moduleconfig:
        loglevel = self.moduleconfig['log']
    else:
        loglevel = self.config.get('log', 'INFO')
    self.log = self.setup_logger(self.module_dir, loglevel)

    self.base_dir = self.config['datadir']

    if self.browser_use_robustfactory:
        self.browser = Browser(factory=RobustFactory())
    else:
        self.browser = Browser()
    self.browser.addheaders = [('User-agent', 'lagen.nu-bot ([email protected])')]

    # logger = logging.getLogger("mechanize")
    # logger.addHandler(logging.StreamHandler(sys.stdout))
    # logger.setLevel(logging.DEBUG)
    # self.browser.set_debug_http(True)
    # self.browser.set_debug_responses(True)
    # self.browser.set_debug_redirects(True)

    self.ns = {'rinfo': Namespace(Util.ns['rinfo']),
               'rinfoex': Namespace(Util.ns['rinfoex']),
               'dct': Namespace(Util.ns['dct'])}
Example 15: num_itens
def num_itens(self, busca, data_inicial, data_final):
    br = Browser()
    response1 = \
        br.open("http://portal.in.gov.br/in/imprensa1/pesquisa_avancada")
    br.select_form(name="formBusca")
    br["texto_todas"] = busca
    br["dataPublicacaoInicial"] = data_inicial[:5]
    br["dataPublicacaoFinal"] = data_final[:5]
    br["ano"] = [data_final[-4:]]
    br["idJornal"] = ["1", "2", "3", "4"]
    # print(br.form)
    br.form.action = \
        "http://www.in.gov.br/imprensa/pesquisa/pesquisaresultado.jsp"
    res = br.submit()
    texto = res.read()
    x1, x2, x3 = texto.partition("ite")
    x1, x2, x3 = x1.rpartition(">")
    try:
        arq = open(self.retornar_html(), "w")
        arq.write(texto)
        arq.close()
    except:
        print("Erro ao tentar salvar página de buscas!")
    x3 = x3.replace(",", "")
    x3 = x3.strip()
    # Return the number of items found ("Um" means one)
    if x3 == "Um":
        return 1
    if len(x3) > 0:
        return int(x3)
    else:
        return 0