

Python Browser.set_proxies method code examples

This article collects typical usage examples of the Python method mechanize.Browser.set_proxies. If you are wondering how exactly Browser.set_proxies works, how to call it, or what real-world uses of it look like, the curated examples below should help. You can also explore further usage examples of mechanize.Browser, the class this method belongs to.


The following presents 15 code examples of the Browser.set_proxies method, sorted by popularity by default.
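Before the project examples, here is a minimal self-contained sketch of the basic call pattern (not taken from any of the projects below; the proxy address is only an assumed placeholder): set_proxies takes a dict mapping a URL scheme to a proxy host:port, and subsequent br.open() calls are routed through that proxy.

from mechanize import Browser

br = Browser()
# Route each scheme through a proxy; "127.0.0.1:8008" is a placeholder address.
br.set_proxies({"http": "127.0.0.1:8008", "https": "127.0.0.1:8008"})
# Passing an empty dict instead (as in Example 3) configures no proxies at all.
response = br.open("http://www.example.com")
print(response.read())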

Example 1: testPx

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
def testPx(px):
  B=Browser()
  B.addheaders = [('User-agent', userAgents[randint(0,len(userAgents)-1)])]
  B.set_proxies(px)
  try:
    B.open('http://graphicriver.net/',timeout=5)
    pxQ.put(px)
    print(px['http']+"  ok")
    
    B.open('http://graphicriver.net/category/all',timeout=5)
  except:
    print(px['http']+"  error")
  page = pageQ.get()
  try:  
#    pass
#  finally:
    count=0
    while(count<5):
      O = B.open('http://graphicriver.net/category/all?page='+str(page),timeout=8)
      turls = lxml.html.document_fromstring(O.get_data()).xpath('//div[@class="item-info"]/h3/a/@href')
      for url in turls:
        urlsQ.put(url)
      print(str(page)+" got")  
      pageDoneQ.put(page)
      page = pageQ.get()
      count+=1
  except:  
    pageQ.put(page)
    print(str(page)+" error")
Developer: eugenpt, Project: pythonStockAnalyzer, Lines of code: 31, Source file: test_GetAllURLS_andNPX.py

Example 2: DOMScanner

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
class DOMScanner(threading.Thread):
    def __init__(self, engine, queue):
        threading.Thread.__init__(self)
        self.queue = queue
        self.engine = engine

        self.errors = {}
        self.results = []
        self.javascript = []
        
        self.whitelist = []

        self.browser = Browser()
        self._setProxies()
        self._setHeaders()
  

    def _setHeaders(self):
        if self.engine.getOption('ua') is not None:
            if self.engine.getOption('ua') is "RANDOM":
                self.browser.addheaders = [('User-Agent', random.choice(USER_AGENTS))]
            else:
                self.browser.addheaders = [('User-Agent', self.engine.getOption('ua'))]
        if self.engine.getOption("cookie") is not None:
            self.browser.addheaders = [("Cookie", self.engine.getOption("cookie"))]
    
    def _setProxies(self):
         if self.engine.getOption('http-proxy') is not None:
            self.browser.set_proxies({'http': self.engine.getOption('http-proxy')})

    def _addError(self, key, value):
        if self.errors.has_key(key):
            self.errors[key].append(value)
        else:
            self.errors[key] = [value]


        
    def _parseJavascript(self, target):
        if self.engine.getOption("ua") is "RANDOM": self._setHeaders() 
        
        url = target.getFullUrl()
        
        try:
            to = 10 if self.engine.getOption('http-proxy') is None else 20
            response = self.browser.open(url, timeout=to) #urlopen(req, timeout=to)
            
        except HTTPError, e:
            self._addError(e.code, target.getAbsoluteUrl())
            return
        except URLError, e:
            self._addError(e.reason, target.getAbsoluteUrl())
            return
Developer: Ashroyal, Project: OWASP-Xenotix-XSS-Exploit-Framework, Lines of code: 55, Source file: DOM+XSS+SCANNER.py

Example 3: init_browser

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
def init_browser():
    browser = Browser()
    browser.addheaders = (
        ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
        ('User-agent', 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)') # We're Firefox! :P
    )
    # browser.set_handle_gzip(True) # Currently experimental in mechanize
    browser.set_handle_redirect(True)
    browser.set_handle_refresh(False)
    browser.set_handle_robots(True)
    browser.set_handled_schemes(['http', 'https'])
    browser.set_proxies({})
    return browser
Developer: crazydreamer, Project: corpuscatcher, Lines of code: 15, Source file: corpus_collect.py

Example 4: browseUrlList

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
 def browseUrlList(self, urlList, proxies = {}):
   browseData = {}
   from mechanize import Browser
   br = Browser()
   # Explicitly configure proxies (Browser will attempt to set good defaults).
   br.set_proxies(proxies)
   curUrlList = []
   for i in urlList:
     print 'connecting:',i
     response = br.open(i)
     browseData[i] = response.read()
     #Check cache first.
     curUrlList.append(i)
     self.cache(curUrlList, browseData[i])
   return browseData
Developer: weijia, Project: ufs, Lines of code: 17, Source file: urlCache.py

Example 5: getsoup

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
def getsoup(URL, proxy = None):
    br = Browser()
    if proxy is not None:
        br.set_proxies(proxy)
    br.open(URL)
    try:
        title_URL = br.find_link(url_regex = re.compile(r'/title/tt.*'))
    except LinkNotFoundError:
        return None
    try:
        res = br.follow_link(title_URL)
    except URLError:
        return None
    
    soup = BeautifulSoup(res.read())
    return soup
Developer: jayrambhia, Project: DeskWid, Lines of code: 18, Source file: imdb.py

Example 6: Crawler

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
class Crawler(threading.Thread):
    def __init__(self, engine, queue, crawl_links = False, crawl_forms = False):
        threading.Thread.__init__(self)
        self.engine = engine
        self.queue = queue

        self.results = []
        self.errors = {}

        self.crawl_links = crawl_links
        self.crawl_forms = crawl_forms

        self.browser = Browser()
        self._setProxies()
        self._setHeaders()

    def _setHeaders(self):
        if self.engine.getOption('ua') is not None:
            if self.engine.getOption('ua') is "RANDOM":
                self.browser.addheaders = [('User-Agent', random.choice(USER_AGENTS))]
            else:
                self.browser.addheaders = [('User-Agent', self.engine.getOption('ua'))]
        if self.engine.getOption("cookie") is not None:
            self.browser.addheaders = [("Cookie", self.engine.getOption("cookie"))]
    
    def _setProxies(self):
         if self.engine.getOption('http-proxy') is not None:
            self.browser.set_proxies({'http': self.engine.getOption('http-proxy')})

    def _addError(self, key, value):
        if self.errors.has_key(key):
            self.errors[key].append(value)
        else:
            self.errors[key] = [value]

    def _crawlLinks(self, target):
        # If UA is RANDOM we need to refresh browser's headers
        if self.engine.getOption("ua") is "RANDOM": self._setHeaders()
        
        try: self.browser.open(target.getAbsoluteUrl())
        except HTTPError, e:
            self._addError(e.code, target.getAbsoluteUrl())
            return False 
        except URLError, e:
            self._addError(e.reason, target.getAbsoluteUrl())
            return False
Developer: Drx51, Project: Framework, Lines of code: 48, Source file: crawler.py

Example 7: BeautifulSoup

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
import urllib
from bs4 import BeautifulSoup
import urllib2
from mechanize import Browser

url = "http://www.google.com"
#URLLIB PROXY SUPPORT.

#proxies = {'http': 'http://localhost:8008'}
#urlhandle = urllib.urlopen(url, proxies=proxies)
#print urlhandle.read()

#URLLIB2 PROXY SUPPORT AND THEN USING BEAUTIFULSOUP
proxy = urllib2.ProxyHandler( {'http': 'localhost:8008'} )
opener = urllib2.build_opener( proxy )
urllib2.install_opener( opener )
request = urllib2.Request( url )
response = urllib2.urlopen( request )
html = response.read()
soup = BeautifulSoup(html, "lxml")
div = soup.find_all( 'div', id="gs_lc0" )

#print div

#MECHANIZE PROXY SUPPORT.
br = Browser()
br.set_proxies({"http": "localhost:8008"})
response = br.open(url)

print response
Developer: Adastra-thw, Project: pyHacks, Lines of code: 32, Source file: ProxySupportExamples.py

Example 8: getSolutions

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
def getSolutions (path_prefix, path_proxy):
    global br, username, password

    # create a browser object
    br = Browser()

    # add proxy support to browser
    if len(path_proxy) != 0: 
        protocol,proxy = options.proxy.split("://")
        br.set_proxies({protocol:proxy})
    
    # let browser fool robots.txt
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; \
              rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    br.set_handle_robots(False)

    print "Enter yout SPOJ username :",
    username = raw_input()
    password = getpass.getpass()

    # authenticate the user
    print "Authenticating " + username
    br.open ("http://spoj.com/login")
    #br.select_form (name="login")
    #the form no longer is named "login" therefore to access it by id:
    formcount=0
    for frm in br.forms():
        if str(frm.attrs["id"])=="login-form":
            break
        formcount=formcount+1
    br.select_form(nr=formcount)
    
    br["login_user"] = username
    br["password"] = password

    # sign in for a day to avoid timeouts
    #br.find_control(name="autologin").items[0].selected = True
    #this attribute is missing in the new spoj format
    br.form.action = "http://www.spoj.com/login"
    response = br.submit()

    verify = response.read()
    if (verify.find("Authentication failed!") != -1):
        print "Error authenticating - " + username
        exit(0)

    # grab the signed submissions list
    print "Grabbing siglist for " + username
    siglist = br.open("http://www.spoj.pl/status/" + username + "/signedlist")

    # dump first nine useless lines in signed list for formatting
    for i in xrange(9):
        siglist.readline()

    # make a list of all AC's and challenges
    print "Filtering siglist for AC/Challenge solutions..."
    mysublist = list()

    while True:
        temp = siglist.readline()

        if temp=='\------------------------------------------------------------------------------/\n':
            # reached end of siglist
            break

        if not len(temp) :
            print "Reached EOF, siglist format has probably changed," + \
                    " contact author."
            exit(1)

        entry = [x.strip() for x in temp.split('|')]

        if entry[4] == 'AC' or entry[4].isdigit():
            mysublist.append (entry)

    print "Done !!!"
    return mysublist
Developer: , Project: , Lines of code: 79, Source file:

Example 9: __init__

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
class FlickrBot:

    # use favorite authors as contacts.
    __url = "http://www.flickr.com"

    __favorites_url = "http://www.flickr.com/photos/%s/favorites"

    __favorite_regex = "/photos/[[email protected]]+/[0-9]+/"
    __favorite_prefix = ""
    __favorite_sufix = ""

    __contact_regex = __favorite_regex

    __complete_name_regex = __contact_regex
    __complete_name_prefix = "/photos/"
    __complete_name_sufix = "/"

    __tag_regex = "/photos/tags/[a-zA-Z_0-9\-]+/"

    __tag_prefix = "/photos/tags/"
    __tag_sufix = "/"

    __forb_tags = ["the", "and", "their", "at", "is", "in", "of", "a", "on", "for", "an", "with"]

    def __init__(self, proxies_per_proto={}, debug=False):
        self.__br = Browser()
        self.__br.set_proxies(proxies_per_proto)
        self.__br.set_debug_http(debug)
        # no respect for robots.txt
        self.__br.set_handle_robots(False)
        self.__sleep_secs = 0
        self.__sleep_module = 1
        self.__gets = 0
        #  no sign in
        # but i have a dummy user
        # user: [email protected]
        # password: zarasa123
        pass

    def set_sleep_secs(self, secs):
        self.__sleep_secs = secs

    def set_sleep_module(self, iterations):
        self.__sleep_module = iterations

    def __try_sleep(self):
        self.__gets += 1
        if self.__gets % self.__sleep_module == 0:
            print "Sleeping for %f seconds, every %d GETs" % (self.__sleep_secs, self.__sleep_module)
            time.sleep(self.__sleep_secs)

    # most_viewed
    # top_rated
    # recently_featured
    # watch_on_mobile
    def seeds(self):
        self.__try_sleep()
        resp = self.__br.open("http://www.flickr.com/")
        cont = resp.read()
        matches = re.findall(self.__contact_regex, cont)
        users = map(self.__strip_complete_name, matches)
        return users

    def search(self, query):

        br = self.__br
        # check if name exists.
        try:
            url = "http://www.flickr.com/photos/%s/favorites/" % query
            print url
            self.__try_sleep()
            resp = self.__br.open(url)
            cont = resp.read()
            if not "favorites" in cont:
                return []
        except Exception, e:
            if str(e) == "HTTP Error 404: Not Found":
                return []
            else:
                raise e
        return [cont]
Developer: therm000, Project: rankbytags, Lines of code: 83, Source file: FlickrBot.py

Example 10: Runner

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
class Runner(QThread):
    '''
    Authenticates to polimi and downloads cert
    '''
    statusChanged = Signal(int,unicode)
    error = Signal(unicode)
    
    def __init__(self,nm,user,password,anonuser,certPass,ptext,pbar):
        '''
        Constructor
        '''
        QThread.__init__(self)
        self.triumph = True
        self.user = user
        self.nm_iface = nm.applet
        self.anonuser = anonuser
        self.password = password
        self.passphrase = certPass
        self.certLocation = os.path.expanduser(CERT_LOCATION)
        self.certFolder = os.path.split(self.certLocation)[0]
        self.bro = Browser()
        if PROXY:
            self.bro.set_proxies({"http":PROXY,"https":PROXY})
        self.bro.set_handle_robots(0)
        
    def run(self):
        self.statusChanged.emit(0,self.tr("Connecting to ASICT..."))
        self.bro.open(START_URL)
        self.bro.follow_link(text='logon')
        self.statusChanged.emit(15,self.tr("Logging in..."))
        self.bro.select_form('')
        self.bro.form['login'] = str(self.user)
        self.bro.form['password'] = str(self.password)
        self.bro.submit()
        
        self.response = self.bro.open(START_URL)
        #Controlliamo se e' loggato
        if "logon" in self.response.read().split('<div id="preamble">')[-1].split("</div>")[0]:
            self.error.emit(self.tr("Wrong username or password"))
            return
        self.statusChanged.emit(40,self.tr("Downloading certificate.."))
        self.response = self.bro.follow_link(text='nuovo certificato')
        self.bro.select_form(name='exists')
        self.bro.submit()
        
        self.bro.select_form(name='passphrase')
        self.bro.form['passphrase'] = self.passphrase
        self.bro.form['passphraseCheck'] = self.passphrase
        tempfile = self.bro.retrieve(self.bro.form.click('_qf_passphrase_next'))
        response = self.bro.open(DOWNLOAD_URL)
        del tempfile
        if not os.path.exists(self.certFolder):
            os.mkdir(self.certFolder, FOLDER_MODE) 
        try:
            f = open(self.certLocation, "w") 
            f.write(response.read())
            f.close()
        except IOError:
            print "I/O Error during file writing"
            self.error.emit(self.tr("Can not save certificate"))
            return
        
        self.statusChanged.emit(70,self.tr("Creating CA certificate..."))
        popen_obj=Popen(CMD_OSSL.format(self.certLocation,os.path.join(self.certFolder,"asi.cer")), shell=True, stdin=PIPE, stdout=PIPE)
        popen_obj.communicate(self.passphrase+"\n"+self.passphrase)
        if popen_obj.poll() != 0:
            self.error.emit(self.tr("OpenSSL error: can not generate CA file"))
            return
        
        self.statusChanged.emit(90,self.tr("Creating NM connection..."))
        c = settings.WiFi(CLOSED_AP)
        c["connection"]["autoconnect"]=False
        c["802-11-wireless"]["security"]="802-11-wireless-security"
        c["802-11-wireless-security"]={}
        c["802-11-wireless-security"]["key-mgmt"]="wpa-eap"
        c["802-11-wireless-security"]["auth-alg"]="open"
        c["802-1x"]={}
        c["802-1x"]["eap"]=['tls']
        c["802-1x"]["client-cert"]=dbus.ByteArray("file://"+self.certLocation+"\0")
        c["802-1x"]["anonymous-identity"]= self.anonuser
        c["802-1x"]["ca-cert"]=dbus.ByteArray("file://"+os.path.join(self.certFolder,"asi.cer")+"\0")
        c["802-1x"]["private-key"]=dbus.ByteArray("file://"+self.certLocation+"\0")
        c["802-1x"]["private-key-password"]=self.passphrase
        c["802-1x"]["phase2-auth"]="mschapv2"
        try:
            self.nm_iface.AddConnection(c.conmap)
        except:
            self.error.emit(self.tr("Can not create network profile"))
            return
        self.statusChanged.emit(100,self.tr("Done"))
        Popen("gksu python2 /usr/share/poliwireless/quirks.py",shell=True).communicate("")
Developer: Politecnico-Open-unix-Labs, Project: Polinux, Lines of code: 93, Source file: workers.py

Example 11: __init__

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
class YouTubeBot:

    #__name_regex = 'href="http://profile.myspace.com/index.cfm?fuseaction=user.viewprofile&friendID=[0-9]+" linkindex='

    # use favorite authors as contacts.
    __contact_regex = 'http://www.youtube.com/profile\?user=[a-zA-Z_0-9]+'

    __complete_name_regex = 'http://www.youtube.com/profile\?user=[a-zA-Z_0-9]+'
    __complete_name_prefix = 'http://www.youtube.com/profile?user='
    __complete_name_sufix = ''

    __favorite_regex = 'http://www.youtube.com/watch\?v=[a-zA-Z_0-9\-]+'

    __favorite_regex = 'http://www.youtube.com/watch\?v=[a-zA-Z_0-9\-]+'
    __favorite_prefix = 'http://www.youtube.com/watch\?v='
    __favorite_sufix = ''

    __tag_regex = ' term=\'[a-zA-Z_0-9\-]+\'/>'

    __tag_regex = ' term=\'[a-zA-Z_0-9\-]+\'/>'
    __tag_prefix = ' term=\''
    __tag_sufix = '\'/>'

    __forb_tags = ['the', 'and', 'their', 'at', 'is', 'in', 'of', 
                   'a', 'on', 'for', 'an', 'with', 'to']    

    def __init__(self, proxies_per_proto={}, debug=False):
        self.__br = Browser()
        self.__br.set_proxies(proxies_per_proto)
        self.__br.set_debug_http(debug)
        # no respect for robots.txt
        self.__br.set_handle_robots(False)
        self.__sleep_secs = 0.0
        self.__sleep_module = 9999999
        self.__sleep_failure = 120.0
        self.__gets = 0
        #  no sign in
        # but i have a dummy user
        # user: [email protected]
        # password: zarasa123
        pass

    def set_sleep_secs(self, secs):
        self.__sleep_secs = float(secs)

    def set_sleep_module(self, iterations):
        self.__sleep_module = iterations

    def set_sleep_failure(self, secs):
        self.__sleep_failure = float(secs)        

    def __try_sleep(self):
        self.__gets += 1
        if self.__gets % self.__sleep_module == 0:
            print 'Sleeping for %f seconds, every %d GETs' % (self.__sleep_secs, self.__sleep_module)
            time.sleep(self.__sleep_secs)

    #most_viewed
    #top_rated
    #recently_featured
    #watch_on_mobile
    def seeds(self, type='most_viewed'):
        self.__try_sleep()
        resp = self.__br.open('http://gdata.youtube.com/feeds/standardfeeds/' + type)
        cont = resp.read()
        matches = re.findall(self.__contact_regex, cont)
        featured_users  = map(self.__strip_complete_name, matches)
        return featured_users

    def search(self, query):

        br = self.__br
        # check if name exists.
        try:
            print 'http://gdata.youtube.com/feeds/users/%s/favorites' % query
            self.__try_sleep()
            resp = self.__br.open('http://gdata.youtube.com/feeds/users/%s/favorites' % query)
        except Exception, e:
            if str(e) == 'HTTP Error 404: Not Found':
                return []
            else:
                raise e
        return [resp.read()]
Developer: therm000, Project: rankbytags, Lines of code: 85, Source file: YouTubeBot.py

Example 12: DOMScanner

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
class DOMScanner(threading.Thread):
    def __init__(self, engine, queue):
        threading.Thread.__init__(self)
        self.queue = queue
        self.engine = engine

        self.errors = {}
        self.results = []
        self.javascript = []
        self.whitelisted_js = []
        self.whitelist = []

        self.browser = Browser()
        self._setProxies()
        self._setHeaders()
        self._getWhitelist()

    def _setHeaders(self):
        if self.engine.getOption('ua') is not None:
            if self.engine.getOption('ua') is "RANDOM":
                self.browser.addheaders = [('User-Agent', random.choice(USER_AGENTS))]
            else:
                self.browser.addheaders = [('User-Agent', self.engine.getOption('ua'))]
        if self.engine.getOption("cookie") is not None:
            self.browser.addheaders = [("Cookie", self.engine.getOption("cookie"))]
    
    def _setProxies(self):
         if self.engine.getOption('http-proxy') is not None:
            self.browser.set_proxies({'http': self.engine.getOption('http-proxy')})

    def _addError(self, key, value):
        if self.errors.has_key(key):
            self.errors[key].append(value)
        else:
            self.errors[key] = [value]

    def _getWhitelist(self):
        path = os.path.split(os.path.realpath(__file__))[0]
        path = os.path.join(path, "../lib/whitelist.xml")
        f = open(path, "rb")
        xml = f.read()
        root = etree.XML(xml)

        for element in root.iterfind("javascript"):
            el = {
                'hash' : element.find("hash").text,
                'description': element.find("description").text,
                'reference': element.find("reference").text
                }
            self.whitelist.append(el)
        
    def _parseJavascript(self, target):
        if self.engine.getOption("ua") is "RANDOM": self._setHeaders() 
        
        url = target.getFullUrl()
        
        try:
            to = 10 if self.engine.getOption('http-proxy') is None else 20
            response = self.browser.open(url, timeout=to) #urlopen(req, timeout=to)
            
        except HTTPError, e:
            self._addError(e.code, target.getAbsoluteUrl())
            return
        except URLError, e:
            self._addError(e.reason, target.getAbsoluteUrl())
            return
Developer: Drx51, Project: Framework, Lines of code: 68, Source file: domscanner.py

Example 13: CourseraDownloader

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
class CourseraDownloader(object):
    """
    Class to download content (videos, lecture notes, ...) from coursera.org for
    use offline.

    https://github.com/dgorissen/coursera-dl
    """

    BASE_URL =    'http://class.coursera.org/%s'
    HOME_URL =    BASE_URL + '/class/index'
    LECTURE_URL = BASE_URL + '/lecture/index'
    LOGIN_URL =   BASE_URL + '/auth/auth_redirector?type=login&subtype=normal'
    QUIZ_URL =    BASE_URL + '/quiz/index'

    DEFAULT_PARSER = "lxml"

    def __init__(self,username,password,proxy=None,parser=DEFAULT_PARSER):
        """Requires your coursera username and password. 
        You can also specify the parser to use (defaults to lxml), see http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser
        """
        self.username = username
        self.password = password
        self.parser = parser

        self.browser = Browser()
        
        if proxy:
            self.browser.set_proxies({"http":proxy})

        self.browser.set_handle_robots(False)

    def login(self,course_name):
        print "* Authenticating as %s..." % self.username

        # open the course login page
        page = self.browser.open(self.LOGIN_URL % course_name)

        # check if we are already logged in by checking for a password field
        bs = BeautifulSoup(page,self.parser)
        pwdfield = bs.findAll("input",{"id":"password_login"})

        if pwdfield:
            self.browser.form = self.browser.forms().next()
            self.browser['email'] = self.username
            self.browser['password'] = self.password
            r = self.browser.submit()

            # check that authentication actually succeeded
            bs2 = BeautifulSoup(r.read(),self.parser)
            title = bs2.title.string
            if title.find("Login Failed") > 0:
                raise Exception("Failed to authenticate as %s" % (self.username,))
 
        else:
            # no login form, already logged in
            print "* Already logged in"


    def course_name_from_url(self,course_url):
        """Given the course URL, return the name, e.g., algo2012-p2"""
        return course_url.split('/')[3]

    def lecture_url_from_name(self,course_name):
        """Given the name of a course, return the video lecture url"""
        return self.LECTURE_URL % course_name

    def get_downloadable_content(self,course_url):
        """Given the video lecture URL of the course, return a list of all
        downloadable resources."""

        cname = self.course_name_from_url(course_url)

        print "* Collecting downloadable content from " + course_url

        # get the course name, and redirect to the course lecture page
        vidpage = self.browser.open(course_url)

        # extract the weekly classes
        soup = BeautifulSoup(vidpage,self.parser)
        headers = soup.findAll("div", { "class" : "course-item-list-header" })

        weeklyTopics = []
        allClasses = {}

        # for each weekly class
        for header in headers:
            h3 = header.findNext('h3')
            sanitisedHeaderName = sanitiseFileName(h3.text)
            weeklyTopics.append(sanitisedHeaderName)
            ul = header.next_sibling
            lis = ul.findAll('li')
            weekClasses = {}

            # for each lecture in a weekly class
            classNames = []
            for li in lis:
                className = sanitiseFileName(li.a.text)
                classNames.append(className)
                classResources = li.find('div', {'class':'course-lecture-item-resource'})

#......... part of the code omitted here .........
Developer: darkserman, Project: coursera-dl, Lines of code: 103, Source file: courseradownloader.py

Example 14: getSolutions

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
def getSolutions (path_prefix, path_proxy):
    global br, username, password

    # create a browser object
    br = Browser()

    # add proxy support to browser
    if len(path_proxy) != 0: 
        protocol,proxy = options.proxy.split("://")
        br.set_proxies({protocol:proxy})
    
    # let browser fool robots.txt
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; \
              rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    br.set_handle_robots(False)

    print ("Enter yout SPOJ username :"),
    username = raw_input()
    password = getpass.getpass()

    # authenticate the user
    print ("Authenticating " + username)
    br.open ("http://www.spoj.com/login")

    br.select_form (nr = 0)
    br["login_user"] = username
    br["password"] = password

    response = br.submit()
    
    verify = response.read()
    if (verify.find("Authentication failed!") != -1):
        print ("Error authenticating - " + username)
        exit(0)

    # grab the signed submissions list
    print ("Grabbing siglist for " + username)
    siglist = br.open("http://www.spoj.com/status/" + username + "/signedlist")

    # dump first nine useless lines in signed list for formatting
    for i in xrange(9):
        siglist.readline()

    # make a list of all AC's and challenges
    print ("Filtering siglist for AC/Challenge solutions...")
    mysublist = list()

    while True: 
        temp = siglist.readline()
        
        if temp=='\------------------------------------------------------------------------------/\n':
            # reached end of siglist
            break

        if not len(temp) :
            print ("Reached EOF, siglist format has probably changed," + \
                    " contact author.")
            exit(1)
            
        entry = [x.strip() for x in temp.split('|')]
        
        if entry[4] == 'AC' or entry[4].isdigit():
            mysublist.append (entry)

    print ("Done !!!")
    return mysublist
Developer: hardikdosi, Project: SPOJ-BACKUP-TOOL, Lines of code: 68, Source file: SPOJ+AC+Solutions+Download+Script.py

Example 15: __init__

# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import set_proxies [as alias]
class TwitterBot:

    __contact_regex = '<a href="http://twitter.com/[a-zA-Z_]+" rel="contact"><img alt=".*" class='
    
    __complete_name_regex = 'alt=".*" class='
    __complete_name_prefix = 'alt="'
    __complete_name_sufix = '" class='
    
    __url_regex = 'href=".*" rel'
    __url_prefix = 'href="'
    __url_sufix = '" rel'


    def __init__(self, user='zarasa123', passw='zarasa123', proxies_per_proto={}, debug=False):
        self.__br = Browser()
        self.__br.set_proxies(proxies_per_proto)
        self.__br.set_debug_http(debug)

        # sign in
        self.__br.open("http://twitter.com/")
        forms = self.__br.forms()
        form = forms.next()
        self.__br.select_form(nr=0)
        self.__br['username_or_email'] = user
        self.__br['password'] = passw
        resp = self.__br.submit()

    def search(self, query):
        br = self.__br
        self.__br.select_form(name='user_search_form')
        self.__br['q'] = query
        resp = self.__br.submit()
        links_urls = []
        for link in br.links(url_regex="twitter.com/[a-zA-Z_]+"):
            if  not 'index.php' in link.url and \
                not 'twitter.com/blog' in link.url and \
                not 'twitter.com/home' in link.url:
                links_urls.append(link.url)
        br.back()
        return links_urls

    def __strip_complete_name(self, html_match):
        match = re.search(self.__complete_name_regex, html_match)
        match = match.group()[len(self.__complete_name_prefix):-len(self.__complete_name_sufix)]
        return match

    def __strip_url(self, html_match):
        match = re.search(self.__url_regex, html_match)
        match = match.group()[len(self.__url_prefix):-len(self.__url_sufix)]
        return match


    def contacts(self, name):
        br = self.__br
        # check if name exists.
        results = self.search(name)
        if len(results) == 0:
            raise Exception('name "%s" doesn\'t exist in Twitter' % name)
        # assume the first person that matches
        name_link = results[0]
        # retrieve the first n (20?) contacts as tuples (complete_name, twitter_url).
        resp = self.__br.open(name_link + '/friends')
        cont =  resp.read()
        matches = re.findall(self.__contact_regex, cont)
        complete_names = map(self.__strip_complete_name, matches)
        urls = map(self.__strip_url, matches)
        return zip(complete_names, urls)
Developer: therm000, Project: rankbytags, Lines of code: 69, Source file: TwitterBot.py


Note: The mechanize.Browser.set_proxies method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; copyright of the source code belongs to the original authors. Please consult the corresponding project's License before distributing or using the code; do not reproduce without permission.