

Python user_agent.generate_user_agent Method Code Examples

This article collects typical usage examples of the Python method user_agent.generate_user_agent. If you are unsure what generate_user_agent does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also explore further usage of the user_agent module that the method belongs to.


The following presents 15 code examples of the user_agent.generate_user_agent method, ordered by popularity by default.
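For orientation before the examples, here is a minimal sketch of the basic call pattern. This is a sketch, assuming the package is installed (e.g. pip install user-agent); the keyword arguments mirror the ones exercised in Examples 9 and 10 below, while the concrete values 'chrome' and 'desktop' are assumptions based on the library's documented options.

# Minimal sketch of user_agent.generate_user_agent (assumes: pip install user-agent)
from user_agent import generate_user_agent

print(generate_user_agent())                       # a random, realistic User-Agent string
print(generate_user_agent(os=('mac', 'linux')))    # restrict the OS pool, as in Example 9
print(generate_user_agent(navigator='chrome'))     # 'chrome' is an assumed documented option
print(generate_user_agent(device_type='desktop'))  # 'desktop' is an assumed documented option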

Example 1: getBaiduDictCate

# Required module: import user_agent [as alias]
# Or: from user_agent import generate_user_agent [as alias]
def getBaiduDictCate():
    """
    Get the categories of the Baidu input-method thesaurus. There are three levels
    of categories; because the third level is too fine-grained and sparse, each
    third-level category is folded into its second-level parent.
    :return: two dicts -- the first maps top-level category IDs to their names,
             the second maps each top-level category in the first dict to all of
             its subcategories
    """
    bigCateDict = {}
    smallCateDict = {}
    initPageURL = r'https://shurufa.baidu.com/dict'
    cateBaseURL = r'https://shurufa.baidu.com/dict_list?cid='

    # guard against 502 errors
    userAgent = generate_user_agent()
    referrer = 'http://shurufa.baidu.com/dict.html'
    headers = {}
    headers['User-Agent'] = userAgent
    headers['Referer'] = referrer

    # fetch the top-level categories
    try:
        request = urllib2.Request(url=initPageURL, headers=headers)
        response = urllib2.urlopen(request)
        data = response.read()
    except urllib2.HTTPError, e:
        print 'Error while getting the big category, error code:', e.code
        sys.exit()
Author: WuLC, Project: ThesaurusSpider, Lines: 27, Source: getCategory.py

Example 2: __init__

# Required module: import user_agent [as alias]
# Or: from user_agent import generate_user_agent [as alias]
def __init__(self, proxy):
        """init the webdriver by setting the proxy and user-agent
        
        Args:
            proxy (str): proxy in the form of ip:port
        """
        # set proxy
        ip, port = proxy.split(':')
        profile = webdriver.FirefoxProfile()
        profile.set_preference("network.proxy.type", 1)
        profile.set_preference("network.proxy.http", ip)
        profile.set_preference("network.proxy.http_port", port)
        # set user_agent
        profile.set_preference("general.useragent.override", generate_user_agent())

        profile.update_preferences()
        self.driver = webdriver.Firefox(firefox_profile=profile)
        
        print 'current proxy: %s' % proxy
Author: WuLC, Project: AmazonRobot, Lines: 21, Source: Robot.py

Example 3: is_valid

# Required module: import user_agent [as alias]
# Or: from user_agent import generate_user_agent [as alias]
def is_valid(target_url, ip, referer):
    """judge if a proxy ip is valid for target_url
    
    Args:
        target_url (str): url that needs to be visited through the proxy
        ip (str): proxy in the form of ip:port
        referer (str): referer field of the request headers
    
    Returns:
        boolean
    """
    ignore_warnings()
    proxy = {
        'http': 'http://%s' % ip
    }
    headers = {'user-agent': generate_user_agent(), 'referer': referer}
    try:
        r = requests.get(target_url, headers=headers, proxies=proxy, timeout=6)
        return True
    except Exception:
        return False
Author: WuLC, Project: AmazonRobot, Lines: 23, Source: GetProxy.py

Example 4: get_phone_visa

# Required module: import user_agent [as alias]
# Or: from user_agent import generate_user_agent [as alias]
def get_phone_visa():
    """fetch phone, visa from http://www.fakeaddressgenerator.com/World/us_address_generator"""
    url = r'http://www.fakeaddressgenerator.com/World/us_address_generator'
    referer = r'http://www.fakeaddressgenerator.com/World'
    header = {'user-agent': generate_user_agent(), 'referer': referer}
    text = requests.get(url, headers=header).text
    soup = BeautifulSoup(text, 'lxml')
    info = soup.find_all('input')
    """
    print 'name:', info[0]['value']
    print 'phone:', info[9]['value']
    print 'visa:', info[11]['value']
    print 'expires:', info[13]['value']
    """
    name_phone = info[0]['value']+'#'+info[9]['value']
    name_visa = info[0]['value']+'#'+info[11]['value']+'#'+info[13]['value']
    print name_phone, name_visa
    return name_phone, name_visa
Author: WuLC, Project: AmazonRobot, Lines: 20, Source: GetUserInfo.py

Example 5: download_page

# Required module: import user_agent [as alias]
# Or: from user_agent import generate_user_agent [as alias]
def download_page(url):
    """download raw content of the page
    
    Args:
        url (str): url of the page 
    
    Returns:
        raw content of the page
    """
    try:
        headers = {}
        headers['User-Agent'] = generate_user_agent()
        headers['Referer'] = 'https://www.google.com'
        req = urllib.request.Request(url, headers=headers)
        resp = urllib.request.urlopen(req)
        return str(resp.read())
    except Exception as e:
        print('error while downloading page {0}'.format(url))
        logging.error('error while downloading page {0}'.format(url))
        return None 
Author: WuLC, Project: GoogleImagesDownloader, Lines: 22, Source: download_with_urllib.py

Example 6: http_request_get

# Required module: import user_agent [as alias]
# Or: from user_agent import generate_user_agent [as alias]
def http_request_get(url, session=None, payload=None, parse=True):
    """ Sends a GET HTTP request to a website and returns its HTML content and full url address. """

    if payload is None:
        payload = {}

    try:
        if session:
            content = session.get(url, params=payload, verify_ssl=False, headers={'User-Agent': generate_user_agent()})
        else:
            content = requests.get(url, params=payload, verify=False, headers={'User-Agent': generate_user_agent()})

        content.raise_for_status()  # Raise HTTPError for bad requests (4xx or 5xx)

        if parse:
            return html.fromstring(content.text), content.url
        else:
            return content.text, content.url
    except (asyncio.TimeoutError, requests.exceptions.Timeout):
        raise ConnectionTimeout(url) 
Author: mariostoev, Project: finviz, Lines: 22, Source: request_functions.py

Example 7: downloadSingleCate

# Required module: import user_agent [as alias]
# Or: from user_agent import generate_user_agent [as alias]
def downloadSingleCate(cateID, dirName, downloadLog, tryBest=True):
    """Download the thesaurus files of a single category

    :param cateID: category ID
    :param dirName: directory to download into
    :param downloadLog: download log recording the files that failed to download
    :param tryBest: whether the maximum number of retries has been reached
    :return: None
    """
    pageBaseUrl = r'https://shurufa.baidu.com/dict_list?cid=%s' % cateID
    fileBaseUrl = r'https://shurufa.baidu.com/dict_innerid_download?innerid='

    pagePattern = re.compile(r'page=(\d+)#page')  # non-greedy match for urls that lead to other pages
    filePattern = re.compile(r'dict-name="(.*?)" dict-innerid="(\d+)"')   # non-greedy match for the name and inner id of downloadable files

    visited = set()       # tracks whether a url has already been visited
    downloaded = set()    # tracks whether a file has already been downloaded

    # guard against 502 errors
    userAgent = generate_user_agent()
    referrer = 'http://shurufa.baidu.com/dict.html'
    headers = {}
    headers['User-Agent'] = userAgent
    headers['Referer'] = referrer

    # find the largest page number; the pages to crawl are then 1 through that maximum
    try:
        request = urllib2.Request(url=pageBaseUrl, headers=headers)
        response = urllib2.urlopen(request)
        data = response.read()
    except urllib2.HTTPError, e:
        if tryBest:
            with io.open(downloadLog.decode('utf8'), mode='a', encoding='utf8') as f:
                f.write((str(e.code)+' error while parsing url '+pageBaseUrl+'\n').decode('utf8'))
        return False
Author: WuLC, Project: ThesaurusSpider, Lines: 38, Source: singleThreadDownload.py

Example 8: getCategoryPages

# Required module: import user_agent [as alias]
# Or: from user_agent import generate_user_agent [as alias]
def getCategoryPages(caterotyID, downloadDIR):
    """Get the total page count of a category from its initial page and put all
    page numbers into PAGE_QUEUE for the download threads to consume

    :param caterotyID: ID of the thesaurus category to download, used to build the correct url
    :param downloadDIR: directory where the downloaded thesaurus files are stored
    :return:
    """
    global CATEID, DOWNLOAD_DIR, PAGE_BASE_URL, THREAD_LOCK
    CATEID = caterotyID
    DOWNLOAD_DIR = downloadDIR
    PAGE_BASE_URL = 'https://shurufa.baidu.com/dict_list?cid=%s' % CATEID
    pagePattern = re.compile(r'page=(\d+)#page')    # regex for finding the urls of the other pages in the page source

    # guard against 502 errors
    userAgent = generate_user_agent()
    referrer = 'http://shurufa.baidu.com/dict.html'
    headers = {}
    headers['User-Agent'] = userAgent
    headers['Referer'] = referrer

    # find the largest page number; the pages to crawl are then 1 through that maximum
    # the server may return 502/500 errors, so try at most maxTry times
    maxTry = 8
    data = None
    for i in xrange(maxTry):
        try:
            request = urllib2.Request(url=PAGE_BASE_URL, headers=headers)
            response = urllib2.urlopen(request)
            data = response.read()
            break
        except urllib2.HTTPError, e:
            if i == maxTry-1:
                with io.open(DOWNLOAD_LOG.decode('utf8'), mode='a', encoding='utf8') as f:
                    f.write((str(e.code)+' error while parsing url '+PAGE_BASE_URL+'\n').decode('utf8'))
        except:
            pass  # added so the truncated excerpt parses; the original handles other errors here
Author: WuLC, Project: ThesaurusSpider, Lines: 37, Source: multiThreadDownload.py

Example 9: generate_profile

# Required module: import user_agent [as alias]
# Or: from user_agent import generate_user_agent [as alias]
def generate_profile(useragent="(default)"):
    profile = FirefoxProfile()
    if useragent.strip().lower() == "(default)":
        status("Using the default useragent")
        return profile
    elif useragent.strip().lower() == "(random)":
        random_useragent = generate_user_agent(os=('mac', 'linux'))
        profile.set_preference("general.useragent.override", random_useragent)  # make our useragent random
        status("Using random useragent " + random_useragent)
        return profile
    else:
        profile.set_preference("general.useragent.override", useragent)
        status("Using useragent " + useragent)
        return profile
Author: OWASP, Project: QRLJacking, Lines: 16, Source: browser.py

Example 10: get_user_agent

# Required module: import user_agent [as alias]
# Or: from user_agent import generate_user_agent [as alias]
def get_user_agent(os=None, navigator=None, device_type=None):
    try:
        u = generate_user_agent(os=os, navigator=navigator, device_type=device_type)
    except Exception as e:
        u = str(e)
    return u 
Author: Hopetree, Project: izone, Lines: 8, Source: useragent.py
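A brief usage note on Example 10: generate_user_agent raises an exception when it receives values it does not recognize, so this wrapper deliberately returns the error text instead of raising. A hypothetical call, assuming the library also accepts a single OS name in place of a tuple:

ua = get_user_agent(os='linux', navigator='chrome')  # a Linux/Chrome UA string, assuming these values are accepted
bad = get_user_agent(os='no-such-os')                # returns the exception message instead of raising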

Example 11: get_proxies

# Required module: import user_agent [as alias]
# Or: from user_agent import generate_user_agent [as alias]
def get_proxies(proxy_type, ip_set, start_page, end_page):
    """extract proxies from page source code, store them in redis
    
    Args:
        proxy_type (str): base url for the proxy type, like the global variables CHINA and OTHER
        ip_set (str): name of the redis set in which the ips should be stored
        start_page (int): which page to start crawling
        end_page (int): which page to stop crawling
    """
    try:
        conn = get_connection()
    except Exception:
        print 'Error while connecting to redis'
        return
    proxies, curr_proxy = [], None
    for page in xrange(start_page, end_page+1):
        if page % 2 == 0:
            time.sleep(20)
        # get page source code
        headers = {'user-agent': generate_user_agent(), 'referer': 'http://www.xicidaili.com/'}
        text = requests.get(proxy_type+str(page), headers=headers).text
        # extract ips from source code
        soup = BeautifulSoup(text, 'lxml')
        for tr in soup.find_all('tr')[1:]:
            tds = tr.find_all('td')
            # if u'美國' in tds[3].text:  # optional filter for US proxies
            proxy = tds[1].text+':'+tds[2].text
            if is_valid('https://www.amazon.com/', proxy):
                conn.sadd(ip_set, proxy)
                print '%s added to ip set %s' % (proxy, ip_set)
Author: WuLC, Project: AmazonRobot, Lines: 32, Source: GetProxy.py

Example 12: get_address

# Required module: import user_agent [as alias]
# Or: from user_agent import generate_user_agent [as alias]
def get_address(proxy):
    """fetch an american address from https://fakena.me/random-real-address/
    
    Args:
        proxy (str): proxy to visit the target site, ip:port
    
    Returns:
        format_addr (str): american address in the form of "address_line # city # state # zip"
    """
    ignore_warnings()
    url = r'https://fakena.me/random-real-address/'
    referer = r'https://fakena.me'
    header = {'user-agent': generate_user_agent(), 'referer': referer}
    curr_proxy = {
        'http': 'http://%s' % proxy
    }

    text = requests.get(url, headers=header, proxies=curr_proxy).text
    pattern = re.compile('<strong>(.+)<br>(.+)</strong>')
    result = re.findall(pattern, text)
    if result:  # sometimes the result is empty
        print result[0][0], result[0][1]
        address_line = result[0][0]
        city, state_zip = result[0][1].split(',')
        state, zip = state_zip.split()
        format_addr = address_line+'#'+city+'#'+state+'#'+zip
        return format_addr
    else:
        return ''
Author: WuLC, Project: AmazonRobot, Lines: 31, Source: GetUserInfo.py

Example 13: __http_request__async

# Required module: import user_agent [as alias]
# Or: from user_agent import generate_user_agent [as alias]
def __http_request__async(self, url, session):
        """ Sends asynchronous http request to URL address and scrapes the webpage. """

        try:
            async with session.get(url, headers={'User-Agent': generate_user_agent()}) as response:
                page_html = await response.read()

                if self.cssselect is True:
                    return self.scrape_function(html.fromstring(page_html), url=url, *self.arguments)
                else:
                    return self.scrape_function(page_html, url=url, *self.arguments)
        except (asyncio.TimeoutError, requests.exceptions.Timeout):
            raise ConnectionTimeout(url) 
Author: mariostoev, Project: finviz, Lines: 15, Source: request_functions.py

Example 14: __async_scraper

# Required module: import user_agent [as alias]
# Or: from user_agent import generate_user_agent [as alias]
def __async_scraper(self):
        """ Adds a URL's into a list of tasks and requests their response asynchronously. """

        async_tasks = []
        conn = aiohttp.TCPConnector(limit_per_host=connection_settings['CONCURRENT_CONNECTIONS'])
        timeout = aiohttp.ClientTimeout(total=connection_settings['CONNECTION_TIMEOUT'])

        async with aiohttp.ClientSession(connector=conn,
                                         timeout=timeout,
                                         headers={'User-Agent': generate_user_agent()}) as session:
            for n in self.tasks:
                async_tasks.append(self.__http_request__async(n, session))

            self.data = await asyncio.gather(*async_tasks) 
Author: mariostoev, Project: finviz, Lines: 16, Source: request_functions.py

Example 15: send_to_proxy_from_definition

# Required module: import user_agent [as alias]
# Or: from user_agent import generate_user_agent [as alias]
def send_to_proxy_from_definition(running_config: RunningConfig):
    openapi3_content: dict = await openapi3_from_db(running_config.api_id)

    session_user_agent = generate_user_agent()

    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(
            verify_ssl=False)) as session:

        raw_endpoints = search(openapi3_content, "paths")
        query = request_generator(openapi3_content)
        resolver = ref_resolver(openapi3_content)
        endpoints = transform_tree(raw_endpoints, resolver)

        http_scheme, netloc, path, *_ = urlparse(
            running_config.api_url
        )

        host, port = split_netloc(netloc, http_scheme)

        for url, endpoint in endpoints.items():

            logger.info(f"Generating data for End Point: {url}")

            try:
                for method in ("get", "put", "post", "delete"):
                    if method in endpoint:
                        gen = query(url, method=method)
                        req: dict = next(gen)
                        break
                else:
                    raise APICheckException("Unknown method in url: ", url)

            except ValueError as ve:
                logger.error(f"cannot generate data: {ve} - {url}")

            url = f"{http_scheme}://{host}:{port}{path}{req['path']}"

            custom_headers = req["headers"]
            custom_headers["user-agent"] = session_user_agent

            fn_params = dict(
                url=url,
                headers=custom_headers,
                proxy=f"http://{running_config.proxy_ip}:"
                      f"{running_config.proxy_port}",
                skip_auto_headers=("content-type", "user-agent")
            )

            try:
                fn_params["data"] = req["body"]
            except KeyError:
                fn_params["data"] = None

            fn_method = getattr(session, req["method"])

            async with fn_method(**fn_params) as response:
                resp = await response.text() 
Author: BBVA, Project: apicheck, Lines: 59, Source: run.py


Note: the user_agent.generate_user_agent examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their respective developers; copyright of the source code remains with the original authors, and distribution or use should follow the corresponding project's License. Do not reproduce without permission.