

Python user_agent.generate_user_agent Function Code Examples

This article collects typical usage examples of the user_agent.generate_user_agent function in Python. If you are unsure what generate_user_agent does, how to call it, or how it is used in practice, the hand-picked code examples below should help.


The sections below present 15 code examples of generate_user_agent, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
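
Before diving into the examples, here is a minimal usage sketch, assuming the package is installed (e.g. pip install user_agent). With no arguments the function returns a random User-Agent string; the keyword arguments seen in the examples below (navigator, device_type, and os or platform depending on the release) constrain the result.

from user_agent import generate_user_agent

# A random User-Agent string for a random browser/OS combination
print(generate_user_agent())

# Constrain the browser (and, in recent releases, the OS)
print(generate_user_agent(navigator='chrome', os='linux'))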

Example 1: test_navigator_option

    def test_navigator_option(self):
        for x in range(100):
            ua = generate_user_agent(navigator='firefox')
            self.assertTrue('firefox' in ua.lower())

            ua = generate_user_agent(navigator='chrome')
            self.assertTrue('chrome' in ua.lower())
Developer: jamb0ss, Project: user_agent, Lines: 7, Source: test.py

Example 2: test_platform_option_tuple

    def test_platform_option_tuple(self):
        for x in range(100):
            ua = generate_user_agent(platform=('win', 'linux'))
            ua = generate_user_agent(platform=('win', 'linux', 'mac'))
            ua = generate_user_agent(platform=('win',))
            ua = generate_user_agent(platform=('linux',))
            ua = generate_user_agent(platform=('mac',))
Developer: alexfalcucc, Project: user_agent, Lines: 7, Source: test.py

Example 3: test_device_type_smartphone_chrome

def test_device_type_smartphone_chrome():
    for _ in range(50):
        agent = generate_user_agent(device_type='smartphone',
                                    navigator='chrome')
        assert 'Mobile' in agent
        agent = generate_user_agent(device_type='tablet', navigator='chrome')
        assert 'Mobile' not in agent
Developer: lorien, Project: user_agent, Lines: 7, Source: user_agent.py

Example 4: test_platform_option_tuple

def test_platform_option_tuple():
    for _ in range(50):
        generate_user_agent(os=('win', 'linux'))
        generate_user_agent(os=('win', 'linux', 'mac'))
        generate_user_agent(os=('win',))
        generate_user_agent(os=('linux',))
        generate_user_agent(os=('mac',))
Developer: lorien, Project: user_agent, Lines: 7, Source: user_agent.py
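
Note the API difference between Example 2 and Example 4: the older forks (jamb0ss, alexfalcucc) select the operating system with a platform= keyword, while the lorien version uses os=. A short sketch of both spellings; pick the one matching your installed release:

from user_agent import generate_user_agent

# newer releases (lorien/user_agent):
ua = generate_user_agent(os=('win', 'linux'))

# older forks (Examples 1, 2, 5, 8) use platform= instead:
# ua = generate_user_agent(platform=('win', 'linux'))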

Example 5: test_platform_navigator_option

    def test_platform_navigator_option(self):
        for x in range(100):
            ua = generate_user_agent(platform='win', navigator='firefox')
            self.assertTrue('firefox' in ua.lower())
            self.assertTrue('windows' in ua.lower())

            ua = generate_user_agent(platform='win', navigator='chrome')
            self.assertTrue('chrome' in ua.lower())
            self.assertTrue('windows' in ua.lower())
Developer: jamb0ss, Project: user_agent, Lines: 9, Source: test.py

Example 6: test_platform_option

def test_platform_option():
    for _ in range(50):
        agent = generate_user_agent(os='linux')
        assert 'linux' in agent.lower()

        agent = generate_user_agent(os='win')
        assert 'windows' in agent.lower()

        agent = generate_user_agent(os='mac')
        assert 'mac' in agent.lower()
Developer: lorien, Project: user_agent, Lines: 10, Source: user_agent.py

Example 7: test_navigator_option

def test_navigator_option():
    for _ in range(50):
        agent = generate_user_agent(navigator='firefox')
        assert 'firefox' in agent.lower()

        agent = generate_user_agent(navigator='chrome')
        assert 'chrome' in agent.lower()

        agent = generate_user_agent(navigator='ie')
        assert 'msie' in agent.lower() or 'rv:11' in agent.lower()
Developer: lorien, Project: user_agent, Lines: 10, Source: user_agent.py

Example 8: test_platform_option

    def test_platform_option(self):
        for x in range(100):
            ua = generate_user_agent(platform='linux')
            self.assertTrue('linux' in ua.lower())

            ua = generate_user_agent(platform='win')
            self.assertTrue('windows' in ua.lower())

            ua = generate_user_agent(platform='mac')
            self.assertTrue('mac' in ua.lower())

            self.assertRaises(UserAgentRuntimeError,
                              generate_user_agent,
                              platform=11)
Developer: alexfalcucc, Project: user_agent, Lines: 14, Source: test.py
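
Example 8 also shows the failure mode: in that fork, an invalid option type raises UserAgentRuntimeError. A hedged sketch of defensive usage follows; the exception name and top-level import are taken from Example 8's test code and may differ in other releases:

from user_agent import generate_user_agent, UserAgentRuntimeError  # import path assumed from Example 8

try:
    ua = generate_user_agent(platform=11)  # invalid: must be a string or tuple of strings
except UserAgentRuntimeError:
    ua = generate_user_agent()  # fall back to an unconstrained User-Agent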

Example 9: getheadline

def getheadline(companyName, day, firstlink, prevdatelink):
    '''
    scrape headlines from finance.yahoo.com
    '''
    #date = '2016-02-'+str(day)
    searchUrl = 'http://finance.yahoo.com/q/h?s='+companyName+'&t=2016-04-'+str(day)
    # use a fake user agent
    head = generate_user_agent().encode('ascii', 'ignore')
    headers = {'User-Agent': head}  # standard header name; 'useragent' would be ignored by servers
    response = requests.get(searchUrl, headers=headers)
    
    soup = BeautifulSoup(response.content, 'html.parser')
    links = soup.select('div.yfi_quote_headline ul > li > a')
    #write the search results in file, a new file for each day
    filename = 'links'+str(day)+'.txt'

    with io.open(filename, encoding='utf-8', mode='w+') as ns:
        count = 1
        for link in links:
            nextlinks = link.get('href')+'\n'
            if count == 1:
                ns.write(nextlinks)
                firstlink = nextlinks
            elif prevdatelink == nextlinks:
                print "All uniques headlines scraped"
                break
            else:
                ns.write(nextlinks)
            count += 1
        ns.close()
    return firstlink
Developer: aizaazali, Project: StockMarketAnalyzer-Hive_Pig, Lines: 33, Source: get_headlines.py

Example 10: get_proxies

def get_proxies(proxy_type, ip_set, start_page, end_page):
    """extract proxies from page source code, store them in redis
    
    Args:
        proxy_type (str): base url for proxy type, like the global variables CHINA and OTHER
        ip_set (str): which set should the ips be stored in redis
        start_page (int):  which page to start crawling
        end_page (int): which page to stop crawling
    """
    try:
        conn = get_connection()
    except Exception:
        print 'Error while connecting to redis'
        return
    proxies, curr_proxy =[], None
    for page in xrange(start_page, end_page+1):
        if page % 2 == 0:
            time.sleep(20)
        # get page source code
        headers = {'user-agent': generate_user_agent(), 'referer': 'http://www.xicidaili.com/'}
        text = requests.get(proxy_type+str(page), headers = headers).text
        # extract ips from source code
        soup = BeautifulSoup(text, 'lxml')
        for tr in soup.find_all('tr')[1:]:
            tds = tr.find_all('td')
            #if u'美国' in tds[3].text:  # optional filter: keep only proxies located in the US (美国)
            proxy = tds[1].text+':'+tds[2].text               
            if is_valid('https://www.amazon.com/', proxy):
                conn.sadd(ip_set, proxy)
                print '%s added to ip set %s' %(proxy, ip_set)
Developer: bdchinacs, Project: AmazonRobot, Lines: 30, Source: GetProxy.py

Example 11: getBaiduDictCate

def getBaiduDictCate():
    """
    功能:得到百度词库的分类,有三级分类,因为三级分类太细而且较少,所以将三级分类纳入其二级分类
    :return:两个词典,第一个词典记录大类的ID和内容的对应关系,第二个词典记录了第一个词典中每一类大类下的所有分类
    """
    bigCateDict = {}
    smallCateDict ={}
    initPageURL = r'https://shurufa.baidu.com/dict'
    cateBaseURL = r'https://shurufa.baidu.com/dict_list?cid='

    # avoid 502 errors by sending browser-like headers
    userAgent = generate_user_agent()
    referrer = 'http://shurufa.baidu.com/dict.html'  
    headers = {}
    headers['User-Agent'] = userAgent
    headers['Referer'] = referrer

    # fetch the top-level categories
    try:
        request = urllib2.Request(url=initPageURL, headers=headers)
        response = urllib2.urlopen(request)
        data = response.read()
    except urllib2.HTTPError, e:
        print 'Error while getting the top-level categories, error code:', e.code
        sys.exit()
Developer: WuLC, Project: ThesaurusSpider, Lines: 25, Source: getCategory.py

Example 12: getCategoryPages

def getCategoryPages(caterotyID,downloadDIR):
    """通过类别的初始页面得到该类别的总页数,并将所有的页数放到 PAGE_QUEUE 中供所有线程下载

    :param caterotyID: 下载的词库类型的 ID,用于找到正确 url
    :param downloadDIR: 下载词库的存放目录
    :return:
    """
    global CATEID, DOWNLOAD_DIR, PAGE_BASE_URL, THREAD_LOCK
    CATEID = caterotyID
    DOWNLOAD_DIR = downloadDIR
    PAGE_BASE_URL = 'https://shurufa.baidu.com/dict_list?cid=%s' % CATEID
    pagePattern = re.compile(r'page=(\d+)#page')    # regex matching the URLs of the other pages in the page source
    
    # avoid 502 errors by sending browser-like headers
    userAgent = generate_user_agent()
    referrer = 'http://shurufa.baidu.com/dict.html'  
    headers = {}
    headers['User-Agent'] = userAgent
    headers['Referer'] = referrer

    # find the largest page number; all pages are then 1 through that maximum
    # the server may return 502/500 errors, so retry up to maxTry times
    maxTry = 8
    data = None
    for i in xrange(maxTry):
        try:
            request = urllib2.Request(url=PAGE_BASE_URL, headers=headers)
            response = urllib2.urlopen(request)
            data = response.read()
            break
        except urllib2.HTTPError, e:
            if i == maxTry-1:
                with io.open(DOWNLOAD_LOG.decode('utf8'), mode = 'a', encoding = 'utf8') as f:
                    f.write((str(e.code)+' error while parsing url '+PAGE_BASE_URL+'\n').decode('utf8'))
        except:
Developer: WuLC, Project: ThesaurusSpider, Lines: 35, Source: multiThreadDownload.py

Example 13: getarticle

def getarticle(readfile):
    ''' get the article and save it in a different file '''
    try:
        fileopen = open(readfile)
    except IOError:
        print "file " + readfile + " not in the location specified"
        return

    i = 1
    for line in fileopen:
        try:
            ua = generate_user_agent()
            head = ua.encode('ascii', 'ignore')
            headers = {'User-Agent': head}  # standard header name; 'useragent' would be ignored by servers

            print "reading article :"
            print line
            html = requests.get(line, headers=headers).text
            tex = fulltext(html)
            writefile = "201604" + str(j) + "_" + str(i) + ".txt"  # j is defined elsewhere in the original script
            with io.open(writefile, encoding='utf-8', mode='w+') as ns:
                strng = ' '.join(tex.split())
                ns.write(strng)
                ns.close()  # redundant: the with block already closes the file
            i = i + 1
        except:
            pass
Developer: aizaazali, Project: StockMarketAnalyzer-Hive_Pig, Lines: 27, Source: getarticle.py

Example 14: get_address

def get_address(proxy):
    """fetch american address from https://fakena.me/random-real-address/
    
    Args:
        proxy (str): proxy to visit the target site, ip:port
    
    Returns:
        format_addr (str): american address in the form of "address_line # city # state # zip"
    """
    ignore_warnings()
    url = r'https://fakena.me/random-real-address/'
    referer = r'https://fakena.me'
    header = {'user-agent' : generate_user_agent() , 'referer':referer }
    curr_proxy ={
    'http': 'http://%s'%proxy
    }

    text = requests.get(url, headers = header, proxies = curr_proxy).text
    pattern = re.compile('<strong>(.+)<br>(.+)</strong>')
    result = re.findall(pattern, text)
    if result: # sometimes the result is empty
        print result[0][0], result[0][1]
        address_line = result[0][0]
        city, state_zip = result[0][1].split(',')
        state, zip = state_zip.split()
        format_addr = address_line+'#'+city+'#'+state+'#'+zip
        return format_addr
    else:
        return ''
Developer: bdchinacs, Project: AmazonRobot, Lines: 29, Source: GetUserInfo.py

Example 15: send_query

    def send_query(self, query):
        # TODO: Randomize query, i.e. remove/change unused arguments to vary query signature
        self.queries_sent += 1
        if self.queries_sent % self.queries_change == 0:
            self.queries_change = randint(3, 13)
            ScholarConf.USER_AGENT = generate_user_agent()

        return super(BibDLQuerier, self).send_query(query)
Developer: igsor, Project: bibdl, Lines: 8, Source: bibdl.py
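
Example 15 swaps in a fresh User-Agent after a random number of queries so that no single User-Agent accumulates a long request history. The same idea generalizes to any HTTP client; here is a minimal sketch (RotatingSession is a hypothetical helper, not part of any project above):

import random

import requests
from user_agent import generate_user_agent

class RotatingSession(object):
    """requests.Session wrapper that renews its User-Agent
    after a random number of requests (pattern from Example 15)."""

    def __init__(self):
        self.session = requests.Session()
        self._renew()

    def _renew(self):
        self.remaining = random.randint(3, 13)
        self.session.headers['User-Agent'] = generate_user_agent()

    def get(self, url, **kwargs):
        if self.remaining <= 0:
            self._renew()
        self.remaining -= 1
        return self.session.get(url, **kwargs)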


Note: The user_agent.generate_user_agent examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their authors, and copyright of the source code remains with the original authors. Consult each project's License before using or redistributing the code, and do not reproduce this compilation without permission.