This article collects typical usage examples of the generate_user_agent function from the Python user_agent module. If you are wondering what exactly generate_user_agent does, or how to use it, the curated code examples below may help.
The following presents 15 code examples of generate_user_agent, sorted by popularity by default.
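Before the examples, a minimal usage sketch (not one of the collected examples): generate_user_agent returns a random user-agent string and accepts keyword options to constrain it. Note that newer releases of the library accept os= while older ones used platform=, which is why both spellings appear in the examples below.
from user_agent import generate_user_agent

# A random user-agent string for any supported OS/browser combination.
ua = generate_user_agent()

# Constrain the result; newer releases use os=, older ones used platform=.
ua = generate_user_agent(os='win', navigator='chrome', device_type='desktop')
print(ua)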
Example 1: test_navigator_option
def test_navigator_option(self):
    for x in range(100):
        ua = generate_user_agent(navigator='firefox')
        self.assertTrue('firefox' in ua.lower())
        ua = generate_user_agent(navigator='chrome')
        self.assertTrue('chrome' in ua.lower())
Example 2: test_platform_option_tuple
def test_platform_option_tuple(self):
    for x in range(100):
        # Each call should succeed for any tuple of supported platforms.
        ua = generate_user_agent(platform=('win', 'linux'))
        ua = generate_user_agent(platform=('win', 'linux', 'mac'))
        ua = generate_user_agent(platform=('win',))
        ua = generate_user_agent(platform=('linux',))
        ua = generate_user_agent(platform=('mac',))
Example 3: test_device_type_smartphone_chrome
def test_device_type_smartphone_chrome():
    for _ in range(50):
        agent = generate_user_agent(device_type='smartphone',
                                    navigator='chrome')
        assert 'Mobile' in agent
        agent = generate_user_agent(device_type='tablet', navigator='chrome')
        assert 'Mobile' not in agent
Example 4: test_platform_option_tuple
def test_platform_option_tuple():
    for _ in range(50):
        # Each call should succeed for any tuple of supported OS names.
        generate_user_agent(os=('win', 'linux'))
        generate_user_agent(os=('win', 'linux', 'mac'))
        generate_user_agent(os=('win',))
        generate_user_agent(os=('linux',))
        generate_user_agent(os=('mac',))
Example 5: test_platform_navigator_option
def test_platform_navigator_option(self):
    for x in range(100):
        ua = generate_user_agent(platform='win', navigator='firefox')
        self.assertTrue('firefox' in ua.lower())
        self.assertTrue('windows' in ua.lower())
        ua = generate_user_agent(platform='win', navigator='chrome')
        self.assertTrue('chrome' in ua.lower())
        self.assertTrue('windows' in ua.lower())
Example 6: test_platform_option
def test_platform_option():
    for _ in range(50):
        agent = generate_user_agent(os='linux')
        assert 'linux' in agent.lower()
        agent = generate_user_agent(os='win')
        assert 'windows' in agent.lower()
        agent = generate_user_agent(os='mac')
        assert 'mac' in agent.lower()
Example 7: test_navigator_option
def test_navigator_option():
    for _ in range(50):
        agent = generate_user_agent(navigator='firefox')
        assert 'firefox' in agent.lower()
        agent = generate_user_agent(navigator='chrome')
        assert 'chrome' in agent.lower()
        agent = generate_user_agent(navigator='ie')
        # IE 11 dropped the 'MSIE' token; its UA string carries 'rv:11' instead.
        assert 'msie' in agent.lower() or 'rv:11' in agent.lower()
Example 8: test_platform_option
def test_platform_option(self):
    for x in range(100):
        ua = generate_user_agent(platform='linux')
        self.assertTrue('linux' in ua.lower())
        ua = generate_user_agent(platform='win')
        self.assertTrue('windows' in ua.lower())
        ua = generate_user_agent(platform='mac')
        self.assertTrue('mac' in ua.lower())
    # A non-string platform value should raise an error.
    self.assertRaises(UserAgentRuntimeError,
                      generate_user_agent,
                      platform=11)
Example 9: getheadline
def getheadline(companyName, day, firstlink, prevdatelink):
    '''
    Scrape headlines from finance.yahoo.com.
    '''
    #date = '2016-02-'+str(day)
    searchUrl = 'http://finance.yahoo.com/q/h?s='+companyName+'&t=2016-04-'+str(day)
    # Use a fake user agent; note the header key must be 'User-Agent'
    # (the original used 'useragent', which servers do not recognize).
    head = generate_user_agent().encode('ascii', 'ignore')
    headers = {'User-Agent': head}
    response = requests.get(searchUrl, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')
    links = soup.select('div.yfi_quote_headline ul > li > a')
    # Write the search results to a file, a new file for each day.
    filename = 'links'+str(day)+'.txt'
    with io.open(filename, encoding='utf-8', mode='w+') as ns:
        count = 1
        for link in links:
            nextlinks = link.get('href')+'\n'
            if count == 1:
                ns.write(nextlinks)
                firstlink = nextlinks
            elif prevdatelink == nextlinks:
                print "All unique headlines scraped"
                break
            else:
                ns.write(nextlinks)
            count += 1
    return firstlink
Example 10: get_proxies
def get_proxies(proxy_type, ip_set, start_page, end_page):
    """Extract proxies from page source code and store them in redis.

    Args:
        proxy_type (str): base url for the proxy type, like the global variables CHINA and OTHER
        ip_set (str): name of the redis set in which the ips should be stored
        start_page (int): page at which to start crawling
        end_page (int): page at which to stop crawling
    """
    try:
        conn = get_connection()
    except Exception:
        print 'Error while connecting to redis'
        return
    proxies, curr_proxy = [], None
    for page in xrange(start_page, end_page+1):
        if page % 2 == 0:
            time.sleep(20)
        # Get the page source code.
        headers = {'user-agent': generate_user_agent(), 'referer': 'http://www.xicidaili.com/'}
        text = requests.get(proxy_type+str(page), headers=headers).text
        # Extract ips from the source code.
        soup = BeautifulSoup(text, 'lxml')
        for tr in soup.find_all('tr')[1:]:
            tds = tr.find_all('td')
            # The original optionally filtered for U.S. proxies:
            # if u'美国' in tds[3].text:
            proxy = tds[1].text+':'+tds[2].text
            if is_valid('https://www.amazon.com/', proxy):
                conn.sadd(ip_set, proxy)
                print '%s added to ip set %s' % (proxy, ip_set)
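The example relies on helpers get_connection and is_valid that are defined elsewhere in its source module. As a rough, hypothetical sketch (not the original author's code), is_valid might simply check whether the proxy can reach the target site within a timeout:
import requests

def is_valid(target_url, proxy, timeout=5):
    """Hypothetical reimplementation: return True if `proxy` (ip:port)
    can fetch `target_url` within `timeout` seconds."""
    proxies = {'http': 'http://%s' % proxy, 'https': 'http://%s' % proxy}
    try:
        response = requests.get(target_url, proxies=proxies, timeout=timeout)
        return response.status_code == 200
    except requests.RequestException:
        return False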
Example 11: getBaiduDictCate
def getBaiduDictCate():
    """
    Fetch the categories of the Baidu input-method dictionary. There are three
    levels of categories; because the third level is too fine-grained and small,
    third-level categories are merged into their second-level parents.
    :return: two dicts: the first maps top-level category IDs to their names,
             the second maps each top-level category in the first dict to all
             of its subcategories
    """
    bigCateDict = {}
    smallCateDict = {}
    initPageURL = r'https://shurufa.baidu.com/dict'
    cateBaseURL = r'https://shurufa.baidu.com/dict_list?cid='
    # Spoof a browser to avoid 502 errors.
    userAgent = generate_user_agent()
    referrer = 'http://shurufa.baidu.com/dict.html'
    headers = {}
    headers['User-Agent'] = userAgent
    headers['Referer'] = referrer
    # Fetch the top-level categories.
    try:
        request = urllib2.Request(url=initPageURL, headers=headers)
        response = urllib2.urlopen(request)
        data = response.read()
    except urllib2.HTTPError, e:
        print 'Error while getting the big category, error code:', e.code
        sys.exit()
Example 12: getCategoryPages
def getCategoryPages(caterotyID, downloadDIR):
    """Get the total page count of a category from its initial page, and put
    all page numbers into PAGE_QUEUE for the download threads to consume.
    :param caterotyID: ID of the dictionary category to download, used to build the correct url
    :param downloadDIR: directory in which downloaded dictionaries are stored
    :return:
    """
    global CATEID, DOWNLOAD_DIR, PAGE_BASE_URL, THREAD_LOCK
    CATEID = caterotyID
    DOWNLOAD_DIR = downloadDIR
    PAGE_BASE_URL = 'https://shurufa.baidu.com/dict_list?cid=%s' % CATEID
    pagePattern = re.compile(r'page=(\d+)#page')  # regex matching the URLs of the other pages in the page source
    # Spoof a browser to avoid 502 errors.
    userAgent = generate_user_agent()
    referrer = 'http://shurufa.baidu.com/dict.html'
    headers = {}
    headers['User-Agent'] = userAgent
    headers['Referer'] = referrer
    # Find the largest page number; the pages to crawl are then 1..max.
    # The server may return 502/500 errors, so retry up to maxTry times.
    maxTry = 8
    data = None
    for i in xrange(maxTry):
        try:
            request = urllib2.Request(url=PAGE_BASE_URL, headers=headers)
            response = urllib2.urlopen(request)
            data = response.read()
            break
        except urllib2.HTTPError, e:
            if i == maxTry-1:
                with io.open(DOWNLOAD_LOG.decode('utf8'), mode='a', encoding='utf8') as f:
                    f.write((str(e.code)+' error while parsing url '+PAGE_BASE_URL+'\n').decode('utf8'))
        except:
Example 13: getarticle
def getarticle(readfile):
    ''' Get each article listed in readfile and save it to its own file. '''
    try:
        fileopen = open(readfile)
    except IOError:
        print "file " + readfile + " not in the location specified"
        return
    i = 1
    for line in fileopen:
        try:
            ua = generate_user_agent()
            head = ua.encode('ascii', 'ignore')
            # The header key must be 'User-Agent' (the original used 'useragent').
            headers = {'User-Agent': head}
            print "reading article:"
            print line
            html = requests.get(line, headers=headers).text
            tex = fulltext(html)
            # 'j' is defined elsewhere in the original module (presumably the day number).
            writefile = "201604"+str(j)+"_"+str(i)+".txt"
            with io.open(writefile, encoding='utf-8', mode='w+') as ns:
                strng = ' '.join(tex.split())
                ns.write(strng)
            i = i + 1
        except:
            pass
Example 14: get_address
def get_address(proxy):
    """Fetch an American address from https://fakena.me/random-real-address/

    Args:
        proxy (str): proxy used to visit the target site, ip:port

    Returns:
        format_addr (str): American address in the form "address_line # city # state # zip"
    """
    ignore_warnings()
    url = r'https://fakena.me/random-real-address/'
    referer = r'https://fakena.me'
    header = {'user-agent': generate_user_agent(), 'referer': referer}
    curr_proxy = {
        'http': 'http://%s' % proxy
    }
    text = requests.get(url, headers=header, proxies=curr_proxy).text
    pattern = re.compile('<strong>(.+)<br>(.+)</strong>')
    result = re.findall(pattern, text)
    if result:  # sometimes the result is empty
        print result[0][0], result[0][1]
        address_line = result[0][0]
        city, state_zip = result[0][1].split(',')
        state, zip_code = state_zip.split()  # renamed from 'zip' to avoid shadowing the builtin
        format_addr = address_line+'#'+city+'#'+state+'#'+zip_code
        return format_addr
    else:
        return ''
Example 15: send_query
def send_query(self, query):
    # TODO: Randomize query, i.e. remove/change unused arguments to vary the query signature
    self.queries_sent += 1
    # Rotate the user agent every 3 to 13 queries.
    if self.queries_sent % self.queries_change == 0:
        self.queries_change = randint(3, 13)
        ScholarConf.USER_AGENT = generate_user_agent()
    return super(BibDLQuerier, self).send_query(query)
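The same rotation idea works outside the scholar.py machinery of Example 15. Here is a minimal, hypothetical sketch (names and thresholds are illustrative, not from the examples above) using requests:
import random
import requests
from user_agent import generate_user_agent

class RotatingSession(object):
    """Hypothetical helper: refresh the User-Agent header every few requests."""

    def __init__(self):
        self.session = requests.Session()
        self.sent = 0
        self.rotate_after = random.randint(3, 13)

    def get(self, url, **kwargs):
        self.sent += 1
        # Periodically swap in a fresh user-agent string, then reset the interval.
        if self.sent % self.rotate_after == 0:
            self.rotate_after = random.randint(3, 13)
            self.session.headers['User-Agent'] = generate_user_agent()
        return self.session.get(url, **kwargs)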