本文整理汇总了 Python 中 link_finder.LinkFinder.get_links 方法的典型用法代码示例。如果您正苦于以下问题：Python LinkFinder.get_links 方法的具体用法？Python LinkFinder.get_links 怎么用？Python LinkFinder.get_links 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 link_finder.LinkFinder 的用法示例。
在下文中一共展示了LinkFinder.get_links方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: gather_links
# 需要导入模块: from link_finder import LinkFinder [as 别名]
# 或者: from link_finder.LinkFinder import get_links [as 别名]
def gather_links(page_url):
    """Fetch ``page_url`` and return the links found in its HTML.

    The page is downloaded with ``urlopen``. When the response's
    ``Content-Type`` header mentions ``text/html`` the body is decoded as
    UTF-8, fed to a ``LinkFinder`` built from ``Spider.base_url``, and the
    finder's ``get_links()`` result is returned.

    Args:
        page_url: Address of the page to crawl.

    Returns:
        The value of ``LinkFinder.get_links()`` for an HTML page; an empty
        ``set`` when the response is not HTML or when the crawl fails.
    """
    try:
        response = urlopen(page_url)
        try:
            # ``.get()`` rather than ``.getheader()``: the header object
            # returned by ``info()`` on Python 3 has no ``getheader``.
            # ``.get()`` yields None for a missing header, hence ``or ''``.
            content_type = response.info().get('Content-Type') or ''
            if 'text/html' not in content_type:
                # Nothing to parse; a non-HTML page is not a crawl error.
                return set()
            html_string = response.read().decode("utf-8")
        finally:
            # Release the connection whether or not reading succeeded.
            response.close()
        finder = LinkFinder(Spider.base_url)
        finder.feed(html_string)
        # Return the set of crawled URLs.
        return finder.get_links()
    except Exception:
        # Best-effort crawl: network, decoding and parsing failures all end
        # here. ``Exception`` (not a bare ``except``) keeps Ctrl-C working.
        print('Error:can not crawl page.')
        return set()
示例2: Friends_finder
# 需要导入模块: from link_finder import LinkFinder [as 别名]
# 或者: from link_finder.LinkFinder import get_links [as 别名]
class Friends_finder():
    """Interactive helper that harvests links from a Facebook page.

    Constructing an instance opens a Firefox WebDriver on Facebook, logs
    in, and then loops forever: wait for the user to scroll the page,
    collect the links of the current page source with ``LinkFinder``, and
    append the ones not seen before to ``data/people_to_add.txt``.
    """

    # Files whose lines are treated as already-known links.
    _KNOWN_LINK_FILES = (
        "data/people_to_add.txt",
        "data/errors.txt",
        "data/added_friends.txt",
    )
    # Element whose presence this class treats as "logged in".
    _LOGGED_IN_XPATH = '//*[@id="u_0_2"]'
    # Seconds to wait between two polls of the page.
    _POLL_INTERVAL = 0.5

    def __init__(self, user_name, password):
        """Start the browser, log in and run the gather loop.

        Note that this constructor never returns: it ends in an endless
        scroll / gather / append loop.

        Args:
            user_name: Value typed into the ``email`` field.
            password: Value typed into the ``pass`` field.
        """
        self.user_name = user_name
        self.password = password
        self.facebook_url = "https://www.facebook.com/"
        self.more_clicks = 0
        self.existent_people_links = set()
        self.setup()
        self.log_in()
        while True:
            self.scroll_down_mannualy()
            self.gather_links()
            self.append_links_to_queue()

    def setup(self):
        """Create the Firefox WebDriver and open the Facebook start page."""
        print('Seting up WebDriver')
        self.driver = webdriver.Firefox()
        self.driver.get(self.facebook_url)

    def log_in(self):
        """Fill in the login form and wait until the session is logged in.

        If the logged-in marker element is not found two seconds after
        submitting, the user is asked to log in manually and the method
        polls until the marker appears.
        """
        # Retry until the e-mail field exists and accepts the user name.
        while True:
            try:
                self.driver.find_element_by_id("email").send_keys(self.user_name)
                break
            except Exception:
                # ``Exception`` instead of a bare ``except`` so Ctrl-C still
                # interrupts; the pause avoids a busy loop.
                sleep(self._POLL_INTERVAL)
        self.driver.find_element_by_id("pass").send_keys(self.password)
        self.driver.find_element_by_id("pass").send_keys(Keys.RETURN)
        sleep(2)
        try:
            self.driver.find_element_by_xpath(self._LOGGED_IN_XPATH)
        except Exception:
            print('Unable to conect, Please do it manually')
            self._wait_until_logged_in()
        else:
            print('Conected')

    def _wait_until_logged_in(self):
        """Poll the page until the logged-in marker element can be found."""
        while True:
            try:
                self.driver.find_element_by_xpath(self._LOGGED_IN_XPATH)
                return
            except Exception:
                sleep(self._POLL_INTERVAL)

    def scroll_down_mannualy(self):
        """Ask the user to scroll the page and block until input is entered."""
        print("please scroll down the page")
        print("When done, press any key to start gathering links")
        input()

    def gather_links(self):
        """Parse the current page source and store its links.

        The result of ``LinkFinder.get_links()`` is kept in
        ``self.gathered_links``.
        """
        print('gathering links, please wait ...')
        self.link_finder = LinkFinder()
        self.link_finder.feed(self.driver.page_source)
        self.gathered_links = self.link_finder.get_links()
        print(str(len(self.gathered_links)) + ' Links was gathered')

    def append_links_to_queue(self):
        """Refresh the known-link set, then append new links to the queue."""
        print('Apending links and updating the queue file...')
        self.get_existent_links()
        self.update_queue()

    def get_existent_links(self):
        """Add every line of the known-link files to ``existent_people_links``.

        A file that does not exist yet is skipped, so a first run (before
        the queue file has been created) does not fail.
        """
        for path in self._KNOWN_LINK_FILES:
            try:
                with open(path, "r") as f:
                    self.existent_people_links.update(
                        line.replace('\n', '') for line in f
                    )
            except FileNotFoundError:
                continue

    def update_queue(self):
        """Append gathered links that are not yet known to the queue file.

        Each written link is also recorded in ``existent_people_links`` so a
        link occurring twice in ``gathered_links`` is written only once.
        The number of written links is stored in ``self.new_links_added``.
        """
        self.new_links_added = 0
        with open("data/people_to_add.txt", "a") as f:
            for item in self.gathered_links:
                if item in self.existent_people_links:
                    continue
                self.existent_people_links.add(item)
                self.new_links_added += 1
                f.write(item + '\n')
        print(str(self.new_links_added) + ' Items were added to the queue file')