本文整理汇总了Python中selenium.webdriver.Firefox.find_elements_by_css_selector方法的典型用法代码示例。如果您正苦于以下问题:Python Firefox.find_elements_by_css_selector方法的具体用法?Python Firefox.find_elements_by_css_selector怎么用?Python Firefox.find_elements_by_css_selector使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类selenium.webdriver.Firefox
的用法示例。
在下文中一共展示了Firefox.find_elements_by_css_selector方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from selenium.webdriver import Firefox [as 别名]
# 或者: from selenium.webdriver.Firefox import find_elements_by_css_selector [as 别名]
def main(argv=sys.argv[1:]):
    """Open *--url* in Firefox and print the text of every <li> element.

    Args:
        argv: command-line arguments (defaults to ``sys.argv[1:]``; note the
            default is captured once at import time, which matches the
            original behavior).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--url', default='http://127.0.0.1:8000/static/index.html')
    args = parser.parse_args(argv)
    url = args.url
    browser = WebDriver()
    try:
        browser.get(url)
        tags = browser.find_elements_by_css_selector('li')
        for tag in tags:
            print(tag.text)
    finally:
        # Bug fix: the browser window leaked if get()/find raised;
        # always close it.
        browser.close()
示例2: WeixinSelenium
# 需要导入模块: from selenium.webdriver import Firefox [as 别名]
# 或者: from selenium.webdriver.Firefox import find_elements_by_css_selector [as 别名]
class WeixinSelenium(Base):
def __init__(self):
self.start_page = START_PAGE
self.end_page = END_PAGE
self.weixin_url = REFER_FIRST
self.driver = Firefox()
self.client = MongoClient(HOST, PORT)
self.collection = self.client[DB][COLLECTION]
self.all_uids = self.uids
def open_weixin_browser(self, word):
try:
self.driver.get(self.weixin_url)
self.driver.set_page_load_timeout(3)
self.driver.find_element_by_id('upquery').send_keys(word)
self.driver.find_element_by_class_name('swz').click()
self.driver.implicitly_wait(3)
urls_uids = self.extract_urls_uids(word=word)
Article(urls_uids=urls_uids, word=word).extract()
except Exception as e:
storage_word.append([word, 0])
self.logger.info('Open weixin error: type <{}>, mag <{}>'.format(e.__class__, e))
self.close_browser()
return True
return False
def get_total_pages_to_word(self):
pages = []
page_id_css = 'pagebar_container'
try:
e = self.driver.find_element_by_id(page_id_css)
for _p in e.text.split():
_p = _p.strip()
if not _p.isdigit():
return pages[-1]
else:
pages.append(int(_p))
return 1
except (NoSuchElementException, NoSuchWindowException, TypeError, IndexError):
pass
def get_query_words(self):
query_words = []
for docs in self.collection.find({}, {'rel': 1, 'conp': 1}).sort([('_id', 1)]):
w = docs['conp']
if w not in query_words:
query_words.append(w)
for item in docs['rel']:
if item not in query_words:
query_words.append(item)
self.client.close()
return query_words
@property
def uids(self):
return {docs['uid'] for docs in in_collection.find({}, {'uid': 1}) if 'uid' in docs}
def extract_urls_uids(self, word):
urls_uids = []
timestamp = [_t.get_attribute('t') for _t in self.driver.find_elements_by_css_selector('div.s-p')]
urls_tits = [(t.get_attribute('href'), self.trim(t.text))
for t in self.driver.find_elements_by_css_selector('h4 a')]
if len(urls_tits) != len(timestamp):
return urls_uids
for index, url_tit in enumerate(urls_tits):
try:
uid = self.md5(timestamp[index] + url_tit[1] + word)
if uid not in self.all_uids:
self.all_uids.add(uid)
urls_uids.append({'url': url_tit[0], 'uid': uid})
except (TypeError, IndexError):
pass
return urls_uids
@staticmethod
def query_index(words, cut_word):
try:
index = words.index(cut_word)
return index
except ValueError:
pass
return 0
@property
def is_forbidden(self):
css_id = 'seccodeForm'
#.........这里部分代码省略.........
示例3: Scraper
# 需要导入模块: from selenium.webdriver import Firefox [as 别名]
# 或者: from selenium.webdriver.Firefox import find_elements_by_css_selector [as 别名]
class Scraper:
    """A simple scraper example using Selenium.

    Walks a paginated provider listing, extracting name/email/website/
    location from each ``.provider-row`` into ``outfile.csv``.
    """

    def __init__(self, base_url, query_params):
        """Start a headless Chrome (falling back to Firefox) and load the
        first results page at ``base_url + query_params``."""
        self.__take_results_backup()
        options = Options()
        options.add_argument("--headless")
        try:
            self.driver = Chrome(options=options)
        except Exception as e:
            print(f'Error occured during Chrome driver : {e}')
            self.driver = Firefox()
        self.driver.get(base_url + query_params)
        # Pre-locate the "next page" link so a missing pager fails fast.
        self.nextpage_element = self.driver.find_element_by_css_selector(
            ".pager-next a")

    def __take_results_backup(self):
        """Move any previous outfile.csv aside under a timestamped name."""
        if os.path.exists('outfile.csv'):
            stamp = f'outfile{time.asctime().replace(":", "-").replace(" ","_")}'
            shutil.move('outfile.csv', stamp)

    def __save_info(self, lines):
        """Append an iterable of already-formatted CSV lines to outfile.csv."""
        with open('outfile.csv', 'a') as f:
            for line in lines:
                f.write(line)

    def nextpage(self, css_locator):
        """Click the element at *css_locator* to advance to the next page."""
        self.driver.find_element_by_css_selector(
            css_locator).click()

    def scrape_page(self):
        """Extract the current results page into the CSV file.

        Rows with a missing field are skipped; any other failure is wrapped
        in ScrapePageError so the caller decides whether to continue.
        """
        providers = self.driver.find_elements_by_css_selector(".provider-row")
        for provider in providers:
            try:
                name = provider.find_element_by_css_selector(
                    ".provider-base-info h3 a").text
                email = provider.find_element_by_css_selector(
                    ".provider-link-details .icon-mail+a").get_attribute(
                    'href').replace('mailto:','')
                website = provider.find_element_by_css_selector(
                    ".provider-link-details .website-link a").get_attribute('href')
                location = provider.find_element_by_css_selector(
                    ".provider-info__details div.list-item:nth-of-type(4)").text
                lineitem = f'{name.replace(",","-")},{email},{website},{location.replace(",","-")}'
                # Bug fix: __save_info iterates its argument, so passing the
                # bare string wrote one character per iteration; wrap the
                # line in a list to hand it over as a single CSV line.
                self.__save_info([lineitem + "\n"])
            except NoSuchElementException:
                # Skip incomplete rows and continue scraping the page.
                continue
            except Exception as e:
                # Discontinue in case of unknown error.
                raise ScrapePageError(f"Error occured during scrape page : {e}")

    def scrape(self):
        """Scrape page after page until advancing to the next page fails."""
        while True:
            print(f"scraping the website... ")
            try:
                self.scrape_page()
                self.nextpage(".pager-next a")
            except ScrapePageError as e:
                # Page-level failure: report it but still try to advance.
                print(e)
                self.nextpage(".pager-next a")
                continue
            except Exception as e:
                # No next page (or the driver died): shut down cleanly.
                print("Something went wrong: ", e)
                self.driver.close()
                break