

Python Firefox.find_elements_by_css_selector Method Code Examples

This article collects typical usage examples of the Python method selenium.webdriver.Firefox.find_elements_by_css_selector. If you are unsure what Firefox.find_elements_by_css_selector does or how to use it, the curated examples below should help. You can also browse further usage examples of selenium.webdriver.Firefox.


The following presents 3 code examples of the Firefox.find_elements_by_css_selector method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
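Before the full examples, here is a minimal sketch of what the method does (the URL and selector below are placeholders, not taken from any of the projects cited). Note that find_elements_by_css_selector belongs to the Selenium 3.x API; Selenium 4 deprecated it and later 4.x releases removed it in favor of find_elements(By.CSS_SELECTOR, ...), shown in the closing note after Example 3.

from selenium.webdriver import Firefox

driver = Firefox()
driver.get('https://example.com')  # placeholder URL

# Returns a (possibly empty) list of WebElement objects matching the selector
links = driver.find_elements_by_css_selector('a[href]')
for link in links:
    print(link.text, link.get_attribute('href'))

driver.quit()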

Example 1: main

# Required import: from selenium.webdriver import Firefox [as alias]
# Or: from selenium.webdriver.Firefox import find_elements_by_css_selector [as alias]
import sys
import argparse

from selenium.webdriver import Firefox as WebDriver
def main(argv=sys.argv[1:]):
    parser = argparse.ArgumentParser()
    parser.add_argument('--url', default='http://127.0.0.1:8000/static/index.html')
    args = parser.parse_args(argv)

    url = args.url

    browser = WebDriver()
    browser.get(url)
    tags = browser.find_elements_by_css_selector('li')
    for tag in tags:
        print(tag.text)
    browser.close()
Author: shimizukawa, Project: happy-scraping, Lines: 15, Source: can-scrape.py

Example 2: WeixinSelenium

# Required import: from selenium.webdriver import Firefox [as alias]
# Or: from selenium.webdriver.Firefox import find_elements_by_css_selector [as alias]
from pymongo import MongoClient
from selenium.webdriver import Firefox
from selenium.common.exceptions import NoSuchElementException, NoSuchWindowException
# START_PAGE, END_PAGE, REFER_FIRST, HOST, PORT, DB, COLLECTION, storage_word,
# Base, and Article come from elsewhere in the csf_scraper project.
class WeixinSelenium(Base):
    def __init__(self):
        self.start_page = START_PAGE
        self.end_page = END_PAGE
        self.weixin_url = REFER_FIRST

        self.driver = Firefox()

        self.client = MongoClient(HOST, PORT)
        self.collection = self.client[DB][COLLECTION]
        self.all_uids = self.uids

    def open_weixin_browser(self, word):
        try:
            self.driver.get(self.weixin_url)
            self.driver.set_page_load_timeout(3)

            self.driver.find_element_by_id('upquery').send_keys(word)
            self.driver.find_element_by_class_name('swz').click()
            self.driver.implicitly_wait(3)

            urls_uids = self.extract_urls_uids(word=word)
            Article(urls_uids=urls_uids, word=word).extract()
        except Exception as e:
            storage_word.append([word, 0])
            self.logger.info('Open weixin error: type <{}>, msg <{}>'.format(e.__class__, e))
            self.close_browser()
            return True
        return False

    def get_total_pages_to_word(self):
        """Read the pager bar and return the last page number it shows."""
        pages = []
        page_id_css = 'pagebar_container'

        try:
            e = self.driver.find_element_by_id(page_id_css)
            for _p in e.text.split():
                _p = _p.strip()

                if not _p.isdigit():
                    return pages[-1]
                else:
                    pages.append(int(_p))
            return 1  # no trailing non-digit token found; default to one page
        except (NoSuchElementException, NoSuchWindowException, TypeError, IndexError):
            pass

    def get_query_words(self):
        query_words = []

        for docs in self.collection.find({}, {'rel': 1, 'conp': 1}).sort([('_id', 1)]):
            w = docs['conp']

            if w not in query_words:
                query_words.append(w)

            for item in docs['rel']:
                if item not in query_words:
                    query_words.append(item)

        self.client.close()
        return query_words

    @property
    def uids(self):
        return {docs['uid'] for docs in self.collection.find({}, {'uid': 1}) if 'uid' in docs}

    def extract_urls_uids(self, word):
        """Pair each result link with a deterministic uid built from its
        timestamp, title, and the query word; skip uids already seen."""
        urls_uids = []
        timestamp = [_t.get_attribute('t') for _t in self.driver.find_elements_by_css_selector('div.s-p')]
        urls_tits = [(t.get_attribute('href'), self.trim(t.text))
                     for t in self.driver.find_elements_by_css_selector('h4 a')]

        if len(urls_tits) != len(timestamp):
            return urls_uids

        for index, url_tit in enumerate(urls_tits):
            try:
                uid = self.md5(timestamp[index] + url_tit[1] + word)

                if uid not in self.all_uids:
                    self.all_uids.add(uid)
                    urls_uids.append({'url': url_tit[0], 'uid': uid})
            except (TypeError, IndexError):
                pass
        return urls_uids

    @staticmethod
    def query_index(words, cut_word):
        try:
            index = words.index(cut_word)
            return index
        except ValueError:
            pass
        return 0

    @property
    def is_forbidden(self):
        css_id = 'seccodeForm'

#......... the rest of this method is omitted here .........
Author: xutaoding, Project: csf_scraper, Lines: 103, Source: wx_selenium.py
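Example 2 pairs implicitly_wait(3) with immediate element lookups, which can silently yield empty lists if the page loads slowly. A more robust alternative (a sketch assuming driver is the Firefox instance from WeixinSelenium; this is not code from the original project) is an explicit wait that blocks until the result nodes are present:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Wait up to 10 seconds for at least one result timestamp node ('div.s-p')
# to appear; raises selenium.common.exceptions.TimeoutException otherwise.
timestamps = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.s-p'))
)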

Example 3: __init__

# Required import: from selenium.webdriver import Firefox [as alias]
# Or: from selenium.webdriver.Firefox import find_elements_by_css_selector [as alias]
import os
import time
import shutil

from selenium.webdriver import Chrome, Firefox
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
# ScrapePageError is a custom exception defined elsewhere in the expat project.
class Scraper:
    """ A Simple Scraper Example using Selenium """

    def __init__(self, base_url, query_params):
        self.__take_results_backup()
        options = Options()
        options.add_argument("--headless")
        try:
            self.driver = Chrome(options=options)
        except Exception as e:
            print(f'Error occurred while starting the Chrome driver: {e}')
            self.driver = Firefox()
        self.driver.get(base_url + query_params)
        # set up the next page element
        self.nextpage_element = self.driver.find_element_by_css_selector(
                ".pager-next a")


    def __take_results_backup(self):
        if os.path.exists('outfile.csv'):
            stamp = f'outfile{time.asctime().replace(":", "-").replace(" ", "_")}'
            shutil.move('outfile.csv', stamp)

    def __save_info(self, lines):
        """
        This method saves the recently collected information line from webpage
        """

        with open('outfile.csv', 'a') as f:
            for line in lines:
                f.write(line)

    def nextpage(self, css_locator):
        self.driver.find_element_by_css_selector(
                css_locator).click()

    def scrape_page(self):
        providers = self.driver.find_elements_by_css_selector(".provider-row")

        for provider in providers:
            try:
                name = provider.find_element_by_css_selector(
                        ".provider-base-info h3 a").text
                email = provider.find_element_by_css_selector(
                        ".provider-link-details .icon-mail+a").get_attribute(
                                'href').replace('mailto:','')
                website = provider.find_element_by_css_selector(
                        ".provider-link-details .website-link a").get_attribute('href')
                location = provider.find_element_by_css_selector(
                        ".provider-info__details div.list-item:nth-of-type(4)").text

                lineitem = f'{name.replace(",", "-")},{email},{website},{location.replace(",", "-")}'

                # append the results
                self.__save_info(lineitem + "\n")

            except NoSuchElementException:
                # skip information and continue scraping the page
                continue

            except Exception as e:
                # discontinue in case of unknown error
                raise ScrapePageError(f"Error occurred while scraping the page: {e}")

    def scrape(self):
        # keep scraping until advancing to the next page fails
        while True:
            print(f"scraping the website... ")
            try:
                self.scrape_page()
                self.nextpage(".pager-next a")

            except ScrapePageError as e:
                print(e)
                self.nextpage(".pager-next a")
                continue

            except Exception as e:
                print("Something went wrong: ", e)
                self.driver.close()
                break
Author: stupidnetizen, Project: expat, Lines: 83, Source: selenium_scraper.py
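All three examples use the Selenium 3.x find_elements_by_css_selector API, which was deprecated in Selenium 4.0 and removed in later 4.x releases. Here is a minimal sketch of the modern equivalent, mirroring Example 1 (the URL is the same placeholder used there):

from selenium.webdriver import Firefox
from selenium.webdriver.common.by import By

driver = Firefox()
driver.get('http://127.0.0.1:8000/static/index.html')  # placeholder URL

# Selenium 4 replacement: find_elements(By.CSS_SELECTOR, ...)
tags = driver.find_elements(By.CSS_SELECTOR, 'li')
for tag in tags:
    print(tag.text)

driver.quit()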


Note: The selenium.webdriver.Firefox.find_elements_by_css_selector examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright in the source code remains with the original authors. Consult the corresponding project's License before distributing or using the code, and do not reproduce this article without permission.