本文整理汇总了Python中selenium.webdriver.Firefox.find_element_by_id方法的典型用法代码示例。如果您正苦于以下问题:Python Firefox.find_element_by_id方法的具体用法?Python Firefox.find_element_by_id怎么用?Python Firefox.find_element_by_id使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类selenium.webdriver.Firefox
的用法示例。
在下文中一共展示了Firefox.find_element_by_id方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_air_data
# 需要导入模块: from selenium.webdriver import Firefox [as 别名]
# 或者: from selenium.webdriver.Firefox import find_element_by_id [as 别名]
def get_air_data(positionsets):
# Dictionary hourdata is for holding data, DataStructure like:
# {'baiyunshan': [44, 5], 'haizhubaogang': [55, 6]}
hourdata = {}
# Calling selenium, need linux X
browser = Firefox()
browser.get(URL)
# Added 10 seconds for waiting page for loading.
time.sleep(10)
# Click button one-by-one
for position in positionsets:
# After clicking, should re-get the page_source.
browser.find_element_by_id(position).click()
page_source = browser.page_source
# Cooking Soup
soup = BeautifulSoup(page_source, 'html.parser')
# pm2.5 value would be something like xx 微克/立方米, so we need an regex for
# matching, example: print int(pattern.match(input).group())
try:
PM25 = int(pattern.match(soup.find('td',{'id': 'pmtow'}).contents[0]).group())
PM25_iaqi = int(pattern.match(soup.find('td',{'id': 'pmtow_iaqi'}).contents[0]).group())
PM10 = int(pattern.match(soup.find('td',{'id': 'pmten'}).contents[0]).group())
PM10_iaqi = int(pattern.match(soup.find('td',{'id': 'pmten_iaqi'}).contents[0]).group())
SO2 = int(pattern.match(soup.find('td',{'id': 'sotwo'}).contents[0]).group())
SO2_iaqi = int(pattern.match(soup.find('td',{'id': 'sotwo_iaqi'}).contents[0]).group())
NO2 = int(pattern.match(soup.find('td',{'id': 'notwo'}).contents[0]).group())
NO2_iaqi = int(pattern.match(soup.find('td',{'id': 'notwo_iaqi'}).contents[0]).group())
# Special notice the CO would be float value
CO = float(floatpattern.match(soup.find('td',{'id': 'co'}).contents[0]).group())
CO_iaqi = int(pattern.match(soup.find('td',{'id': 'co_iaqi'}).contents[0]).group())
O3 = int(pattern.match(soup.find('td',{'id': 'othree'}).contents[0]).group())
O3_iaqi = int(pattern.match(soup.find('td',{'id': 'othree_iaqi'}).contents[0]).group())
hourdata_key = pinyin.get(position)
hourdata[hourdata_key] = []
hourdata[hourdata_key].append(PM25)
hourdata[hourdata_key].append(PM25_iaqi)
hourdata[hourdata_key].append(PM10)
hourdata[hourdata_key].append(PM10_iaqi)
hourdata[hourdata_key].append(SO2)
hourdata[hourdata_key].append(SO2_iaqi)
hourdata[hourdata_key].append(NO2)
hourdata[hourdata_key].append(NO2_iaqi)
hourdata[hourdata_key].append(CO)
hourdata[hourdata_key].append(CO_iaqi)
hourdata[hourdata_key].append(O3)
hourdata[hourdata_key].append(O3_iaqi)
except ValueError, Argument:
# won't add the data, simply ignore this position
print "The argument does not contain numbers\n", Argument
示例2: Scraper
# 需要导入模块: from selenium.webdriver import Firefox [as 别名]
# 或者: from selenium.webdriver.Firefox import find_element_by_id [as 别名]
class Scraper(object):
'''
classdocs
'''
def __init__(self):
'''
Constructor
'''
self.url = 'http://www.jerryvarghese.com/Job-Search/ReqSearch.aspx?p=0&locID=121&loc=Saudi%20Arabia'
#self.base_job_url = 'https://sjobs.brassring.com/TGWebHost/jobdetails.aspx?'
self.browser = Firefox()
self.first_page_search_opening_id = 'srchOpenLink'
self.second_page_search_btn_id = 'ctl00_MainContent_submit2'
self.next_link_id = 'yui-pg0-0-next-link'
def page_links(self):
job_link_id = 'Openings1_Rptr_FieldName_ct{}_lknReqTitle'
for i in range(100, 115):
link_id = job_link_id.format(str(i))
link = self.browser.find_element_by_id(link_id)
link.click()
def main(self):
try:
self.browser.get(self.url)
self.page_links()
counter = 1
except Exception as e:
print 'exception= ', str(e)
#print 'stacktrace= ', traceback.print_exc()
print 'Line Number= ' + str(sys.exc_traceback.tb_lineno)
示例3: test_add_extension_web_extension_with_id
# 需要导入模块: from selenium.webdriver import Firefox [as 别名]
# 或者: from selenium.webdriver.Firefox import find_element_by_id [as 别名]
def test_add_extension_web_extension_with_id(capabilities, webserver):
    """A web extension with an explicit id installs into the profile and runs."""
    here = os.path.dirname(os.path.realpath(__file__))
    root_directory = os.path.join(here, '..', '..', '..', '..', '..')
    # TODO: This file should probably live in a common directory.
    extension_path = os.path.join(
        root_directory, 'javascript', 'node', 'selenium-webdriver',
        'lib', 'test', 'data', 'firefox', 'webextension.xpi')

    profile = FirefoxProfile()
    profile.add_extension(extension_path)
    driver = Firefox(capabilities=capabilities, firefox_profile=profile)

    # The extension must have been unpacked into the live profile...
    installed = os.path.join(driver.firefox_profile.path,
                             'extensions', '[email protected]')
    assert os.path.exists(installed)

    # ...and its content script must inject the marker element.
    driver.get(webserver.where_is('simpleTest.html'))
    driver.find_element_by_id('webextensions-selenium-example')
    driver.quit()
示例4: PPLogBackendBrowser
# 需要导入模块: from selenium.webdriver import Firefox [as 别名]
# 或者: from selenium.webdriver.Firefox import find_element_by_id [as 别名]
class PPLogBackendBrowser(object):
    """Posts entries to pplog.net by driving a real browser through the
    Twitter OAuth login flow."""

    def __init__(self):
        self._browser = None

    def start(self):
        """Open the browser and log in to pplog.net via Twitter."""
        print('START')
        secret = createObject('pplog.secret')
        self._browser = WebDriver()
        # BUG FIX: implicitly_wait is a method; the original assigned 10 to
        # it ("self._browser.implicitly_wait = 10"), clobbering the method
        # and never setting the page-load wait at all.
        self._browser.implicitly_wait(10)  # wait up to 10s for elements
        self._browser.get('https://pplog.net/')
        time.sleep(1)
        self._browser.get('https://pplog.net/users/auth/twitter')
        self._browser.find_element_by_id('username_or_email').send_keys(secret.username)
        self._browser.find_element_by_id('password').send_keys(secret.password)
        self._browser.find_element_by_css_selector('.submit').click()

    def post(self, *args, **kwds):
        """Submit a post, retrying up to 100 times on any failure."""
        print('POST')
        for ii in range(100):  # retry count
            print('COUNT: {}'.format(ii))
            try:
                self._post(*args, **kwds)
            # Narrowed from a bare "except:" so KeyboardInterrupt/SystemExit
            # still abort the retry loop.
            except Exception:
                # Failed attempt: go back to the front page and retry.
                self._browser.get('https://www.pplog.net/')
            else:
                self._browser.get('https://www.pplog.net/')
                break
            time.sleep(1)
        else:
            print('retry error')

    def _post(self, data):
        """Fill the new-post form with *data* and submit it."""
        time.sleep(1)
        self._browser.find_element_by_css_selector('.new-btn').click()
        form = self._browser.find_element_by_css_selector('form#new_post_')
        form.find_element_by_tag_name('textarea').send_keys(data)
        time.sleep(1)
        form.find_element_by_css_selector('input.btn-primary').click()
        print('OK')
示例5: login
# 需要导入模块: from selenium.webdriver import Firefox [as 别名]
# 或者: from selenium.webdriver.Firefox import find_element_by_id [as 别名]
def login(username, password):
    """Login into website; return (api_key, sso_token, cookies) using
    geckodriver/firefox inside a virtual display.

    BUG FIXES vs. original: the docstring claimed a different return order;
    sso_token was an unbound local (NameError at return) whenever the
    'X-IHG-SSO-TOKEN' cookie was absent — it is now None in that case; the
    browser and display are always released via try/finally.
    """
    display = Display(visible=0, size=(800, 600))
    display.start()
    driver = Firefox()
    try:
        driver.get(url)
        # Give the javascript-heavy login page time to render.
        time.sleep(10)
        username_field = driver.find_element_by_name("emailOrPcrNumber")
        # There are multiple entries with the name pin, use the xpath instead even though it is more error prone
        password_field = driver.find_element_by_xpath('/html/body/div[1]/div/div/div[2]/div[1]/div[2]/form/div/div[1]/div[2]/input')
        username_field.clear()
        username_field.send_keys(username)
        password_field.clear()
        password_field.send_keys(password)
        time.sleep(2)
        driver.find_element_by_id("tpiSubmitButton").click()
        time.sleep(3)
        cookies = driver.get_cookies()
        sso_token = None  # stays defined even when the cookie is missing
        for cookie in cookies:
            if cookie['name'] == 'X-IHG-SSO-TOKEN':
                sso_token = cookie['value']
                break
        api_key = driver.execute_script('return AppConfig.featureToggle.apiKey')
        # The API domain sets additional cookies needed by later calls.
        driver.get('https://apis.ihg.com')
        cookies.extend(driver.get_cookies())
    finally:
        driver.quit()
        display.stop()
    return api_key, sso_token, cookies
示例6: WeixinSelenium
# 需要导入模块: from selenium.webdriver import Firefox [as 别名]
# 或者: from selenium.webdriver.Firefox import find_element_by_id [as 别名]
class WeixinSelenium(Base):
def __init__(self):
    """Set up the crawl window, a Firefox driver and the Mongo collection."""
    # Page range to crawl; START_PAGE/END_PAGE/REFER_FIRST are module-level
    # config constants (not visible in this excerpt).
    self.start_page = START_PAGE
    self.end_page = END_PAGE
    self.weixin_url = REFER_FIRST
    self.driver = Firefox()
    self.client = MongoClient(HOST, PORT)
    self.collection = self.client[DB][COLLECTION]
    # Cache of article uids already stored, so duplicates are skipped.
    self.all_uids = self.uids
def open_weixin_browser(self, word):
    """Search *word* on the weixin search page and extract its articles.

    Returns True when anything failed (the word is recorded in
    ``storage_word`` with a 0 marker and the browser is closed) and
    False on success — note the inverted-looking convention.
    """
    try:
        self.driver.get(self.weixin_url)
        self.driver.set_page_load_timeout(3)
        self.driver.find_element_by_id('upquery').send_keys(word)
        # 'swz' is the search button's class on the page.
        self.driver.find_element_by_class_name('swz').click()
        self.driver.implicitly_wait(3)
        urls_uids = self.extract_urls_uids(word=word)
        Article(urls_uids=urls_uids, word=word).extract()
    except Exception as e:
        # Record the failed word so it can be retried later.
        storage_word.append([word, 0])
        self.logger.info('Open weixin error: type <{}>, mag <{}>'.format(e.__class__, e))
        self.close_browser()
        return True
    return False
def get_total_pages_to_word(self):
    """Read the pagination bar and return a page count.

    Returns the last numeric token seen once a non-numeric token appears,
    1 when every token in the bar is numeric, and None (implicitly) when
    the bar is missing or unparsable.  NOTE(review): returning 1 after a
    fully numeric bar looks suspicious — confirm against the real markup.
    """
    pages = []
    page_id_css = 'pagebar_container'
    try:
        e = self.driver.find_element_by_id(page_id_css)
        for _p in e.text.split():
            _p = _p.strip()
            if not _p.isdigit():
                # First non-digit token ends the page list (e.g. "下一页").
                return pages[-1]
            else:
                pages.append(int(_p))
        return 1
    except (NoSuchElementException, NoSuchWindowException, TypeError, IndexError):
        # IndexError covers pages[-1] on an empty list; falls through to None.
        pass
def get_query_words(self):
    """Collect the distinct search words (conp plus its rel entries) from
    Mongo, preserving first-seen order, then close the client."""
    words = []
    cursor = self.collection.find({}, {'rel': 1, 'conp': 1}).sort([('_id', 1)])
    for doc in cursor:
        # The main word first, then every related word from the same doc.
        for candidate in [doc['conp']] + list(doc['rel']):
            if candidate not in words:
                words.append(candidate)
    self.client.close()
    return words
@property
def uids(self):
    # All article uids already stored in Mongo.
    # NOTE(review): ``in_collection`` is not defined in this class —
    # presumably a module-level collection handle (or a typo for
    # ``self.collection``); confirm before relying on this property.
    return {docs['uid'] for docs in in_collection.find({}, {'uid': 1}) if 'uid' in docs}
def extract_urls_uids(self, word):
    """Pair each search-result link with a uid derived from
    (timestamp, title, word).

    Returns a list of {'url': ..., 'uid': ...} dicts for results whose uid
    has not been seen before, or an empty list when the timestamp column
    and the link column disagree in length (page not fully rendered).
    """
    urls_uids = []
    # Publication timestamps sit in the 't' attribute of div.s-p elements,
    # one per result row.
    timestamp = [_t.get_attribute('t') for _t in self.driver.find_elements_by_css_selector('div.s-p')]
    urls_tits = [(t.get_attribute('href'), self.trim(t.text))
                 for t in self.driver.find_elements_by_css_selector('h4 a')]
    if len(urls_tits) != len(timestamp):
        return urls_uids
    for index, url_tit in enumerate(urls_tits):
        try:
            # md5 over timestamp+title+word gives a dedup key per article.
            uid = self.md5(timestamp[index] + url_tit[1] + word)
            if uid not in self.all_uids:
                self.all_uids.add(uid)
                urls_uids.append({'url': url_tit[0], 'uid': uid})
        except (TypeError, IndexError):
            # get_attribute may return None -> TypeError; skip that result.
            pass
    return urls_uids
@staticmethod
def query_index(words, cut_word):
    """Return the position of *cut_word* in *words*, or 0 when absent.

    NOTE: 0 is also a valid index, so callers cannot distinguish
    "first element" from "not found" — same contract as the original.
    """
    if cut_word in words:
        return words.index(cut_word)
    return 0
@property
def is_forbidden(self):
css_id = 'seccodeForm'
#.........这里部分代码省略.........
示例7: scrape_section
# 需要导入模块: from selenium.webdriver import Firefox [as 别名]
# 或者: from selenium.webdriver.Firefox import find_element_by_id [as 别名]
def scrape_section(browser, section_name, total_page, page_link_id_prefix, table_id):
print "scrape {0} section...".format(section_name)
records = []
for i in range(1, total_page + 1):
print "click page " + str(i) + "..."
link = browser.find_element_by_id(page_link_id_prefix + str(i))
link.click()
time.sleep(5)
table = scrape_table(browser, table_id)
for row in table:
print row
records.append(row)
return records
def scrape_pru(browser, num, parliament_total_page, state_total_page):
    """Scrape one election's state-assembly results and dump them to JSON.

    The parliament-section scrape is currently disabled; only the state
    ("N") section is collected.  parliament_total_page is kept so the
    call signature stays unchanged.
    """
    state_records = scrape_section(browser, "state", state_total_page,
                                   "ContentPlaceHolder1_NlnkPage",
                                   "ContentPlaceHolder1_grdAssemblyResult")
    with open("pru{0}_state.json".format(num), 'w') as f:
        json.dump(state_records, f)
# Entry point: scrape the PRU13 and PRU12 Malaysian election results.
browser = Firefox()
print "open website..."
browser.get("http://www.pru13.com")
browser.implicitly_wait(30)
# PRU13: 12 parliament pages, 33 state pages.
scrape_pru(browser, 13, 12, 33)
# Switch to the second tab for the previous election's results.
browser.find_element_by_id("tab_menu2").click()
# PRU12: 23 parliament pages, 51 state pages.
scrape_pru(browser, 12, 23, 51)
示例8: __init__
# 需要导入模块: from selenium.webdriver import Firefox [as 别名]
# 或者: from selenium.webdriver.Firefox import find_element_by_id [as 别名]
class BaiDuWaiMaiCrawler:
def __init__(self):
    """Open a Firefox driver and load the set of already-crawled shop ids."""
    # Base URL of a shop's comment page; the shop id is appended to it.
    self.comment_root_url = "http://waimai.baidu.com/shopui/?qt=shopcomment&shop_id="
    # Daily output file for scraped comments.
    self.comment_root_path = "files/baiduwaimai_comments-%s.json" % datetime.now().strftime("%Y-%m-%d")
    self.browser = Firefox()
    # shop id -> list of address ids where the shop appears.
    self.ids = defaultdict(list)
    # Shop ids already scraped (persisted across runs on disk).
    self.crawled_ids = []
    self.crawled_id_filepath = "files/crawled_ids.txt"
    self.get_crawled_ids()
def __del__(self):
    # Best-effort browser shutdown when the crawler is garbage-collected.
    self.browser.quit()
def get_crawled_ids(self):
    """Load previously crawled shop ids from disk, if the file exists."""
    if not exists(self.crawled_id_filepath):
        return
    with open(self.crawled_id_filepath, encoding="utf-8") as handle:
        self.crawled_ids.extend(line.strip() for line in handle)
def record_crawled_id(self, shop_id):
    """Persist *shop_id* by appending one line to the crawled-ids file."""
    line = "%s\n" % shop_id
    with open(self.crawled_id_filepath, mode="a", encoding="utf-8") as out_file:
        out_file.write(line)
@staticmethod
def get_address_urls_from_file():
    """Read address URLs (first whitespace-separated column) from
    files/baiduwaimai_address_urls.txt.

    Lines with fewer than two columns are skipped.  Returns the list of URLs.
    """
    urls = []
    # FIX: raw string for the regex — "\s" in a plain string relies on an
    # invalid escape (DeprecationWarning, SyntaxWarning on newer Pythons).
    pattern = re.compile(r"\s+")
    with open("files/baiduwaimai_address_urls.txt") as f:
        for line in f:
            results = pattern.split(line.strip())
            if len(results) >= 2:
                urls.append(results[0])
    print("从文件内得到所有地址的url")
    return urls
def get_shop_ids_from_file(self, filepath, encoding="utf-8"):
    """Load "shop_id<ws>addr1,addr2,..." lines from *filepath* into self.ids.

    Each line maps a shop id to the comma-separated address ids it serves;
    lines with fewer than two whitespace-separated columns are skipped.
    """
    # FIX: raw string for the regex — "\s" in a plain string relies on an
    # invalid escape (DeprecationWarning, SyntaxWarning on newer Pythons).
    pattern = re.compile(r"\s+")
    with open(filepath, encoding=encoding) as f:
        for line in f:
            results = pattern.split(line.strip())
            if len(results) >= 2:
                self.ids[results[0]] = results[1].split(",")
def get_shop_ids_from_net(self):
    """Visit every address URL from the list file and harvest the shop ids
    listed there (the line index doubles as the address id)."""
    address_urls = self.get_address_urls_from_file()
    for index, url in enumerate(address_urls):
        self.shop_urls_at_a_address(url, index)
def shop_urls_at_a_address(self, url, line_index):
    """Open one address page and record which shops appear there in self.ids."""
    self.browser.get(url)
    self.browser.maximize_window()
    # Click the 'baiducopy' element 10 times with a pause between clicks.
    # NOTE(review): presumably a load-more/pager control so additional
    # shops render — confirm against the live page.
    for i in range(10):
        self.browser.find_element_by_id("baiducopy").click()
        time.sleep(2)
    page_source = self.browser.page_source
    # self.browser.close()
    soup = BeautifulSoup(page_source, "html.parser")
    if soup.find("ul", class_="shopcards-list"):
        for li in soup.find("ul", class_="shopcards-list").find_all("li", class_="list-item"):
            # The shop id is the third CSS class minus a 4-character prefix.
            key = li.get("class")[2][4:]
            address_id = str(line_index)
            self.ids[key].append(address_id)
def get_comments_in_one_shop(self, shop_id):
    """Scrape every page of comments for one shop, then mark it crawled.

    Loops: scroll to the footer (to trigger lazy loading), parse the
    comment list, then click the "next page" arrow — until a no-result
    marker appears or the next arrow is gone.
    """
    self.browser.get("%s%s" % (self.comment_root_url, shop_id))
    self.browser.maximize_window()
    while True:
        footer = self.browser.find_element_by_xpath("//div[@class='footer-items']")
        # Move to the footer twice so lazily-loaded comments render.
        for i in range(2):
            ActionChains(self.browser).move_to_element(footer).perform()
            time.sleep(1)
        page_source = self.browser.page_source
        soup = BeautifulSoup(page_source, "html.parser")
        div = soup.find("section", "comment-list").find("div", "comment-con")
        if div.find("div", class_="no-result") is not None:
            # Empty comment page: nothing more to scrape.
            break
        else:
            for a_div in div.find_all("div", class_="list clearfix"):
                self.get_one_comment(a_div, shop_id)
            try:
                the_next = self.browser.find_element_by_xpath(
                    "//div[@class='pagination']//a[@class='mod-page-item mod-page-item-next']")
                the_next.click()
                time.sleep(2)
            except NoSuchElementException:
                # No "next" arrow: last page reached.
                break
    # self.browser.close()
    print("爬完ID为 '", shop_id, "' 的餐厅的评论信息。")
    self.record_crawled_id(shop_id)
    self.crawled_ids.append(shop_id)
def get_one_comment(self, div, shop_id):
try:
comment_info = {"shop_id": shop_id}
top_sec = div.find("div", class_="top-section").get_text("|", strip=True).split("|")
comment_info["user_name"] = top_sec[0] # a_div.find("span", class_="user-name").string.strip()
comment_info["mark"] = top_sec[1][:-1]
comment_info["delivery_time"] = top_sec[2] # a_div.find("span", class_="delivery-time").string.strip()
#.........这里部分代码省略.........
示例9: TestMaxlifeFeature
# 需要导入模块: from selenium.webdriver import Firefox [as 别名]
# 或者: from selenium.webdriver.Firefox import find_element_by_id [as 别名]
class TestMaxlifeFeature(object):
    """
    Checks if the maxlife feature is working.

    Drives a local bepasty-style instance at localhost:5000 with a real
    Firefox browser; setup logs in once for the whole class.
    """
    def setup_class(self):
        """
        Setup: Open a mozilla browser, login
        """
        self.browser = Firefox()
        self.browser.get('http://localhost:5000/')
        token = self.browser.find_element_by_name("token")
        password = "foo"
        # login
        token.send_keys(password)
        token.send_keys(Keys.ENTER)
        time.sleep(.1)
        try:
            # The Logout button only exists when login succeeded.
            self.browser.find_element_by_xpath("//input[@value='Logout']")
        except NoSuchElementException:
            raise ValueError("Can't login!!! Create a user 'foo' with the permissions"
                             "'read' and 'write' in your PERMISSIONS in the config")

    def teardown_class(self):
        """
        Tear down: Close the browser
        """
        self.browser.quit()

    def test_unit_input_exists(self):
        """Both maxlife inputs (unit and value) are present on the form."""
        unit_input = self.browser.find_element_by_name("maxlife-unit")
        assert unit_input is not None
        value_input = self.browser.find_element_by_name("maxlife-value")
        assert value_input is not None

    def fill_form(self):
        """Fill and submit the upload form with a small text paste."""
        paste_input = self.browser.find_element_by_id("formupload")
        paste_input.send_keys("This is test")
        filename_input = self.browser.find_element_by_id("filename")
        filename_input.send_keys("test.txt")
        contenttype_input = self.browser.find_element_by_id("contenttype")
        contenttype_input.send_keys("text/plain")
        contenttype_input.send_keys(Keys.ENTER)

    def delete_current_file(self):
        """Delete the file currently displayed, confirming the modal."""
        self.browser.find_element_by_id("del-btn").click()
        time.sleep(.2)
        self.browser.find_element_by_class_name("btn-primary").click()

    def test_paste_keep_forever(self):
        """A paste with unit 'forever' shows no max-life notice."""
        self.browser.find_element_by_xpath("//select[@name='maxlife-unit']/option[@value='forever']").click()
        value_input = self.browser.find_element_by_name("maxlife-value")
        value_input.clear()
        value_input.send_keys(1)
        self.fill_form()
        assert "max life" not in self.browser.find_element_by_tag_name("body").text.lower()
        self.delete_current_file()

    def test_paste_keep_minutes(self):
        """A paste with a 1-minute lifetime shows the max-time notice."""
        self.browser.find_element_by_xpath("//select[@name='maxlife-unit']/option[@value='minutes']").click()
        value_input = self.browser.find_element_by_name("maxlife-value")
        value_input.clear()
        value_input.send_keys(1)
        self.fill_form()
        assert "max time" in self.browser.find_element_by_tag_name("body").text.lower()
        self.delete_current_file()

    @pytest.mark.slow
    def test_file_gets_deleted(self):
        """After its 1-minute lifetime expires the file is gone (404)."""
        self.browser.find_element_by_xpath("//select[@name='maxlife-unit']/option[@value='minutes']").click()
        value_input = self.browser.find_element_by_name("maxlife-value")
        value_input.clear()
        value_input.send_keys(1)
        self.fill_form()
        # Wait out the 60-second lifetime before re-opening the file.
        time.sleep(61)
        self.browser.find_element_by_id("inline-btn").click()
        assert "not found" in self.browser.find_element_by_tag_name("body").text.lower()
示例10: NewTrainingData
# 需要导入模块: from selenium.webdriver import Firefox [as 别名]
# 或者: from selenium.webdriver.Firefox import find_element_by_id [as 别名]
class NewTrainingData(LiveServerTestCase) :
    """Functional test: a runner enters training data through the web form
    and sees it persisted across browser restarts."""

    def setUp(self) :
        # TODO setup lists with test data here, and make the 2 methods user_sees and check_entered accept these lists
        self.browser = Firefox()
        self.browser.implicitly_wait(3)

    def tearDown(self) :
        self.browser.quit()

    def user_sees_inputfields_and_enters_data(self, distance, executed_time, in_zone, average_heart_rate) :
        """Fill the four training-data fields and submit the form."""
        # user sees 4 edit boxes to enter distance, executed time, in zone and average HR
        distance_editbox = self.browser.find_element_by_name('distance')
        self.assertIsNotNone(distance_editbox)
        executed_time_editbox = self.browser.find_element_by_name('executed_time')
        self.assertIsNotNone(executed_time_editbox)
        in_zone_editbox = self.browser.find_element_by_name('in_zone')
        self.assertIsNotNone(in_zone_editbox)
        average_heart_rate_editbox = self.browser.find_element_by_name('average_heart_rate')
        self.assertIsNotNone(average_heart_rate_editbox)
        # TODO: user sees km, bpm, .... next to the edit boxes
        # user sees a submit button with the text 'submit' on it
        submit_button = self.browser.find_element_by_id('submit_button')
        self.assertEqual(submit_button.get_attribute('value'), 'submit')
        # user enters data in the 4 fields an presses submit
        # TODO : user sees he gets redirected to the same home url
        distance_editbox.send_keys(distance)
        executed_time_editbox.send_keys(executed_time)
        in_zone_editbox.send_keys(in_zone)
        average_heart_rate_editbox.send_keys(average_heart_rate)
        submit_button.submit()

    def check_entered_data_on_screen(self, data) :
        """Assert the results table shows exactly the rows in *data*."""
        # user sees a table and an entry in a table on the page with the entered data
        try :
            table_rows = self.browser.find_elements_by_tag_name('tr')
        except StaleElementReferenceException :
            # wait for all the elements to be attached to the DOM (stale exception selenium)
            self.browser.implicitly_wait(3)
            table_rows = self.browser.find_elements_by_tag_name('tr')
        # user checks if the number of rows entered equals the number of rows displayed in the table
        self.assertEqual(len(table_rows), len(data), 'not all data records are in the DB')
        for table_row in table_rows :
            self.assertIn(table_row.text, data, 'runners record wrong data or not in DB')

    def test_enter_training_data(self) :
        """Two submissions show up in the table and survive a browser restart."""
        # user gets the url
        self.browser.get(self.live_server_url)
        # user enters a first set of data and checks if the data is receptioned by the system
        self.user_sees_inputfields_and_enters_data('9', '00:46:48', '00:38:42', '162')
        # user sees the row in the table on the page matching the data entered
        self.check_entered_data_on_screen(['9.0 0:46:48 0:38:42 162'])
        # user enters a second set of data checks if the data is receptioned by the system
        self.user_sees_inputfields_and_enters_data('14.182', '01:08:53', '00:52:23', '159')
        # user sees both rows in the table on the page matching the data from the second training session
        self.check_entered_data_on_screen(['9.0 0:46:48 0:38:42 162', '14.182 1:08:53 0:52:23 159'])
        # user closes the browser and reopens, to see his previously entered data
        self.tearDown()
        self.setUp()
        # user gets the url
        self.browser.get(self.live_server_url)
        self.check_entered_data_on_screen(['9.0 0:46:48 0:38:42 162', '14.182 1:08:53 0:52:23 159'])

    def test_web_page_is_loaded(self) :
        """The home page loads and carries the expected title."""
        # user gets the url
        self.browser.get(self.live_server_url)
        # user sees the title in the browser window
        self.assertIn('Runners Log', self.browser.title)
示例11: append_df
# 需要导入模块: from selenium.webdriver import Firefox [as 别名]
# 或者: from selenium.webdriver.Firefox import find_element_by_id [as 别名]
def append_df(a, b):
    """Concatenate two DataFrames row-wise and return the result.

    FIX: the original used ``a.append(b)``, which was deprecated in
    pandas 1.4 and removed in pandas 2.0; ``pd.concat`` is the supported
    equivalent and preserves the same row order.
    """
    import pandas as pd  # local import: module-level alias not guaranteed here
    return pd.concat([a, b])
def save_df_list(p_df_list, filename):
    """Concatenate the DataFrames in *p_df_list* and write them as one CSV.

    FIX: the original reduced over DataFrame.append (removed in pandas 2.0,
    and quadratic — each step copied the accumulated frame); a single
    ``pd.concat`` over the whole list is linear and supported.  An empty
    list still writes an empty CSV, matching the original behaviour.
    """
    import pandas as pd  # local import: module-level alias not guaranteed here
    frames = list(p_df_list)
    big_df = pd.concat(frames) if frames else pd.DataFrame()
    big_df.to_csv(filename, index=None, encoding='utf-8')
if __name__ == '__main__':
    # Scrape Azure Linux VM pricing with a real browser (the tables are
    # rendered client-side).  NOTE(review): this excerpt appears truncated —
    # pricing_list/suse_list are populated nowhere in the visible code.
    driver = Firefox()
    azure_linux = 'http://azure.microsoft.com/en-us/pricing/details/virtual-machines/#Linux'
    driver.get(azure_linux)
    # Normalise the currency so prices are comparable across regions.
    ddlCurrency = Select(driver.find_element_by_id('wa-dropdown-currency'))
    ddlCurrency.select_by_value('USD')
    ddlRegion = Select(driver.find_element_by_id('wa-dropdown-region'))
    pricing_list = []
    suse_list = []
    # Iterate over all Azure compute regions
    for opt in ddlRegion.options:
        current_region = opt.get_attribute('value')
        ddlRegion.select_by_value(current_region)
        html_doc = driver.page_source
        soup = BeautifulSoup(html_doc, 'html.parser')
        # All info is in the same page, we can extract it in a single pass
        div = soup.find('div', attrs={'class': 'wa-tabs-container'})
        div_active = div.find('div', attrs={'class': 'active'}, recursive=False)
示例12: print
# 需要导入模块: from selenium.webdriver import Firefox [as 别名]
# 或者: from selenium.webdriver.Firefox import find_element_by_id [as 别名]
current_url = browser.current_url.split('&')[0]
link = current_url != draft_results_url
try:
username_login_box = browser.find_element_by_name("username")
username_login_box.send_keys(username)
except NoSuchElementException:
print('No username element') # log
try:
password_login_box = browser.find_element_by_name("password")
password_login_box.send_keys(password)
except NoSuchElementException:
print('No password element') # log
try:
button = browser.find_element_by_id("login-signin")
button.click()
except NoSuchElementException:
print('No button element')
browser.get(draft_results_url)
sleep(delay)
break
soup = BeautifulSoup(browser.page_source, 'html.parser')
players = soup.select('span[title="This player is a keeper."]')
players = [keeper(player)
for player in players]
keepers[season.year] = players
if set(season.draft_years) <= keepers.keys():
for y in season.draft_years:
示例13: RedFin
# 需要导入模块: from selenium.webdriver import Firefox [as 别名]
# 或者: from selenium.webdriver.Firefox import find_element_by_id [as 别名]
#.........这里部分代码省略.........
except:
property_data['beds'] = 'N/A';print('beds not found')
try:
property_data['baths'] = self.soup.find('div', attrs={'data-rf-test-id': 'abp-baths'}).find(
'div').get_text()
except:
property_data['baths'] = 'N/A';print('baths not found')
try:
property_data['sqFt'] = self.soup.find('div', attrs={'data-rf-test-id': 'abp-sqFt'}).find('span', attrs={
'class': 'main-font statsValue'}).get_text()
except:
property_data['sqFt'] = 'N/A';print('sqFt not found')
try:
property_data['price_per_sqFt'] = self.soup.find('div', attrs={'data-rf-test-id': 'abp-sqFt'}).find('div',
attrs={
"data-rf-test-id": "abp-priceperft"}).get_text()
except:
property_data['price_per_sqFt'] = 'N/A';print('price_per_sqFt not found')
try:
property_data['year_built'] = self.soup.find('span', attrs={"data-rf-test-id": "abp-yearBuilt"}).find(
'span', attrs={'class': 'value'}).get_text()
except:
property_data['year_built'] = 'N/A';print('year_built not found')
try:
property_data['days_on_redfin'] = self.soup.find('span',
attrs={"data-rf-test-id": "abp-daysOnRedfin"}).find('span',
attrs={
'class': 'value'}).get_text()
except:
property_data['days_on_redfin'] = 'N/A';print('days_on_redfin not found')
try:
property_data['status'] = self.soup.find('span', attrs={"data-rf-test-id": "abp-status"}).find('span',
attrs={
'class': 'value'}).get_text()
except:
property_data['status'] = 'N/A';print('status not found')
property_data['summary'] = self.soup.find('div', attrs={'class': 'remarks'}).get_text()
for row in self.soup.find('div', attrs={'class': 'more-info-div'}).find_all('tr'):
cells = row.find_all('td')
property_data[cells[0].get_text().strip()] = cells[1].get_text().strip()
# use loops to maintain data structure ina dict
property_data['property_details'] = OrderedDict()
for category in self.soup.find('div', attrs={'class': 'amenities-container'}).children:
key = category.contents[0].get_text().strip()
property_data['property_details'][key] = OrderedDict()
for row in category.contents[1].find_all('div', attrs={'class': 'amenity-group'}):
key2 = row.find('h4').get_text()
property_data['property_details'][key][key2] = []
for row2 in row.find_all('li'):
property_data['property_details'][key][key2].append(row2.get_text())
property_data['propert_history'] = []
for row in self.soup.find_all('tr', attrs={'id': reg_property_history_row}):
data_cells = row.find_all('td')
history_data_row = OrderedDict()
history_data_row['date'] = data_cells[0].get_text()
history_data_row['event & source'] = data_cells[1].get_text()
history_data_row['price'] = data_cells[2].get_text()
history_data_row['appreciation'] = data_cells[3].get_text()
property_data['propert_history'].append(history_data_row)
property_data['url'] = 'https://www.redfin.com' + property_url
self.output_data.append(property_data)
return property_data
def use_browser(self):
    """Switch this scraper to fetching pages through a real Firefox driver,
    optionally routed through a randomly chosen proxy."""
    self.use_selenium = True
    firefox_profile = FirefoxProfile()
    # might as well turn off images since we don't need them
    if self.use_proxies:
        # if use proxies is true load firefox with proxies
        # NOTE(review): the image-disabling preference is only applied on
        # the proxy path, despite the comment above — confirm intent.
        firefox_profile.set_preference("permissions.default.image", 2)
        # Proxies are "host:port" strings; pick one at random per session.
        proxy_host, proxy_port = choice(self.proxies).split(':')
        firefox_profile.set_preference("network.proxy.type", 1)
        firefox_profile.set_preference("network.proxy.http", proxy_host)
        firefox_profile.set_preference("network.proxy.http_port", int(proxy_port))
        firefox_profile.set_preference("network.proxy.ssl", proxy_host)
        firefox_profile.set_preference("network.proxy.ssl_port", int(proxy_port))
    self.driver = Firefox(firefox_profile)
    self.driver.implicitly_wait(2)
def get_page_selenium(self, page_url):
    """Fetch *page_url* with the selenium driver (handling any captcha)
    and return the rendered HTML source."""
    self.driver.get(page_url)
    self.selenium_bypass_captcha()
    rendered_source = self.driver.page_source
    return rendered_source
def selenium_bypass_captcha(self):
    # basic code for handling captcha
    # this requires the user to actually solve the captcha and then continue
    # NOTE(review): switch_to_frame/switch_to_default_content are deprecated
    # selenium APIs, and raw_input ties this method to Python 2.
    try:
        # Focus the recaptcha iframe and tick its checkbox.
        self.driver.switch_to_frame(self.driver.find_element_by_xpath('//iframe[@title="recaptcha widget"]'))
        self.driver.find_element_by_class_name('recaptcha-checkbox-checkmark').click()
        print('solve captcha ( pop up only ) and press enter to continue')
        raw_input()
        self.driver.switch_to_default_content()
        self.driver.find_element_by_id('submit').click()
    except Exception as e:
        # Deliberate best-effort: no captcha present (or any failure here)
        # is silently ignored and the caller proceeds with the page as-is.
        pass