本文整理汇总了 Python 中 ghost.Ghost.wait_for_text 方法的典型用法代码示例。如果您正苦于以下问题：Python Ghost.wait_for_text 方法的具体用法？Python Ghost.wait_for_text 怎么用？Python Ghost.wait_for_text 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 ghost.Ghost 的用法示例。
在下文中一共展示了Ghost.wait_for_text方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: download
# 需要导入模块: from ghost import Ghost [as 别名]
# 或者: from ghost.Ghost import wait_for_text [as 别名]
def download():
    """Return the page content once the play-count label has appeared.

    Opens ``url`` in a Ghost browser created with ``display=False`` and
    ``download_images=False``, clicks the ``#fn_stat`` element, waits for
    the text "总播放数:" and returns ``ghost.content``.

    ``url`` and ``timeout`` are not parameters of this function; they are
    looked up in the surrounding scope, exactly as in the original listing.

    Returns:
        The value of ``ghost.content`` after the awaited text is present.
    """
    from ghost import Ghost

    # NOTE(review): `url` and `timeout` must be supplied by the enclosing
    # scope (closure or module globals) -- they are not defined here.
    ghost = Ghost(wait_timeout=timeout, download_images=False, display=False)
    try:
        ghost.open(url)
        ghost.click(b"#fn_stat")
        ghost.wait_for_text("总播放数:")
        return ghost.content
    finally:
        # Release the browser even if one of the steps above raises; the
        # original never called exit() at all.
        ghost.exit()
示例2: download
# 需要导入模块: from ghost import Ghost [as 别名]
# 或者: from ghost.Ghost import wait_for_text [as 别名]
def download(url, timeout=30):
    """Return the page content once the play-count panel has appeared.

    Opens *url* in a Ghost browser created with ``display=False`` and
    ``download_images=False``, clicks the ``.playNums`` element, waits for
    the text "播放量数据" and returns the page content.

    Args:
        url: Address of the page to open.
        timeout: Passed to Ghost as ``wait_timeout`` (default 30).

    Returns:
        The value of ``ghost.content`` after the awaited text is present.
    """
    from ghost import Ghost

    ghost = Ghost(wait_timeout=timeout, download_images=False, display=False)
    try:
        ghost.open(url)
        ghost.click(b'.playNums')
        ghost.wait_for_text("播放量数据")
        return ghost.content
    finally:
        # Previously exit() was skipped whenever open/click/wait raised,
        # leaving the browser alive; always shut it down.
        ghost.exit()
示例3: GAGetter
# 需要导入模块: from ghost import Ghost [as 别名]
# 或者: from ghost.Ghost import wait_for_text [as 别名]
class GAGetter(object):
    """Read the Google Analytics real-time counter through a Ghost browser.

    The flow is: sign in with the stored credentials, jump to the
    real-time overview of the first report id found in the page, then read
    the ``#ID-overviewCounterValue`` element.
    """

    def __init__(self, email, password):
        """Store the credentials and create the Ghost browser (wait_timeout=20)."""
        self.email = email
        self.password = password
        self.ghost = Ghost(wait_timeout=20)

    def test(self):
        """Run the whole flow once and print the counter value."""
        self.sign_in()
        self.go_to_realtime_site()
        # Fixed pause before reading the counter; presumably gives the
        # real-time widget time to populate -- confirm.
        sleep(10)
        # Parenthesised single-argument form prints the same text on
        # Python 2 and is also valid Python 3.
        print('COUNTER: {0}'.format(self.get_counter()))

    def sign_in(self):
        """Open the Analytics landing page and log in.

        Each wait_for_* call blocks until the named text/selector shows up
        in the page, so the steps run strictly in page order.
        """
        self.ghost.open('http://www.google.com/analytics/index.html')
        self.ghost.wait_for_text('Analytics')
        self.ghost.wait_for_selector('a.secondary-button')
        self.ghost.click('a.secondary-button')
        self.ghost.wait_for_text("Can't access your account?")
        self._fill_signin_form()
        self.ghost.wait_for_text('All Accounts')

    def _fill_signin_form(self):
        """Fill the e-mail/password fields of the login form and submit it."""
        self.ghost.fill(
            "form", {
                "Email": self.email,
                "Passwd": self.password,
            }
        )
        self.ghost.fire_on("form", "submit", expect_loading=True)
        self.ghost.wait_for_page_loaded()

    def go_to_realtime_site(self):
        """Switch the loaded page to the real-time overview and return its content.

        The report id is taken from the first
        ``#report/visitors-overview/<id>/`` fragment in the current page.

        Returns:
            ``self.ghost.content`` after the text 'Right now' has appeared.

        Raises:
            ValueError: if the current page contains no such fragment
                (the original failed here with an AttributeError on None).
        """
        m = re.search(
            r"#report/visitors-overview/([a-z0-9]+)/",
            self.ghost.content
        )
        if m is None:
            raise ValueError(
                "no '#report/visitors-overview/<id>/' link found in the page")
        self.ghost.evaluate(
            "window.location.href = '#realtime/rt-overview/{0}/'".format(
                m.group(1)
            )
        )
        self.ghost.wait_for_text('Right now')
        return self.ghost.content

    def get_counter(self):
        """Return the inner HTML of the ``#ID-overviewCounterValue`` element."""
        d = pq(self.ghost.content)
        return d('#ID-overviewCounterValue').html()
示例4: WxGhost
# 需要导入模块: from ghost import Ghost [as 别名]
# 或者: from ghost.Ghost import wait_for_text [as 别名]
# Thin wrapper around the object returned by Ghost(...).start(): it keeps
# cookies in "cookie.txt" and reacts to the site's "access too frequent"
# notice by waiting for the expected article text to appear.
# NOTE(review): this listing lost its indentation, so the extent of the
# try/except and if bodies below cannot be read off the text -- confirm
# against the original project before restructuring.
class WxGhost(object):
# Start Ghost with CRITICAL log level, set download_images to False and
# try to restore cookies from "cookie.txt".
def __init__(self):
self.ghost = Ghost(log_level=logging.CRITICAL).start()
self.ghost.download_images = False
try:
self.ghost.load_cookies("cookie.txt")
print 'load cookie'
except IOError:
print 'load cookie error'
# NOTE(review): unclear whether show() belongs to the except branch only
# or runs unconditionally -- confirm.
self.ghost.show()
# Check the current page for the "access too frequent" notice and, when it
# is present, show the window, capture the "#seccodeImage" element to
# "seccode.png", wait for the article marker text, then save the cookies.
# NOTE(review): presumably every statement after the `if` line is part of
# its body -- confirm.
def handle_frequency(self):
if u"您的访问过于频繁" in self.ghost.content:
print 'frequency'
self.ghost.show()
self.ghost.capture_to("seccode.png", selector="#seccodeImage")
self.ghost.wait_for_text(u'以下内容来自微信公众号', timeout=1800) # enter the captcha
self.ghost.save_cookies("cookie.txt")
# Open `url`, then run the frequency check. Returns False (after printing a
# message) when a TimeoutError is raised, True otherwise.
def open(self, url):
try:
self.ghost.open(url)
self.handle_frequency()
except TimeoutError:
print 'timeout when open'
return False
return True
# Evaluate the JavaScript source `js`, then run the frequency check.
# Returns False when a TimeoutError is raised (no message is printed here,
# unlike open()), True otherwise.
def evaluate(self, js, expect_loading=True):
try:
self.ghost.evaluate(js, expect_loading=expect_loading)
self.handle_frequency()
except TimeoutError:
return False
return True
# Delegate to the wrapped object's sleep() with the given value.
def sleep(self, value):
self.ghost.sleep(value)
# Parse the current page content with lxml.html.fromstring and return the
# result.
def get_lxml(self):
return lxml.html.fromstring(self.ghost.content)
示例5: RunExport
# 需要导入模块: from ghost import Ghost [as 别名]
# 或者: from ghost.Ghost import wait_for_text [as 别名]
def RunExport():
ghost = Ghost(viewport_size=(1200, 2400), display=False, wait_timeout=30, cache_dir=CACHE_DIRECTORY)#, log_level=logging.ERROR
#
#login_password
#submit.x
#submit
page, resources = ghost.open('https://www.paypal.com/ie/cgi-bin/webscr?cmd=_login-run')
result, resources = ghost.fill("form[name=login_form]", {
"login_email": PAYPAL_USERNAME,
"login_password": PAYPAL_PASSWORD
})
page, resources = ghost.fire_on("form[name=login_form]", "submit", expect_loading=True)
result, resources = ghost.wait_for_page_loaded()
#wait for 10 seconds
#time.sleep(10)
page, resources = ghost.open('https://www.paypal.com/ie/cgi-bin/webscr?cmd=_account')
result, resources = ghost.wait_for_text("Welcome, %s" % PAYPAL_NAME)
getHistoryListing(ghost)
first_run = True
#get the next url
#print ghost.evaluate('document.querySelectorAll("#tableWrapperID .pagination:nth-child(1) a.btnLink");')[0]
nav_links_eval = """
var links = document.querySelectorAll(".pagination a.btnLink");
links.length;
"""
nav_links = ghost.evaluate(nav_links_eval)
page_count = START_AT_PAGE
transaction_count = 0
if page_count > 0:
transaction_count = page_count * 20
goToPage(ghost,page_count)
#transaction_list_url = resources[0].url
#print transaction_list_url
while nav_links[0] > 0 or first_run==True:
first_run = False
page_count = page_count + 1
filteredlisting_export = os.path.join(EXPORT_DIRECTORY,'filteredhistory%d.png' % page_count)
if not os.path.isfile(filteredlisting_export):
ghost.capture_to(filteredlisting_export, selector="body")
transaction_urls = ghost.evaluate("""
var links = document.querySelectorAll("#transactionTable tr.primary td.detailsNoPrint a");
var listRet = [];
for (var i=0; i<links.length; i++){
listRet.push(links[i].href);
}
listRet;
""")
for transaction_href in transaction_urls[0]:
transaction_count = transaction_count + 1
#print urllib.unquote(transaction_href)
page, resources = ghost.open(urllib.unquote(transaction_href))
ghost.wait_for_page_loaded()
payee_name = None
date_string = None
date = ghost.evaluate("""
document.querySelectorAll("#historyMiniLog tbody tr")[2].querySelectorAll('td')[0].innerHTML;
""")
if date and date[0]:
date_string = date[0].replace(' ','')
payee = ghost.evaluate("""
document.querySelectorAll("#historyMiniLog tbody tr")[2].querySelectorAll('td')[1].innerHTML;
""")
if payee and payee[0]:
payee_name = safeFilename(payee[0].replace(' ',''))
if payee_name and date_string:
date_object = datetime.strptime(date_string, '%d-%b-%Y')
date_string=datetime.strftime(date_object,'%Y-%m-%d')
print 'page %d transaction %d [%s - %s]' % (page_count, transaction_count, date_string, payee_name)
purchasedetails_export = os.path.join(EXPORT_DIRECTORY,'%s_%s_%s.png' % (date_string,payee_name,transaction_count ))
if not os.path.isfile(purchasedetails_export):
print '\t\tsaving to %s' % purchasedetails_export
ghost.capture_to(purchasedetails_export, selector="#xptContentMain")
else:
print '\t\tAlready saved to %s' % purchasedetails_export
else:
purchasedetails_export = os.path.join(EXPORT_DIRECTORY,'no date and payee - page-%d_ transaction %d.png' % (page_count,transaction_count ))
print '\t\tsaving to %s' % purchasedetails_export
if not os.path.isfile(purchasedetails_export):
#.........这里部分代码省略.........
示例6: __init__
# 需要导入模块: from ghost import Ghost [as 别名]
# 或者: from ghost.Ghost import wait_for_text [as 别名]
#.........这里部分代码省略.........
set crawler to use checkin/out dates from inputParams
"""
self.checkIn = checkIn
self.checkOut = checkOut
return 0
# Record the target site name on the instance (and reset rootUrl for
# 'ctrip'); the final statement returns 0.
# NOTE(review): indentation was lost in this listing, so it is unclear
# whether the targetSite assignment sits inside the 'ctrip' branch -- confirm.
def setRootUrl(self, rootUrlName):
"""
set the root url of the target site for current crawler task
"""
if (rootUrlName == 'ctrip'):
# NOTE(review): rootUrl becomes an empty string here, while
# getLowestPriceCtrip reads self.rootUrl['ctrip'] -- confirm which shape
# (string or mapping) is intended.
self.rootUrl = ''
self.targetSite = rootUrlName
return 0
def getPriceRegex(self, text):
    """Extract the first price embedded in *text* as ``&Price=<digits>``.

    Args:
        text: Page source or query string to search, for example
            ``"...&Star=5&Price=1683&HotelName=..."``.

    Returns:
        The digits of the first ``&Price=`` token as a string (``"1683"``
        for the example above), or ``'NP'`` -- the "lowest price not
        found" marker that getLowestPriceCtrip also returns -- when *text*
        contains no such token.
    """
    # Capture the digits directly instead of stringifying the list of
    # matches and scanning it a second time. A plain raw string replaces
    # the Python-2-only ``ur""`` prefix.
    match = re.search(r"&Price=(\d+)", text)
    if match is None:
        # The original raised IndexError on result[0] in this case.
        return 'NP'
    return match.group(1)
def isNoResult(self, html):
    """Return the text content of the page's ``.search_noresult strong`` element.

    The *html* argument is not used: the value is obtained by evaluating a
    JavaScript snippet in the page currently held by ``self.ghost``.

    Returns:
        The first item of the pair returned by ``self.ghost.evaluate``.
    """
    script = """(function () {
var element = document.querySelector(".search_noresult strong").textContent;
return element;
})();"""
    evaluated = self.ghost.evaluate(script)
    # evaluate() yields a (result, resources) pair; only the result is used.
    text, _resources = evaluated
    return text
def getLowestPrice(self, html):
    """Return the text content of the page's ``.map_mark_price span`` element.

    The *html* argument is not used: the value is obtained by evaluating a
    JavaScript snippet in the page currently held by ``self.ghost``.

    Returns:
        The first item of the pair returned by ``self.ghost.evaluate``.
    """
    script = """(function () {
var element = document.querySelector(".map_mark_price span").textContent;
return element;
})();"""
    evaluated = self.ghost.evaluate(script)
    # evaluate() yields a (result, resources) pair; only the result is used.
    price_text, _resources = evaluated
    return price_text
def getLowestPriceCtrip(self, html):
    """Open the Ctrip detail page of the listed hotel and return its price.

    Reads the hotel id from the currently loaded result list, builds the
    detail-page URL with the stored check-in/check-out dates, opens it and
    waits for the price marker before extracting the price.

    The *html* argument is not used.

    Returns:
        'NF' when no hotel id could be read from the result list;
        'NP' when waiting for the price marker raises (per the original
        author: usually no room is available for the selected dates);
        otherwise the value getPriceRegex extracts from the detail page.
    """
    # Example detail URL:
    # http://hotels.ctrip.com/international/14540.html?CheckIn=2014-08-04&CheckOut=2014-08-05&Rooms=1

    # NOTE(review): `detail` is not defined in this method; presumably it
    # comes from module or enclosing scope -- confirm.
    domestic = detail['country'] in ['china', 'China']

    # Domestic and international result pages use different list containers.
    hotelIdContainer = 'searchresult_list' if domestic else 'hotel_list_item'
    hotelIdSelectorJs = """(function () {
var element = document.querySelector('.""" + hotelIdContainer + """').id
return element;
})();"""
    hotelId, resources = self.ghost.evaluate(hotelIdSelectorJs)
    if hotelId is None:
        return 'NF'  # hotel not found

    # The two URL variants differ only in the path segment.
    # NOTE(review): setRootUrl assigns a plain string to self.rootUrl while
    # this indexes it by site name -- confirm the intended shape.
    section = 'hotel/' if domestic else 'international/'
    detailPageUrl = (self.rootUrl['ctrip'] + section + str(hotelId)
                     + '.html?CheckIn=' + self.checkIn
                     + '&CheckOut=' + self.checkOut + '&Rooms=1')

    # Go to the hotel detail page with the parameters passed via GET.
    self.ghost.open(detailPageUrl, wait=False)

    try:
        if domestic:
            # Domestic pages are detected by text rather than by selector.
            self.ghost.wait_for_text('hotel.detail')
        else:
            self.ghost.wait_for_selector('#detail_price dfn')
    except Exception as e:
        print('[ERROR]price cannot be found ... ' + str(detailPageUrl))
        # Report the actual exception type; the original printed the
        # literal `Exception` class regardless of what was raised.
        print('%s : %s' % (type(e).__name__, e))
        return 'NP'  # lowest price not found

    # The page source carries the price as "&Price=<digits>&".
    return self.getPriceRegex(self.ghost.content)