本文整理匯總了Python中Classes.CleanText.CleanText類的典型用法代碼示例。如果您正苦於以下問題：Python CleanText類的具體用法？Python CleanText怎麼用？Python CleanText使用的例子？那麼，這裡精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了CleanText類的15個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: getLeads
def getLeads(self):
    """Collect one lead per ``<h3>`` heading found outside the intro box.

    For every heading whose text is not 'Quick Links' or 'About Us' a
    ``[title, description, requirements, sourceWebsite, sourceText]`` list is
    appended to ``self.teacherDotOrgLeadArrays``.  The first heading reads its
    details from the ``div.intro`` block; later headings read them from the
    ``<p>`` siblings that follow the heading.  Text values are passed through
    ``CleanText.cleanALLtheText``.

    The linked page is only fetched when a link was actually found; otherwise
    ``sourceText`` stays an empty string.

    Calls ``self.driver.close()`` when done and returns
    ``self.teacherDotOrgLeadArrays``.
    """
    headingXpath = "//h3[not(ancestor::div[@id='scholarship_intro_859'])]"
    skippedTitles = ('Quick Links', 'About Us')

    for i, titleDiv in enumerate(self.driver.find_elements_by_xpath(headingXpath)):
        title = titleDiv.get_attribute('textContent')
        if title in skippedTitles:
            continue

        requirements = ''
        sourceWebsite = ''
        description = ''

        if i == 0:
            description = self.driver.find_element_by_xpath(
                "//div[@class='intro']/p").get_attribute('textContent')
            sourceWebsite = self.driver.find_element_by_xpath(
                "//div[@class='intro']/p/a").get_attribute('href')
            requirements = self.driver.find_element_by_xpath(
                "//div[@class='intro']/following-sibling::*[1][self::ul]").get_attribute('textContent')
        else:
            # XPath positions are 1-based, hence i + 1.
            # NOTE(review): ``//h3[...][n]`` picks the n-th matching heading among
            # the children of each parent, not the n-th in the whole document.
            # This assumes all headings share one parent - TODO confirm.
            nthHeading = "%s[%s]" % (headingXpath, i + 1)
            firstPara = nthHeading + "/following-sibling::p[1]"
            secondPara = nthHeading + "/following-sibling::p[2][(preceding-sibling::*[1][self::p])]"

            # Every other lookup targets the first paragraph, a paragraph after
            # it, or a link inside one of them, so none can match when the first
            # paragraph is absent.
            if self.checkIfElementExists(firstPara):
                description = self.driver.find_element_by_xpath(firstPara).get_attribute('textContent')
                if self.checkIfElementExists(secondPara):
                    requirements = self.driver.find_element_by_xpath(secondPara).get_attribute('textContent')
                # Prefer a link in the first paragraph, fall back to the second.
                if self.checkIfElementExists(firstPara + "/a"):
                    sourceWebsite = self.driver.find_element_by_xpath(firstPara + "/a").get_attribute('href')
                elif self.checkIfElementExists(secondPara + "/a"):
                    sourceWebsite = self.driver.find_element_by_xpath(secondPara + "/a").get_attribute('href')

        # Only fetch the linked page when there is a link to fetch.
        if sourceWebsite:
            sourceText = CleanText.cleanALLtheText(RipPage.getPageSource(sourceWebsite))
        else:
            sourceText = ''

        title = CleanText.cleanALLtheText(title)
        description = CleanText.cleanALLtheText(description)
        requirements = CleanText.cleanALLtheText(requirements)
        self.teacherDotOrgLeadArrays.append(
            [title, description, requirements, sourceWebsite, sourceText])

    self.driver.close()
    return self.teacherDotOrgLeadArrays
示例2: getTitleAbstractList
def getTitleAbstractList(self):
    """Pair each cleaned title with its cleaned abstract.

    Returns a list of ``[title, abstract]`` lists, one per entry of
    ``self.getAbstracts()``; the title at the same index of
    ``self.getTitles()`` is used.  Both values are passed through
    ``CleanText.cleanALLtheText``.
    """
    rawTitles = self.getTitles()
    rawAbstracts = self.getAbstracts()
    pairs = []
    for position, rawAbstract in enumerate(rawAbstracts):
        cleanedAbstract = CleanText.cleanALLtheText(rawAbstract)
        cleanedTitle = CleanText.cleanALLtheText(rawTitles[position])
        pairs.append([cleanedTitle, cleanedAbstract])
    return pairs
示例3: getResultPageInfo
def getResultPageInfo(self):
    """Read the labelled fields and the source link from the current result page.

    Returns ``[sponsor, awardAmount, recipients, requirements, additionalInfo,
    contact, address, sourceWebsite, sourceText]``.  The seven labelled
    lookups are unguarded, so a missing field propagates whatever the driver
    raises.  Only the source link is optional: when it is absent both
    ``sourceWebsite`` and ``sourceText`` are empty strings.
    """
    # (label text, path below the following sibling div), in result order.
    labelledFields = (
        ('Awarded By', '/p'),
        ('Award Amount', '/p'),
        ('Recipients', '/p'),
        ('Requirements', ''),
        ('Additional Information', '/p'),
        ('Contact', '/p'),
        ('Address', ''),
    )

    resultPageArray = []
    for label, tail in labelledFields:
        fieldXpath = "//div/p/strong[text() = '%s']/../../following-sibling::div%s" % (label, tail)
        fieldElement = self.driver.find_element_by_xpath(fieldXpath)
        resultPageArray.append(
            CleanText.cleanALLtheText(fieldElement.get_attribute('textContent')))

    linkXpath = "//a[@class='button secondary']"
    sourceWebsite = ''
    sourceText = ''
    if self.checkIfElementExists(linkXpath):
        sourceWebsite = self.driver.find_element_by_xpath(linkXpath).get_attribute('href')
        sourceText = CleanText.cleanALLtheText(RipPage.getPageSource(sourceWebsite))

    resultPageArray.extend([sourceWebsite, sourceText])
    return resultPageArray
示例4: getResultPageInfo
def getResultPageInfo(self):
    """Read every optional field from the current result page.

    Returns ``[url, sponsor, awardAmount, recipients, requirements,
    additionalInfo, contact, address, deadlineInformation, sourceWebsite,
    sourceText]``.  Each field is an empty string when its element is not
    found by ``self.checkIfElementExists``.
    """
    url = self.driver.current_url

    def cleanedTextAt(xpath):
        # Cleaned textContent of the element at ``xpath``, or '' when absent.
        if not self.checkIfElementExists(xpath):
            return ''
        return CleanText.cleanALLtheText(
            self.driver.find_element_by_xpath(xpath).get_attribute('textContent'))

    def labelledXpath(label, tail):
        # XPath of the div (or a <p> below it) that follows the labelled <strong>.
        return "//div/p/strong[text() = '%s']/../../following-sibling::div%s" % (label, tail)

    # The sponsor cell also carries a '» More Info' link text that is removed.
    sponsor = re.sub('» More Info', '', cleanedTextAt(labelledXpath('Awarded By', '/p')))
    awardAmount = cleanedTextAt(labelledXpath('Award Amount', '/p'))
    recipients = cleanedTextAt(labelledXpath('Recipients', '/p'))
    requirements = cleanedTextAt(labelledXpath('Requirements', ''))
    additionalInfo = cleanedTextAt(labelledXpath('Additional Information', '/p'))
    contact = cleanedTextAt(labelledXpath('Contact', '/p'))
    address = cleanedTextAt(labelledXpath('Address', ''))
    deadlineInformation = cleanedTextAt(
        "//strong[text() ='Deadline Information']/following-sibling::span[@class='smalltext']")

    linkXpath = "//a[@class='button cta']"
    sourceWebsite = ''
    sourceText = ''
    if self.checkIfElementExists(linkXpath):
        sourceWebsite = self.driver.find_element_by_xpath(linkXpath).get_attribute('href')
        sourceText = CleanText.cleanALLtheText(RipPage.getPageSource(sourceWebsite))

    return [url, sponsor, awardAmount, recipients, requirements, additionalInfo, contact, address,
            deadlineInformation, sourceWebsite, sourceText]
示例5: test_getListConcatenatedDescriptionEligibility
def test_getListConcatenatedDescriptionEligibility(self):
    """The first concatenated string must equal 'description eligibility' of the first DB row.

    The expected value is built from the first ``dbo.GrantForwardItems`` row
    for the keyword, with both columns passed through
    ``CleanText.cleanALLtheText``.
    """
    # set up
    db = SUDBConnect()
    keyword = 'East Asian Studies'
    infoGetter = GrantForwardItemsGetDatabaseInfo(keyword=keyword)
    concatenatedList = infoGetter.getListStringConcatenatedDescriptionEligibility()
    firstCombo = concatenatedList[0]

    # test
    query = "select * from dbo.GrantForwardItems where Keyword='" + keyword + "'"
    rows = db.getRowsDB(query)
    cleanedDescription = CleanText.cleanALLtheText(rows[0].Description)
    cleanedEligibility = CleanText.cleanALLtheText(rows[0].Eligibility)
    rebuiltCombo = '%s %s' % (cleanedDescription, cleanedEligibility)
    self.assertEqual(rebuiltCombo, firstCombo)
示例6: getListofListofItems
def getListofListofItems():
    """Build ``[title, abstract, eligibility]`` rows from the Pivot tag getters.

    One row is produced per abstract; the title and eligibility at the same
    index are used.  Every value is passed through
    ``CleanText.cleanALLtheText``.
    """
    rawTitles = GetPivotTagsTitleAbstractEligibility.getTitles()
    rawAbstracts = GetPivotTagsTitleAbstractEligibility.getAbstracts()
    rawEligibilities = GetPivotTagsTitleAbstractEligibility.getEligibilities()

    rows = []
    for position, rawAbstract in enumerate(rawAbstracts):
        cleanedAbstract = CleanText.cleanALLtheText(rawAbstract)
        cleanedEligibility = CleanText.cleanALLtheText(rawEligibilities[position])
        cleanedTitle = CleanText.cleanALLtheText(rawTitles[position])
        rows.append([cleanedTitle, cleanedAbstract, cleanedEligibility])
    return rows
示例7: getLeads
def getLeads(self):
    """Visit every scholarship result and collect one lead per result.

    Titles, amounts and deadlines are read once from the result list.  For
    each title the result list is reloaded, the matching detail link is
    clicked and ``self.getResultPageInfo()`` supplies the remaining fields.
    Each lead is ``[title, amount, deadline, sponsor, awardAmount, recipients,
    requirements, additionalInfo, contact, address, sourceWebsite,
    sourceText]`` and is appended to ``self.unigoLeadsArray``.

    A result whose detail link is not present after the reload is skipped.

    Calls ``self.driver.quit()`` when done and returns
    ``self.unigoLeadsArray``.
    """
    titleXpath = "//h4[@data-bind='text: $parent.resultLayout ? shortTitle : Title']"
    clickXpath = ("//a[@data-bind='click: function(scholarship, event) "
                  "{ $parent.showScholarshipDetail(scholarship, event) }']")

    self.expandSeeMore()
    arrayOfAmountObjects = self.driver.find_elements_by_xpath(
        "//div[@class='amount']/span[@data-bind='text: Aequitas.toCurrency(DollarAmount)']")
    arrayOfTitleObjects = self.driver.find_elements_by_xpath(titleXpath)
    # NOTE(review): the deadline lookup uses the very same XPath as the title
    # lookup - looks like a copy/paste slip; confirm the intended selector.
    arrayOfDeadlineObjects = self.driver.find_elements_by_xpath(titleXpath)

    titlesList = self.getTitlesList(arrayOfTitleObjects)
    amountsList = self.getAmountsList(arrayOfAmountObjects)
    deadlinesList = self.getDeadlinesList(arrayOfDeadlineObjects)

    for i, rawTitle in enumerate(titlesList):
        title = CleanText.cleanALLtheText(rawTitle)
        amount = CleanText.cleanALLtheText(amountsList[i])
        deadline = CleanText.cleanALLtheText(deadlinesList[i])

        # Reload the list for every result, then look the links up again.
        self.driver.get(self.base_url + 'match/scholarshipresult')
        # NOTE(review): implicitly_wait() sets the element-lookup timeout; it
        # does not pause.  Left unchanged in case later lookups rely on it.
        self.driver.implicitly_wait(2)
        self.expandSeeMore()
        arrayOfClickResultObjects = self.driver.find_elements_by_xpath(clickXpath)

        # Indexing a missing entry raises IndexError instead of evaluating
        # false, so check the bounds explicitly.
        if i >= len(arrayOfClickResultObjects):
            continue

        arrayOfClickResultObjects[i].click()
        self.driver.implicitly_wait(2)

        # First nine entries: sponsor, awardAmount, recipients, requirements,
        # additionalInfo, contact, address, sourceWebsite, sourceText.
        resultPageArray = self.getResultPageInfo()
        leadArray = [title, amount, deadline] + list(resultPageArray[:9])
        self.unigoLeadsArray.append(leadArray)

    self.driver.quit()
    return self.unigoLeadsArray
示例8: getTitlesList
def getTitlesList(self):
    """Return the cleaned text of every ``h2.col-xs-12`` heading on the page.

    All raw ``textContent`` values are read first; each is then passed
    through ``CleanText.cleanALLtheText``.
    """
    rawTitles = []
    for heading in self.driver.find_elements_by_xpath("//h2[@class='col-xs-12']"):
        rawTitles.append(heading.get_attribute('textContent'))
    return list(map(CleanText.cleanALLtheText, rawTitles))
示例9: goToResultPageAndPullInformation
def goToResultPageAndPullInformation(self, resultPageLink):
    """Open ``resultPageLink`` and read every optional field from the page.

    Returns ``[description, sponsor, amount, eligibility, submissionInfo,
    categories, sourceWebsite, sourceText, deadline]``.  Each field is an
    empty string when its element is not found by
    ``self.checkIfElementExists``.
    """
    self.driver.get(resultPageLink)
    self.driver.implicitly_wait(2)

    def cleanedTextAt(xpath):
        # Cleaned textContent of the element at ``xpath``, or '' when absent.
        if not self.checkIfElementExists(xpath):
            return ''
        return CleanText.cleanALLtheText(
            self.driver.find_element_by_xpath(xpath).get_attribute('textContent'))

    # Several fields share the same "collapsed content" layout.
    collapsedField = "//div[@id = '%s']/div[@class = 'content-collapsed']"

    description = cleanedTextAt(collapsedField % 'field-description')
    sponsor = cleanedTextAt("//div[@class = 'sponsor-content']/div/a")
    amount = cleanedTextAt(collapsedField % 'field-amount_info')
    eligibility = cleanedTextAt(collapsedField % 'field-eligibility')
    submissionInfo = cleanedTextAt(collapsedField % 'field-submission_info')
    categories = cleanedTextAt("//div[@id = 'field-subjects']/ul")

    linkXpath = "//a[@class = 'source-link btn btn-warning']"
    sourceWebsite = ''
    sourceText = ''
    if self.checkIfElementExists(linkXpath):
        sourceWebsite = self.driver.find_element_by_xpath(linkXpath).get_attribute('href')
        sourceText = CleanText.cleanALLtheText(RipPage.getPageSource(sourceWebsite))

    deadline = cleanedTextAt("//div[@class='table-responsive deadline-tables']/table/tbody")

    return [description, sponsor, amount, eligibility, submissionInfo, categories, sourceWebsite,
            sourceText, deadline]
示例10: test_ListOfItemsList
def test_ListOfItemsList(self):
    """The first [title, abstract] pair must match the first PivotLeads row.

    Expected values come from the first ``dbo.PivotLeads`` row for the
    keyword, passed through ``CleanText.cleanALLtheText``.
    """
    # set up
    db = SUDBConnect()
    keyword = 'Accounting'
    titleAbstractPairs = PivotLeadsGetDatabaseInfo(keyword).getTitleAbstractList()
    firstPair = titleAbstractPairs[0]
    testTitle = firstPair[0]
    testAbstract = firstPair[1]

    # test
    query = "select * from dbo.PivotLeads where Keyword='" + keyword + "'"
    rows = db.getRowsDB(query)
    cleanedName = CleanText.cleanALLtheText(rows[0].Name)
    cleanedAbstract = CleanText.cleanALLtheText(rows[0].Abstract)
    self.assertEqual(cleanedName, testTitle)
    self.assertEqual(cleanedAbstract, testAbstract)
示例11: getAmountsList
def getAmountsList(self):
    """Return the cleaned text of every ``div.amount`` with 'Amount' removed.

    Each element's ``textContent`` has the substring 'Amount' stripped and is
    then passed through ``CleanText.cleanALLtheText``.
    """
    return [
        CleanText.cleanALLtheText(re.sub('Amount', '', amountDiv.get_attribute('textContent')))
        for amountDiv in self.driver.find_elements_by_xpath("//div[@class='amount']")
    ]
示例12: getTitlesList
def getTitlesList(self):
    """Return the cleaned link text of every title in the main-details blocks.

    Reads ``textContent`` of each ``div.main-details.clearfix > h2 > a`` match
    and passes it through ``CleanText.cleanALLtheText``.
    """
    titleLinks = self.driver.find_elements_by_xpath("//div[@class='main-details clearfix']/h2/a")
    return [CleanText.cleanALLtheText(link.get_attribute('textContent')) for link in titleLinks]
示例13: test_eligibilitiesList
def test_eligibilitiesList(self):
    """Smoke test: the 'Senior' eligibilities list exists and its first entry can be cleaned."""
    seniorInfo = GetDatabaseInfoScholarshipsWithClassStatuses('Senior')
    self.assertIsNotNone(seniorInfo)

    eligibilities = seniorInfo.getEligibilitiesList()
    self.assertIsNotNone(eligibilities)

    cleanedFirst = CleanText.cleanALLtheText(eligibilities[0])
    self.assertIsNotNone(cleanedFirst)
示例14: test_ListOfItemsList
def test_ListOfItemsList(self):
    """The first [title, description] pair must match the first GrantForwardItems row.

    Expected values come from the first ``dbo.GrantForwardItems`` row for the
    keyword, passed through ``CleanText.cleanALLtheText``.
    """
    # set up
    db = SUDBConnect()
    keyword = 'Accounting'
    titleDescriptionPairs = GrantForwardItemsGetDatabaseInfo(keyword).getTitleDescriptionList()
    firstPair = titleDescriptionPairs[0]
    testTitle = firstPair[0]
    testDescription = firstPair[1]

    # test
    query = "select * from dbo.GrantForwardItems where Keyword='" + keyword + "'"
    rows = db.getRowsDB(query)
    cleanedName = CleanText.cleanALLtheText(rows[0].Name)
    cleanedDescription = CleanText.cleanALLtheText(rows[0].Description)
    self.assertEqual(cleanedName, testTitle)
    self.assertEqual(cleanedDescription, testDescription)
示例15: test_scholarshipsDescriptionsList
def test_scholarshipsDescriptionsList(self):
    """Smoke test: the 'Junior' descriptions list exists and its first entry can be cleaned."""
    juniorInfo = GetDatabaseInfoScholarshipsWithClassStatuses('Junior')
    self.assertIsNotNone(juniorInfo)

    descriptions = juniorInfo.getScholarshipDescriptionsList()
    self.assertIsNotNone(descriptions)

    cleanedFirst = CleanText.cleanALLtheText(descriptions[0])
    self.assertIsNotNone(cleanedFirst)