This page collects typical usage examples of the Python method utils.Regex.Regex.isFoundPattern. If you have been wondering what exactly Regex.isFoundPattern does, how to call it, or what real code that uses it looks like, the curated examples below should help. You can also look into the enclosing class, utils.Regex.Regex, for more context.
Below are 7 code examples of Regex.isFoundPattern, sorted by popularity by default. You can vote up the examples you like or find useful; your votes help the system recommend better Python code examples.
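Note: the utils.Regex.Regex class itself is not shown on this page. Judging from the call sites below, isFoundPattern(pattern, data) returns True when the regex pattern matches anywhere in data. What follows is a minimal sketch of what such a wrapper might look like, assuming plain re.search / re.sub semantics; the method names are taken from the examples, but the body is an assumption, not the library's actual implementation.

import re

class Regex(object):
    # Hypothetical stand-in for utils.Regex.Regex, for illustration only.

    def isFoundPattern(self, pattern, data):
        # True if the regex matches anywhere in data (re.search semantics assumed).
        return re.search(pattern, data) is not None

    def getSearchedData(self, pattern, data):
        # First capture group of the first match, or '' when nothing matches (assumed).
        match = re.search(pattern, data)
        return match.group(1) if match else ''

    def replaceData(self, pattern, replacement, data):
        # Replace every match of pattern in data (assumed).
        return re.sub(pattern, replacement, data)

regex = Regex()
print(regex.isFoundPattern('(?i)brand:', 'Brand: Acme'))            # True
print(regex.getSearchedData(r'(?i)brand:\s*(\w+)', 'Brand: Acme'))  # Acme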
Example 1: Scrapper
# Required module: from utils.Regex import Regex [as alias]
# Or: from utils.Regex.Regex import isFoundPattern [as alias]
# (Other names used below -- Spider, LogManager, Csv, Utils, BeautifulSoup, and
# Qt's QThread/pyqtSignal -- come from the surrounding project and are not shown here.)
class Scrapper(QThread):
    notifyScrapper = pyqtSignal(object)

    isFinished = False

    def __init__(self, urllist):
        QThread.__init__(self)
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()
        print urllist
        self.urllist = urllist
        self.csv = Csv('scrapper.csv')

    def run(self):
        self.scrapData()
        self.notifyScrapper.emit(
            '<font color=green><b>------------------ Finish! ------------------------- </b></font>')

    def scrapData(self):
        try:
            total = 0
            csvHeader = ['URL', 'Title', 'Price', 'Brand', 'Features', 'Material', 'Measurements', 'Category',
                         'Size', 'Color', 'Design']
            self.csv.writeCsvRow(csvHeader)
            if self.isFinished: return
            for url in self.urllist:
                if len(url) > 0:
                    url = self.regex.replaceData('(?i)\r', '', url)
                    url = self.regex.replaceData('(?i)\n', '', url)
                    url = self.regex.getSearchedData('(?i)(http.*?)$', url)
                    print 'URL: ', url
                    self.notifyScrapper.emit(('<font color=green><b>URL: %s</b></font>' % url))
                    data = self.spider.fetchData(url)
                    if data and len(data) > 0:
                        data = self.regex.reduceNewLine(data)
                        data = self.regex.reduceBlankSpace(data)
                        soup = BeautifulSoup(data)
                        soup.prettify()
                        title = ''
                        price = ''
                        size = ''
                        brand = ''
                        features = ''
                        material = ''
                        measurements = ''
                        category = ''
                        color = ''
                        design = ''
                        if soup.find('span', id='vi-lkhdr-itmTitl') is not None:
                            title = soup.find('span', id='vi-lkhdr-itmTitl').text
                        if soup.find('span', id='prcIsum'):
                            price = soup.find('span', id='prcIsum').text
                        if soup.find('div', class_='itemAttr'):
                            specchunk = soup.find('div', class_='itemAttr')
                            rows = specchunk.find_all('tr')
                            for row in rows:
                                cols = row.find_all('td')
                                # Table cells come in label/value pairs, so step two at a time.
                                for i in range(0, len(cols), 2):
                                    # if self.regex.isFoundPattern('(?i)Condition:', cols[i].text.strip()):
                                    #     conditionChunk = cols[i + 1]
                                    #     conditionChunk = self.regex.replaceData(u'(?i)<span class="infoLink u-nowrap" id="readFull">.*?</span>', '', unicode(conditionChunk))
                                    #     conditionChunk = self.regex.replaceData(u'(?i)<b class="g-hdn">.*?</b>', '', conditionChunk)
                                    #     condition = BeautifulSoup(conditionChunk).text
                                    #     print condition
                                    if self.regex.isFoundPattern('(?i)Brand:', cols[i].text.strip()):
                                        brand = cols[i + 1].text
                                    if self.regex.isFoundPattern('(?i)Features:', cols[i].text.strip()):
                                        features = cols[i + 1].text
                                    if self.regex.isFoundPattern('(?i)Material:', cols[i].text.strip()):
                                        material = cols[i + 1].text
                                    if self.regex.isFoundPattern('(?i)Measurements:', cols[i].text.strip()):
                                        measurements = cols[i + 1].text
                                    if self.regex.isFoundPattern('(?i)Category:', cols[i].text.strip()):
                                        category = cols[i + 1].text
                                    if self.regex.isFoundPattern('(?i)Color:', cols[i].text.strip()):
                                        color = cols[i + 1].text
                                    if self.regex.isFoundPattern('(?i)Design:', cols[i].text.strip()):
                                        design = cols[i + 1].text
                                    if self.regex.isFoundPattern('(?i)Size:', cols[i].text.strip()):
                                        size = cols[i + 1].text
                        self.notifyScrapper.emit('<font color=black><b>Writing data to csv file.</b></font>')
                        csvData = [url, title, price, brand, features, material, measurements, category, size, color, design]
                        self.notifyScrapper.emit('<font color=black><b>Data: %s</b></font>' % unicode(csvData))
                        self.csv.writeCsvRow(csvData)
                        self.notifyScrapper.emit('<font color=black><b>Successfully written data to csv file.</b></font>')
                        total += 1
            self.notifyScrapper.emit('<font color=green><b>Total data scraped: [%s]</b></font>' % str(total))
        except Exception, x:
            self.notifyScrapper.emit('<font color=red><b>Error scraping category: %s</b></font>' % x.message)
            self.logger.error(x.message)
            print x
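The core trick in scrapData is walking the item-specifics table two cells at a time and using isFoundPattern as a case-insensitive label test. Stripped of the Qt and project plumbing, the same idea looks like this; a standalone sketch in which the cells list is made up for illustration:

import re

# Hypothetical label/value cell texts, as they would come out of the spec table.
cells = ['Brand:', 'Acme', 'Color:', 'Blue', 'Material:', 'Cotton']

fields = {}
for i in range(0, len(cells), 2):                    # label cells sit at even indexes
    label, value = cells[i].strip(), cells[i + 1].strip()
    for name in ('Brand', 'Color', 'Material', 'Size'):
        if re.search('(?i)' + name + ':', label):    # what isFoundPattern does here
            fields[name.lower()] = value

print(fields)   # brand/color/material mapped to 'Acme'/'Blue'/'Cotton'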
Example 2: MyLinkedInMembers
# Required module: from utils.Regex import Regex [as alias]
# Or: from utils.Regex.Regex import isFoundPattern [as alias]
class MyLinkedInMembers(QThread):
    notifyLinkedIn = pyqtSignal(object)
    notifyMembers = pyqtSignal(object)
    cookieL = pyqtSignal(object)

    def __init__(self, spider, url, pageRange=None):
        QThread.__init__(self)
        # self.spider = Spider()
        self.spider = spider
        self.regex = Regex()
        self.url = url
        self.startPage = None
        self.endPage = None
        if self.regex.isFoundPattern('(?i)(\d+)-(\d+)', str(pageRange).strip()):
            pageRangeFormat = self.regex.getSearchedDataGroups('(?i)(\d+)-(\d+)', str(pageRange).strip())
            self.startPage = int(pageRangeFormat.group(1))
            self.endPage = int(pageRangeFormat.group(2))
        elif self.regex.isFoundPattern('(?i)(\d+)', str(pageRange).strip()):
            pageRangeFormat = self.regex.getSearchedDataGroups('(?i)(\d+)', str(pageRange).strip())
            self.startPage = int(pageRangeFormat.group(1))
            self.endPage = self.startPage

    def run(self):
        self.getMembers(self.url)
        self.notifyLinkedIn.emit('<font color=red><b>Finish scraping members.<b></font>')

    def getMembers(self, url, pageNumber=0):
        print 'Members URL: ' + url
        self.notifyLinkedIn.emit('<font color=green><b>Start Scraping All Members.<b></font>')
        self.notifyLinkedIn.emit('<b>Wait For 15 seconds Break...<b>')
        time.sleep(15)
        self.notifyLinkedIn.emit('<b>15 seconds Break Finish.<b>')
        groupData = self.spider.fetchData(str(url).replace('&amp;', '&'))
        groupData = self.regex.reduceNewLine(groupData)
        groupData = self.regex.reduceBlankSpace(groupData)
        print groupData
        print 'page number: ' + str(pageNumber)
        if pageNumber > 0:
            harvestedMembers = []
            allMembers = self.regex.getAllSearchedData('(?i)<li class="member" id="member-[^"]*"[^>]*?>(.*?)</div>',
                                                       groupData)
            for members in allMembers:
                memberId = self.regex.getSearchedData('(?i)data-li-memberId="([^"]*)"', members)
                memberName = self.regex.getSearchedData('(?i)data-li-fullName="([^"]*)"', members)
                memberTitle = self.regex.getSearchedData('(?i)<p class="headline">([^<]*?)</p>', members)
                memberTitle = self.regex.replaceData('(?i)&amp;', '&', memberTitle)
                harvestedMembers.append((memberId, memberName, memberTitle))
                self.notifyLinkedIn.emit('<b>Member ID: </b>%s <b>Member Name: </b>%s' % (memberId, memberName + ' (' + memberTitle + ')'))
            # members = self.regex.getAllSearchedData(
            #     '(?i)class="send-message" data-li-memberId="([^"]*)" data-li-fullName="([^"]*)"', groupData)
            # print members
            self.notifyMembers.emit(harvestedMembers)
            # for member in members:
            #     print member
            #     self.notifyLinkedIn.emit('<b>Member Name: </b>%s <b>Member ID: </b>%s' % (member[1], member[0]))
        urlNext = self.regex.getSearchedData('(?i)<a href="([^"]*)"[^>]*?>\s*?<strong>\s*?next', groupData)
        if urlNext and len(urlNext) > 0:
            # nextP = int(self.regex.getSearchedData('(?i).*?(\d+)$', urlNext.strip()))
            urlNext = self.regex.replaceData('(?i)&amp;', '&', urlNext)
            urlNext = self.regex.replaceData('(?i)split_page=\d+', 'split_page=', urlNext)
            pageNumber += 1
            if self.startPage <= pageNumber <= self.endPage:
                self.notifyLinkedIn.emit('<b>Wait for 15 second break...</b>')
                time.sleep(15)
                print 'sleep 15 s'
                self.notifyLinkedIn.emit('<b>15 second break finish!!!</b>')
                self.getMembers('http://www.linkedin.com' + urlNext + str(pageNumber), pageNumber)
            elif pageNumber < self.startPage:
                pageNumber = self.startPage
                self.notifyLinkedIn.emit('<b>Wait for 15 second break...</b>')
                time.sleep(15)
                print 'page number less 0 sleep'
                self.notifyLinkedIn.emit('<b>15 second break finish!!!</b>')
                self.getMembers('http://www.linkedin.com' + urlNext + str(pageNumber), pageNumber)
            if self.startPage is None and self.endPage is None:
                pageNumber += 1
                self.notifyLinkedIn.emit('<b>Wait for 15 second break...</b>')
                time.sleep(15)
                print 'page number less 0 sleep'
                self.notifyLinkedIn.emit('<b>15 second break finish!!!</b>')
                self.getMembers('http://www.linkedin.com' + urlNext + str(pageNumber), pageNumber)
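In __init__ above, isFoundPattern is called twice to accept either a "3-7" style page range or a single page number. That parsing step can be checked in isolation; a self-contained sketch using plain re, with the same two patterns as the example:

import re

def parse_page_range(page_range):
    # Returns (start, end) from '3-7', (n, n) from '5', (None, None) otherwise.
    text = str(page_range).strip()
    m = re.search(r'(?i)(\d+)-(\d+)', text)
    if m:
        return int(m.group(1)), int(m.group(2))
    m = re.search(r'(?i)(\d+)', text)
    if m:
        return int(m.group(1)), int(m.group(1))
    return None, None

print(parse_page_range('3-7'))   # (3, 7)
print(parse_page_range('5'))     # (5, 5)
print(parse_page_range(None))    # (None, None)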
Example 3: MyLinkedIn
# Required module: from utils.Regex import Regex [as alias]
# Or: from utils.Regex.Regex import isFoundPattern [as alias]
class MyLinkedIn(QThread):
    notifyLinkedIn = pyqtSignal(object)
    notifyMember = pyqtSignal(object)
    cookieL = pyqtSignal(object)

    def __init__(self, username, password):
        QThread.__init__(self)
        self.spider = Spider()
        self.regex = Regex()
        self.username = username
        self.password = password

    def run(self):
        if self.login():
            self.getAllGroups()

    def login(self):
        print "login start"
        self.notifyLinkedIn.emit("<b>Trying to login. Please wait...</b>")
        loginPageData = self.spider.fetchData("https://www.linkedin.com/uas/login?goback=&trk=hb_signin")
        loginPageData = self.regex.reduceNewLine(loginPageData)
        loginPageData = self.regex.reduceBlankSpace(loginPageData)
        ## <input type="hidden" name="session_redirect" value="" id="session_redirect-login"><input type="hidden" name="csrfToken" value="ajax:9073845200579364133" id="csrfToken-login"><input type="hidden" name="sourceAlias" value="0_7r5yezRXCiA_H0CRD8sf6DhOjTKUNps5xGTqeX8EEoi" id="sourceAlias-login">
        self.sessionRedirect = self.regex.getSearchedData(
            '(?i)<input type="hidden" name="session_redirect" value="([^"]*)"', loginPageData
        )
        self.token = self.regex.getSearchedData(
            '(?i)<input type="hidden" name="csrfToken" value="([^"]*)"', loginPageData
        )
        self.alias = self.regex.getSearchedData(
            '(?i)<input type="hidden" name="sourceAlias" value="([^"]*)"', loginPageData
        )
        loginParam = {
            "csrfToken": self.token,
            "isJsEnabled": "true",
            "session_key": self.username,
            "session_password": self.password,
            # 'session_key': '[email protected]',
            # 'session_password': 'ubuntu36',
            "session_redirect": self.sessionRedirect,
            "signin": "Sign In",
            "sourceAlias": self.alias,
            "source_app": "",
        }
        print loginParam
        print "start login"
        time.sleep(5)
        loginData = self.spider.login("https://www.linkedin.com/uas/login-submit", loginParam)
        loginData = self.regex.reduceNewLine(loginData)
        loginData = self.regex.reduceBlankSpace(loginData)
        # print loginData
        # A "sign out" link only appears when the session is authenticated.
        isLoggedIn = self.regex.isFoundPattern('(?i)<li class="signout">', loginData)
        if isLoggedIn:
            self.notifyLinkedIn.emit("<font color=green><b>Successfully Logged In.</b></font>")
            print "login success"
            self.cookieL.emit(self.spider)
            return True
        else:
            self.notifyLinkedIn.emit(
                "<font color=red><b>Something went wrong while logging in. Please try again, or check this username/password manually.</b></font>"
            )
            return False

    def getAllGroups(self):
        print "start groups"
        self.notifyLinkedIn.emit("<font color=green><b>Start Scraping All Groups.</b></font>")
        self.notifyLinkedIn.emit("<b>Wait for 15 second break...</b>")
        time.sleep(15)
        self.notifyLinkedIn.emit("<b>15 second break finish!!!</b>")
        self.notifyLinkedIn.emit("<font color=green><b>Fetching data for scraping your groups.</b></font>")
        groupsUrl = "http://www.linkedin.com/myGroups?trk=hb_side_grps_top"
        groupsData = self.spider.fetchData(groupsUrl)
        self.notifyLinkedIn.emit("<font color=green><b>Data fetching complete for scraping your groups.</b></font>")
        if groupsData is not None and len(groupsData) > 0:
            print "starting groups"
            groupsData = self.regex.reduceNewLine(groupsData)
            groupsData = self.regex.reduceBlankSpace(groupsData)
            print groupsData
            ## <a href="/groups?gid=72881&trk=myg_ugrp_ovr" class="private" title="This group is members only">MySQL Professionals</a>
            groupInfo = self.regex.getAllSearchedData('(?i)<a href="(/groups\?gid=[^"]*)"[^>]*>([^<]*)</a>', groupsData)
            if groupInfo is not None and len(groupInfo) > 0:
                members = []
                for group in groupInfo:
                    groupUrl = "http://www.linkedin.com" + str(group[0])
                    groupName = str(group[1])
                    self.notifyLinkedIn.emit("<b>Group Name: </b>%s <b>URL: </b>%s" % (groupName, groupUrl))
                    # http://www.linkedin.com/groups?members=&gid=65688&trk=anet_ug_memb
                    gid = self.regex.getSearchedData("(?i)gid=(\d+)", group[0])
                    print gid
                    groupUrl = "http://www.linkedin.com/groups?members=&gid=" + gid + "&trk=anet_ug_memb"
                    members.append((groupName, groupUrl))
                self.notifyMember.emit(members)
        self.notifyLinkedIn.emit("<font color=red><b>Finish Scraping All Groups.</b></font>")
Example 4: YoutubeScrapper
# Required module: from utils.Regex import Regex [as alias]
# Or: from utils.Regex.Regex import isFoundPattern [as alias]
class YoutubeScrapper(object):
    def __init__(self):
        self.logger = LogManager(__name__)
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()

    def scrapVideoDownloadUrl(self, url, filename=None):
        data = self.spider.fetchData(url)
        if data and len(data) > 0:
            title = self.scrapTitle(url)
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            dlUrlChunk = self.regex.getSearchedData('(?i)"url_encoded_fmt_stream_map": "([^"]*)"', data)
            dlUrlChunk = self.regex.replaceData('(?i)\\\\u0026', ' ', dlUrlChunk)
            dlUrlParts = dlUrlChunk.split(',')
            sig = ''
            video = ''
            videoUrl = ''
            print dlUrlParts
            for dlUrlPart in dlUrlParts:
                dlUrlPart = urllib2.unquote(dlUrlPart)
                print dlUrlPart
                ## TODO
                if self.regex.isFoundPattern('(?i)itag=22', dlUrlPart) or self.regex.isFoundPattern('(?i)itag=18',
                                                                                                    dlUrlPart):
                    urlPart = dlUrlPart.split(' ')
                    for part in urlPart:
                        print part
                        if self.regex.isFoundPattern('(?i)sig=.*?', part):
                            sig = self.regex.getSearchedData('(?i)sig=(.*?)$', part)
                        if self.regex.isFoundPattern('(?i)url=.*?', part):
                            video = self.regex.getSearchedData('(?i)url=(.*?)$', part)
                            print video
                    videoUrl = video + '&signature=' + sig
                    self.downloadDir = './natok.mp4'
                    print 'Video URL= ' + videoUrl
                    print self.downloadDir
                    break
            # dlPath = './natok.mp4' if filename is None else filename
            fname = self.regex.replaceData('\s+', '_', title)
            dlPath = './' + fname + '.mp4' if filename is None else filename
            print dlPath
            print '\n\n'
            if self.downloadFile(videoUrl, dlPath) is True:
                print 'Download complete'
        else:
            print 'No data found.'

    def scrapTitle(self, url):
        # https://www.youtube.com/oembed?url=http://www.youtube.com/watch?v=9bZkp7q19f0&format=xml
        xmlUrl = 'https://www.youtube.com/oembed?url=' + str(url) + '&format=xml'
        data = self.spider.fetchData(xmlUrl)
        if data and len(data) > 0:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            print data
            return self.regex.getSearchedData('(?i)<title>([^<]*)</title>', data)

    def downloadFile(self, url, downloadPath, retry=0):
        try:
            opener = urllib2.build_opener(urllib2.HTTPRedirectHandler(),
                                          urllib2.HTTPHandler(debuglevel=0),
                                          urllib2.HTTPSHandler(debuglevel=0))
            opener.addheaders = [
                ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:14.0) Gecko/20100101 Firefox/14.0.1'),
                ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
                ('Connection', 'keep-alive')]
            # resp = opener.open(url, timeout=10)
            resp = urllib2.urlopen(url, timeout=60)
            print 'ok'
            print resp.info()
            contentLength = resp.info()['Content-Length']
            contentLength = self.regex.getSearchedData('(?i)^(\d+)', contentLength)
            totalSize = float(contentLength)
            directory = os.path.dirname(downloadPath)
            if not os.path.exists(directory):
                os.makedirs(directory)
            currentSize = 0
            dl_file = open(downloadPath, 'ab')
            try:
                # Resume a partial download by sending a Range header for the bytes already on disk.
                if os.path.getsize(downloadPath):
                    start = os.path.getsize(downloadPath)
                    currentSize = start
                    opener.addheaders.append(('Range', 'bytes=%s-' % (start)))
            except Exception, x:
                print x
            res = opener.open(url, timeout=60)
            CHUNK_SIZE = 256 * 1024
            while True:
                data = res.read(CHUNK_SIZE)
                # data = resp.read(CHUNK_SIZE)
# ......... (remaining code omitted) .........
Example 5: YtDownloadManager
# Required module: from utils.Regex import Regex [as alias]
# Or: from utils.Regex.Regex import isFoundPattern [as alias]
class YtDownloadManager(object):
    def __init__(self):
        self.spider = Spider()
        self.regex = Regex()
        self.utils = Utils()

    def scrapVideoDownloadUrl(self, url):
        data = self.spider.fetchData(url)
        print data
        soup = BeautifulSoup(data)
        exit(1)
        if data and len(data) > 0:
            title = self.scrapTitle(url)
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            dlUrlChunk = self.regex.getSearchedData('(?i)"url_encoded_fmt_stream_map": "([^"]*)"', data)
            dlUrlChunk = self.regex.replaceData('(?i)\\\\u0026', ' ', dlUrlChunk)
            dlUrlParts = dlUrlChunk.split(',')
            sig = ''
            video = ''
            videoUrl = ''
            print dlUrlParts
            for dlUrlPart in dlUrlParts:
                dlUrlPart = urllib2.unquote(dlUrlPart)
                print dlUrlPart
                # if self.regex.isFoundPattern('(?i)itag=5', dlUrlPart):
                urlPart = dlUrlPart.split(' ')
                for part in urlPart:
                    print part
                    if self.regex.isFoundPattern('(?i)sig=.*?', part):
                        sig = self.regex.getSearchedData('(?i)sig=(.*?)$', part)
                    if self.regex.isFoundPattern('(?i)url=.*?', part):
                        video = self.regex.getSearchedData('(?i)url=(.*?)$', part)
                        print video
                videoUrl = video + '&signature=' + sig
                self.downloadDir = './test.flv'
                # print 'Video URL= ' + videoUrl
                # print self.downloadDir
                # dlPath = './test.flv'
                # print dlPath
                print '\n\n'
                # if self.downloadFile(videoUrl, dlPath) is True:
                #     break

    def scrapTitle(self, url):
        # https://www.youtube.com/oembed?url=http://www.youtube.com/watch?v=9bZkp7q19f0&format=xml
        xmlUrl = 'https://www.youtube.com/oembed?url=' + str(url) + '&format=xml'
        data = self.spider.fetchData(xmlUrl)
        if data and len(data) > 0:
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            return self.regex.getSearchedData('(?i)<title>([^<]*)</title>', data)

    def downloadFile(self, url, downloadPath, retry=0):
        try:
            opener = urllib2.build_opener(urllib2.HTTPRedirectHandler(),
                                          urllib2.HTTPHandler(debuglevel=0),
                                          urllib2.HTTPSHandler(debuglevel=0))
            opener.addheaders = [
                ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:14.0) Gecko/20100101 Firefox/14.0.1'),
                ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
                ('Connection', 'keep-alive')]
            # resp = opener.open(url, timeout=10)
            resp = urllib2.urlopen(url, timeout=30)
            print resp.info()
            contentLength = resp.info()['Content-Length']
            contentLength = self.regex.getSearchedData('(?i)^(\d+)', contentLength)
            totalSize = float(contentLength)
            directory = os.path.dirname(downloadPath)
            if not os.path.exists(directory):
                os.makedirs(directory)
            dl_file = open(downloadPath, 'wb')
            currentSize = 0
            CHUNK_SIZE = 32768
            while True:
                data = resp.read(CHUNK_SIZE)
                if not data:
                    break
                currentSize += len(data)
                dl_file.write(data)
                print('============> ' +
                      str(round(float(currentSize * 100) / totalSize, 2)) +
                      '% of ' + str(totalSize) + ' bytes')
                notifyDl = '===> Downloaded ' + str(round(float(currentSize * 100) / totalSize, 2)) + '% of ' + str(
                    totalSize) + ' KB.'
                if currentSize >= totalSize:
                    dl_file.close()
                    return True
        except Exception, x:
            error = 'Error downloading: ' + str(x)
            return False
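Both downloaders (Examples 4 and 5) follow the same pattern: read the Content-Length header, then pull fixed-size chunks in a loop and report percentage progress. A compact standalone version of that loop, as a sketch; it uses urllib2 like the examples, so it is Python 2 code:

import urllib2

def download(url, path, chunk_size=32768):
    # Fetch url to path in fixed-size chunks, printing percentage progress.
    resp = urllib2.urlopen(url, timeout=30)
    total = float(resp.info()['Content-Length'])
    done = 0
    out = open(path, 'wb')
    try:
        while True:
            data = resp.read(chunk_size)
            if not data:
                break
            out.write(data)
            done += len(data)
            print('=> %.2f%% of %d bytes' % (done * 100 / total, total))
    finally:
        out.close()
    return done >= total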
Example 6: MyLinkedInMessage
# Required module: from utils.Regex import Regex [as alias]
# Or: from utils.Regex.Regex import isFoundPattern [as alias]
class MyLinkedInMessage(QThread):
    notifyLinkedIn = pyqtSignal(object)

    def __init__(self, spider, memberList, subject, message):
        QThread.__init__(self)
        # self.spider = Spider()
        self.spider = spider
        self.regex = Regex()
        self.memberList = memberList
        self.subject = unicode(subject)
        self.message = unicode(message)

    def run(self):
        self.sendMessage()
        self.notifyLinkedIn.emit('<font color=red><b>Finish Sending All Messages.</b></font>')

    def sendMessage(self):
        print self.memberList
        for member in self.memberList:
            messageUrl = 'http://www.linkedin.com/inbox/compose/dialog?insider=true&connId=' + str(member[1])
            print messageUrl
            # messageUrl = 'http://www.linkedin.com/inbox/compose/dialog?insider=true&connId=' + '65471931'
            # data = self.spider.fetchData('http://www.linkedin.com/inbox/compose/dialog?insider=true&connId=65471931')
            data = self.spider.fetchData(messageUrl)
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            fromName = self.regex.getSearchedData('(?i)<input type="hidden" name="fromName" value="([^"]*)"', data)
            fromEmail = self.regex.getSearchedData('(?i)<input type="hidden" name="fromEmail" value="([^"]*)"', data)
            # connectionIds = self.regex.getSearchedData('(?i)<input type="hidden" name="connectionIds" value="([^"]*)"', data)
            csrfToken = self.regex.getSearchedData('(?i)<input type="hidden" name="csrfToken" value="([^"]*)"', data)
            sourceAlias = self.regex.getSearchedData('(?i)<input type="hidden" name="sourceAlias" value="([^"]*)"', data)
            linkedInSubject = u'Hi ' + unicode(member[0]).split(' ')[0] + self.subject
            linkedInMessage = u'Hi ' + unicode(member[0]).split(' ')[0] + u',\n' + self.message
            print linkedInMessage
            params = {'addMoreRcpts': 'false',
                      'ajaxSubmit': 'Send Message',
                      'allowEditRcpts': 'true',
                      'body': linkedInMessage,
                      'connectionIds': str(member[1]),
                      'connectionNames': '',
                      'csrfToken': csrfToken,
                      'fromEmail': fromEmail,
                      'fromName': fromName,
                      'itemID': '',
                      'openSocialAppBodySuffix': '',
                      'showRecipeints': 'showRecipeints',
                      'sourceAlias': sourceAlias,
                      'st': '',
                      'subject': linkedInSubject,
                      'submit': 'Send Message',
                      'viewerDestinationUrl': ''}
            print params
            msgUrl = 'http://www.linkedin.com/msgToConns?displayCreate='
            data = self.spider.fetchData(msgUrl, params)
            data = self.regex.reduceNewLine(data)
            data = self.regex.reduceBlankSpace(data)
            # A success alert div in the response means the message went through.
            if self.regex.isFoundPattern('(?i)<div class="alert success">', data):
                print 'Message Sent.'
                self.notifyLinkedIn.emit('<font color=green><b>Successfully Sent Message To: %s</b></font>' % member[0])
            else:
                self.notifyLinkedIn.emit('<font color=red><b>Something Went Wrong Sending Message To: %s</b></font>' % member[0])
            # params = {'addMoreRcpts': 'false',
            #           'ajaxSubmit': 'Send Message',
            #           'allowEditRcpts': 'true',
            #           'body': 'Script Test',
            #           'connectionIds': '65471931',
            #           'connectionNames': '',
            #           'csrfToken': 'ajax: 6539671039643459056',
            #           'fromEmail': '467728216',
            #           'fromName': 'Mehedi Hasan',
            #           'itemID': '',
            #           'openSocialAppBodySuffix': '',
            #           'showRecipeints': 'showRecipeints',
            #           'sourceAlias': '0_6k2algZhQ6vbvlhlVSByxRKi0OB9NXjxrnJYWBFvfhn',
            #           'st': '',
            #           'subject': 'Script Test',
            #           'submit': 'Send Message',
            #           'viewerDestinationUrl': ''}
            # <input type="hidden" name="fromName" value="Mehedi Hasan" id="fromName-msgForm">
            # <input type="hidden" name="showRecipeints" value="showRecipeints" id="showRecipeints-msgForm">
            # <input type="hidden" name="fromEmail" value="467728216" id="fromEmail-msgForm">
            # <input type="hidden" name="connectionIds" value="65471931" id="connectionIds-msgForm">
            # <input type="hidden" name="connectionNames" value="" id="connectionNames-msgForm">
            # <input type="hidden" name="allowEditRcpts" value="true" id="allowEditRcpts-msgForm">
            # <input type="hidden" name="addMoreRcpts" value="false" id="addMoreRcpts-msgForm">
            # <input type="hidden" name="itemID" value="" id="itemID-msgForm">
            # <input type="hidden" name="openSocialAppBodySuffix" value="" id="openSocialAppBodySuffix-msgForm">
            # <input type="hidden" name="st" value="" id="st-msgForm">
            # <input type="hidden" name="viewerDestinationUrl" value="" id="viewerDestinationUrl-msgForm">
            # <input type="hidden" name="csrfToken" value="ajax:6539671039643459056" id="csrfToken-msgForm">
            # <input type="hidden" name="sourceAlias" value="0_6k2algZhQ6vbvlhlVSByxRKi0OB9NXjxrnJYWBFvfhn" id="sourceAlias-msgForm">
            """
            msgUrl1 = 'http://www.linkedin.com/msgToConns?displayCreate='
            msgParams = {}
            addMoreRcpts false
# ......... (remaining code omitted) .........
Example 7: MainForm
# Required module: from utils.Regex import Regex [as alias]
# Or: from utils.Regex.Regex import isFoundPattern [as alias]
# ......... (code before this point omitted) .........
            self.webToPdfB.threadPdfWritingStatus.connect(self.appendStatus)
            self.webToPdfB.threadPdfWritingDone.connect(self.pdfGenFinishedB)
            f = open(self.fileNameB, 'rb')
            self.listsB = f.readlines()
            f.close()
            pdfFiles = [f for f in os.listdir(self.fileDirB) if f.endswith('.pdf')]
            if len(pdfFiles) > 0:
                # Continue numbering from the last PDF already on disk.
                self.pdfCounterB = int(self.regex.getSearchedData('(?i)^(\d+)_', pdfFiles[-1])) + 1
            self.totalUrlB = len(self.listsB)
            self.alreadyClickedB = True
            self.startTime = time.clock()
            self.pdfGenFinishedB()
        else:
            QMessageBox.warning(None, 'Warning', 'Please Select your URL List and PDF writing Path.')

    def pdfGenFinished(self):
        if self.lists is not None and len(self.lists) > 0:
            self.currentUrlA += 1
            url = self.lists.pop(0)
            self.lineEditWebAddress.setText(url)
            url = url.strip()
            self.labelProStatusA.setText(
                '<font color="green" size=4><b>For grouping "A": <u> %s </u> total items in the batch, processing <u> %s </u> out of <u> %s </u></b></font>' % (
                    str(self.totalUrlA), str(self.currentUrlA), str(self.totalUrlA)))
            pdfFile = str(url).split('/')[-1]
            print 'pdf file : ' + pdfFile
            pdfFile = self.regex.getSearchedData('(?i)([a-zA-Z0-9-_ ]*?)\.[a-zA-Z0-9_]*$', pdfFile)
            pdfFiles = [f for f in os.listdir(self.fileDir) if f.endswith('.pdf')]
            finalPdfFile = ''
            i = 2
            for file in pdfFiles:
                # isFoundPattern flags an existing PDF with the same base name,
                # so duplicates get a "_copy_N" suffix instead of overwriting.
                if self.regex.isFoundPattern('(?i)' + pdfFile, file):
                    index = self.regex.getSearchedData('(?i)(\d+).*?$', file)
                    finalPdfFile = str(index) + '_' + str(pdfFile) + '_copy_' + str(i) + '.pdf'
                    i += 1
            if len(finalPdfFile) == 0:
                finalPdfFile = str(self.pdfCounter) + '_' + pdfFile + '.pdf'
            else:
                self.pdfCounter -= 1
            self.webToPdf.printWebHtmlToPdf(url, self.fileDir + '/', finalPdfFile, 'A')
            self.pdfCounter += 1
        else:
            self.showStatus('Pdf Generation Completed')
            self.alreadyClicked = False
            self.totalUrlA = 0
            self.currentUrlA = 0
            # self.labelProStatusA.setText('')

    def pdfGenFinishedB(self):
        if self.listsB is not None and len(self.listsB) > 0:
            self.currentUrlB += 1
            url = self.listsB.pop(0)
            self.lineEditWebAddress.setText(url)
            url = url.strip()
            # self.labelProStatusB.setText(
            #     '<font color="green" size=4><b>For grouping "B": <u> %s </u> total items in the batch, processing <u> %s </u> out of <u> %s </u></b></font>' % (
            #         str(self.totalUrlB), str(self.currentUrlB), str(self.totalUrlB)))
            elapsedTime = time.clock() - self.startTime
            print elapsedTime
            self.labelProStatusB.setText(
                '<font size=4><b>URL <u> %s </u> of <u> %s </u> being processed. Time elapsed: %s</b></font>' % (