This article collects typical usage examples of the Python method urllib.URLopener.retrieve. If you have been wondering what exactly URLopener.retrieve does, how to use it, or what working code looks like, the curated samples below should help. You can also read further about the class it belongs to, urllib.URLopener.
Twelve code examples of URLopener.retrieve are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
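All of the examples target the legacy Python 2 urllib (in Python 3 this class survives only as the deprecated urllib.request.URLopener). The core pattern they share is small enough to show up front; a minimal standalone sketch, with a placeholder URL and file name:

from urllib import URLopener

opener = URLopener()
# retrieve() downloads the URL into the named file and returns (filename, headers)
fn, headers = opener.retrieve('http://example.com/data.txt', 'data.txt')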
Example 1: download_package
# Module to import: from urllib import URLopener [as alias]
# Or: from urllib.URLopener import retrieve [as alias]
def download_package(pkg_name, pkg_version):
    '''Download the required package. Sometimes the download can be flaky, so we use the
    retry decorator.'''
    pkg_type = 'sdist'  # Don't download wheel archives for now
    # This JSON endpoint is not provided by PyPI mirrors, so we always need to get it
    # from pypi.python.org.
    pkg_info = json.loads(urlopen('https://pypi.python.org/pypi/%s/json' % pkg_name).read())
    downloader = URLopener()
    for pkg in pkg_info['releases'][pkg_version]:
        if pkg['packagetype'] == pkg_type:
            filename = pkg['filename']
            expected_md5 = pkg['md5_digest']
            if os.path.isfile(filename) and check_md5sum(filename, expected_md5):
                print "File with matching md5sum already exists, skipping %s" % filename
                return True
            pkg_url = "{0}/packages/{1}".format(PYPI_MIRROR, pkg['path'])
            print "Downloading %s from %s" % (filename, pkg_url)
            downloader.retrieve(pkg_url, filename)
            actual_md5 = md5(open(filename, 'rb').read()).hexdigest()
            if check_md5sum(filename, expected_md5):
                return True
            else:
                print "MD5 mismatch in file %s (got %s, expected %s)." % (
                    filename, actual_md5, expected_md5)
                return False
    print "Could not find archive to download for %s %s %s" % (
        pkg_name, pkg_version, pkg_type)
    sys.exit(1)
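Example 1 calls a check_md5sum helper that is not part of the snippet; a minimal sketch of what it presumably does (an assumption, not the project's actual implementation):

from hashlib import md5

def check_md5sum(filename, expected_md5):
    # Hash the file in binary mode and compare with the expected hex digest.
    with open(filename, 'rb') as f:
        return md5(f.read()).hexdigest() == expected_md5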
Example 2: getcif
# Module to import: from urllib import URLopener [as alias]
# Or: from urllib.URLopener import retrieve [as alias]
def getcif(target):
    """
    Get all ICSD cif files listed in the target file.
    The target file should contain tags like '# BCC'.
    """
    matgenIDs = getMatgenIDs()
    if not os.path.isdir('./ciffiles'):
        os.makedirs('./ciffiles')
    with open(target, 'r') as f:
        st = f.readline()
        t1 = time.time()
        while st:
            if st[0] == '#':
                tg = st.split()[-1]
                st = f.readline()
                t2 = time.time()
                print "time for the %s = %2.2f sec" % (tg, t2 - t1)
                t1 = time.time()
                continue
            st = st.strip()
            ind = getID(st)
            if ind in matgenIDs:
                st = f.readline()
                continue  # skip matgen compounds
            URL = prefix + tg + '/' + st + '/' + st + '.cif'
            testfile = URLopener()
            try:
                testfile.retrieve(URL, 'ciffiles/' + st)
            except IOError:
                print "Error: ", URL
            st = f.readline()
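getcif leans on helpers that are not shown here (getMatgenIDs, getID and a module-level prefix URL). Based on its docstring and parsing logic, the target file it walks would look something like this (the IDs are illustrative):

# BCC
icsd_076041
icsd_102072
# FCC
icsd_041420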
Example 3: handle_starttag
# Module to import: from urllib import URLopener [as alias]
# Or: from urllib.URLopener import retrieve [as alias]
def handle_starttag(self, tag, attrs):
    global bDoWork
    # Only parse 'anchor' tags.
    if tag == "a":
        # Check the list of defined attributes.
        for name, value in attrs:
            # If href is defined and points at a .jpg, consider downloading it.
            if name == "href":
                if value.endswith("jpg"):
                    if "http://" not in value and bDoWork:
                        bDoWork = False
                        tmpoutput = value
                        imgurl = 'http://apod.nasa.gov/apod/' + tmpoutput
                        filename = imgurl.split('/')[-1]
                        if (not os.path.isfile(filename)) and ('apod.nasa.gov' in imgurl):
                            image = URLopener()
                            image.retrieve(imgurl, filename)
                            sleep(lWaitTime)
                        elif os.path.isfile(filename):
                            print "Verified: " + filename
                break
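handle_starttag is a callback on an HTMLParser subclass: it fires once per opening tag while the parser consumes the markup. A minimal driver, with the subclass name assumed for illustration:

from HTMLParser import HTMLParser  # Python 2 module name
from urllib import urlopen

class APODParser(HTMLParser):  # hypothetical class that owns handle_starttag
    def handle_starttag(self, tag, attrs):
        pass  # the logic from Example 3 goes here

parser = APODParser()
parser.feed(urlopen('http://apod.nasa.gov/apod/').read())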
Example 4: command
# Module to import: from urllib import URLopener [as alias]
# Or: from urllib.URLopener import retrieve [as alias]
def command(self):
    args = list(self.args)
    method, url = args[0:2]
    if not url.startswith('http'):
        url = 'http://%s:%s%s' % (self.session.config.sys.http_host,
                                  self.session.config.sys.http_port,
                                  ('/' + url).replace('//', '/'))
    # FIXME: The python URLopener doesn't seem to support other verbs,
    # which is really quite lame.
    method = method.upper()
    assert method in ('GET', 'POST')
    qv, pv = [], []
    if method == 'POST':
        which = pv
    else:
        which = qv
    for arg in args[2:]:
        if '=' in arg:
            which.append(tuple(arg.split('=', 1)))
        elif arg.upper()[0] == 'P':
            which = pv
        elif arg.upper()[0] == 'Q':
            which = qv
    if qv:
        qv = urlencode(qv)
        url += ('?' in url and '&' or '?') + qv

    # Log us in automagically!
    httpd = self.session.config.http_worker.httpd
    global HACKS_SESSION_ID
    if HACKS_SESSION_ID is None:
        HACKS_SESSION_ID = httpd.make_session_id(None)
    mailpile.auth.SetLoggedIn(None,
                              user='Hacks plugin HTTP client',
                              session_id=HACKS_SESSION_ID)
    cookie = httpd.session_cookie
    try:
        uo = URLopener()
        uo.addheader('Cookie', '%s=%s' % (cookie, HACKS_SESSION_ID))
        with TcpConnBroker().context(need=[TcpConnBroker.OUTGOING_HTTP]):
            if method == 'POST':
                (fn, hdrs) = uo.retrieve(url, data=urlencode(pv))
            else:
                (fn, hdrs) = uo.retrieve(url)
        hdrs = unicode(hdrs)
        data = open(fn, 'rb').read().strip()
        if data.startswith('{') and 'application/json' in hdrs:
            data = json.loads(data)
        return self._success('%s %s' % (method, url), result={
            'headers': hdrs.splitlines(),
            'data': data
        })
    except:
        self._ignore_exception()
        return self._error('%s %s' % (method, url))
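This command leans on a quirk worth calling out: URLopener.retrieve issues a POST when its data argument is supplied and a plain GET otherwise, which is why only those two verbs can be supported. A standalone sketch of the pattern (host and paths are placeholders):

from urllib import URLopener, urlencode

uo = URLopener()
# GET: no data argument
fn, hdrs = uo.retrieve('http://127.0.0.1:33411/api/status')
# POST: a form-encoded body passed through data
fn, hdrs = uo.retrieve('http://127.0.0.1:33411/api/login',
                       data=urlencode([('user', 'x'), ('pass', 'y')]))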
Example 5: command
# Module to import: from urllib import URLopener [as alias]
# Or: from urllib.URLopener import retrieve [as alias]
def command(self):
    args = list(self.args)
    method, url = args[0:2]
    if not url.startswith("http"):
        url = "http://%s:%s%s" % (
            self.session.config.sys.http_host,
            self.session.config.sys.http_port,
            ("/" + url).replace("//", "/"),
        )
    # FIXME: The python URLopener doesn't seem to support other verbs,
    # which is really quite lame.
    method = method.upper()
    assert method in ("GET", "POST")
    qv, pv = [], []
    if method == "POST":
        which = pv
    else:
        which = qv
    for arg in args[2:]:
        if "=" in arg:
            which.append(tuple(arg.split("=", 1)))
        elif arg.upper()[0] == "P":
            which = pv
        elif arg.upper()[0] == "Q":
            which = qv
    if qv:
        qv = urlencode(qv)
        url += ("?" in url and "&" or "?") + qv

    # Log us in automagically!
    httpd = self.session.config.http_worker.httpd
    global HACKS_SESSION_ID
    if HACKS_SESSION_ID is None:
        HACKS_SESSION_ID = httpd.make_session_id(None)
    mailpile.auth.SetLoggedIn(None, user="Hacks plugin HTTP client", session_id=HACKS_SESSION_ID)
    cookie = httpd.session_cookie
    try:
        uo = URLopener()
        uo.addheader("Cookie", "%s=%s" % (cookie, HACKS_SESSION_ID))
        with TcpConnBroker().context(need=[TcpConnBroker.OUTGOING_HTTP], oneshot=True):
            if method == "POST":
                (fn, hdrs) = uo.retrieve(url, data=urlencode(pv))
            else:
                (fn, hdrs) = uo.retrieve(url)
        hdrs = unicode(hdrs)
        data = open(fn, "rb").read().strip()
        if data.startswith("{") and "application/json" in hdrs:
            data = json.loads(data)
        return self._success("%s %s" % (method, url), result={"headers": hdrs.splitlines(), "data": data})
    except:
        self._ignore_exception()
        return self._error("%s %s" % (method, url))
Example 6: SlippyCache
# Module to import: from urllib import URLopener [as alias]
# Or: from urllib.URLopener import retrieve [as alias]
class SlippyCache(object):
    """This is a basic map tile cache used by the SlippyPanel class
    to retrieve and store locally the images that form the map"""

    def __init__(self, source, proxy=""):
        self.source = source
        if len(proxy) > 0:
            self._opener = URLopener({"http": proxy})
        else:
            self._opener = URLopener()
        self._fetchQueue = Queue(0)
        self._fetchThread = Thread(target=self._FetchTile)
        self._fetchThread.setDaemon(True)
        self._fetchThread.start()

    def _FetchTile(self):
        task = ""
        while task is not None:
            task = self._fetchQueue.get()
            url, fname = task
            if not os.path.isfile(fname):
                print "Getting", fname
                try:
                    self._opener.retrieve(url, "tmp.png")
                    shutil.move("tmp.png", fname)
                except IOError:
                    pass
            self._fetchQueue.task_done()

    def StartNewFetchBatch(self):
        try:
            while True:
                item = self._fetchQueue.get(False)
                self._fetchQueue.task_done()
        except Empty:
            pass

    def GetTileFilename(self, xtile, ytile, zoom):
        numTiles = 2 ** zoom
        while xtile >= numTiles:
            xtile -= numTiles
        if xtile < 0 or ytile < 0 or ytile >= numTiles:
            # Indicate that this is not a valid tile
            return None
        else:
            fname = "/".join([self.source.get_full_name(), str(zoom), str(xtile), str(ytile) + ".png"])
            if not os.path.isfile(fname):
                url = self.source.get_tile_url(xtile, ytile, zoom)
                # Ensure that the directory exists
                dname = os.path.dirname(fname)
                if not os.path.isdir(dname):
                    os.makedirs(dname)
                self._fetchQueue.put((url, fname))
            # Valid tile, though it may not yet be present in the cache
            return fname
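A sketch of how the cache might be driven; the source object's interface (get_full_name and get_tile_url) is inferred from the calls above, and the OpenStreetMap tile URL is purely illustrative:

class OSMSource(object):
    # Hypothetical tile source satisfying the interface SlippyCache expects.
    def get_full_name(self):
        return "osm_tiles"

    def get_tile_url(self, xtile, ytile, zoom):
        return "http://tile.openstreetmap.org/%d/%d/%d.png" % (zoom, xtile, ytile)

cache = SlippyCache(OSMSource())
fname = cache.GetTileFilename(527, 347, 10)  # queues a download if the tile is not cached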
Example 7: command
# Module to import: from urllib import URLopener [as alias]
# Or: from urllib.URLopener import retrieve [as alias]
def command(self):
    args = list(self.args)
    method, url = args[0:2]
    if not url.startswith('http'):
        url = 'http://%s:%s%s' % (self.session.config.sys.http_host,
                                  self.session.config.sys.http_port,
                                  ('/' + url).replace('//', '/'))
    # FIXME: The python URLopener doesn't seem to support other verbs,
    # which is really quite lame.
    method = method.upper()
    assert method in ('GET', 'POST')
    qv, pv = [], []
    if method == 'POST':
        which = pv
    else:
        which = qv
    for arg in args[2:]:
        if '=' in arg:
            which.append(tuple(arg.split('=', 1)))
        elif arg.upper()[0] == 'P':
            which = pv
        elif arg.upper()[0] == 'Q':
            which = qv
    if qv:
        qv = urlencode(qv)
        url += ('?' in url and '&' or '?') + qv
    try:
        uo = URLopener()
        if method == 'POST':
            (fn, hdrs) = uo.retrieve(url, data=urlencode(pv))
        else:
            (fn, hdrs) = uo.retrieve(url)
        hdrs = unicode(hdrs)
        data = open(fn, 'rb').read().strip()
        if data.startswith('{') and 'application/json' in hdrs:
            data = json.loads(data)
        return self._success('%s %s' % (method, url), result={
            'headers': hdrs.splitlines(),
            'data': data
        })
    except:
        self._ignore_exception()
        return self._error('%s %s' % (method, url))
Example 8: download_reports
# Module to import: from urllib import URLopener [as alias]
# Or: from urllib.URLopener import retrieve [as alias]
def download_reports(years=_years, weeks=_weeks):
    '''Crawl the IMoH website and download all Excel files for the given weeks and years'''
    # Create paths for the logging files and the download location
    prefix = datetime.now().strftime('./log/weeklies/%y%m%d_%H%M%S_')
    log_d = prefix + "downloads.log"
    log_f = prefix + "FAILED.log"
    base_loc = 'http://www.health.gov.il/PublicationsFiles/IWER'
    # URL object
    my_file = URLopener()
    for year in years:
        print "\n", year,
        for week in weeks:
            f = open(log_d, 'a')
            f.write('\n{year}_{week}: '.format(week=week, year=year))
            # There are several possible remote path patterns
            options = ['{base}{week:02d}_{year}.xls'.format(base=base_loc, week=week, year=year),
                       '{base}{week}_{year}.xls'.format(base=base_loc, week=week, year=year),
                       '{base}{week:02d}_{year}.xlsx'.format(base=base_loc, week=week, year=year),
                       '{base}{week}_{year}.xlsx'.format(base=base_loc, week=week, year=year)]
            for i, o in enumerate(options):
                filetype = o.split(".")[-1]
                try:
                    # Try the different remote paths, but always save to the same local path
                    my_file.retrieve(o,
                                     './data/weeklies/{year}_{week:02d}.{ft}'.format(week=week, year=year, ft=filetype))
                    # On success, record which file type (xls/xlsx) was saved
                    f.write('{ft}'.format(ft=filetype))
                    # If the download succeeds, close the log file and break the loop
                    f.close()
                    break
                except IOError:
                    # On failure, write the attempt number to the log
                    f.write("{} ".format(i + 1))
                    # If all options were exhausted, the download has failed
                    if i == len(options) - 1 and week != 53:
                        print "== {year}_{week:02d} FAILED ==".format(week=week, year=year),
                        with open(log_f, 'a') as failed:
                            failed.write("{year}_{week:02d} FAILED\n".format(week=week, year=year))
                        f.write("FAILED")
                        f.close()
            f.close()
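For year 2014 and week 7, say, the four candidate paths generated above would be the following; note that the {week} and {week:02d} variants only differ for weeks 1 through 9:

http://www.health.gov.il/PublicationsFiles/IWER07_2014.xls
http://www.health.gov.il/PublicationsFiles/IWER7_2014.xls
http://www.health.gov.il/PublicationsFiles/IWER07_2014.xlsx
http://www.health.gov.il/PublicationsFiles/IWER7_2014.xlsx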
Example 9: install_firmware
# Module to import: from urllib import URLopener [as alias]
# Or: from urllib.URLopener import retrieve [as alias]
def install_firmware(self, new_version):
    logging.info('Update firmware request')
    logging.info('Current firmware version: {}'.format(
        self.firmware_version))
    logging.info('Firmware version to install: {}'.format(new_version))
    fw_fname_prefix = 'sensa-%s' % new_version
    fw_check_url = '%sstatic/firmware/%s.chk' % (
        self.api_url, fw_fname_prefix)
    fw_filename = fw_fname_prefix + '.zip'
    fw_url = '%sstatic/firmware/%s' % (self.api_url, fw_filename)
    # Firmware install shell script
    deploy_script = 'deploy.sh'
    # Download the firmware
    fw_file = URLopener()
    try:
        fw_file.retrieve(fw_url, fw_filename)
    except IOError:
        logging.error('Error during firmware download')
        return 1
    fw_file.close()
    # Check the integrity of the downloaded firmware
    try:
        fw_checksum_req = requests.get(fw_check_url)
    except requests.exceptions.RequestException:
        logging.error('Error during firmware checksum download')
        return 1
    expected_check = fw_checksum_req.text.split()
    fw_checksum = md5(open(fw_filename, 'rb').read()).hexdigest()
    if (fw_checksum != expected_check[0] or
            fw_filename != expected_check[1]):
        logging.error('Error checking firmware integrity')
        return 1
    logging.info('Files checked. Updating')
    # Unzip
    try:
        fw_file = ZipFile(fw_filename, 'r')
    except IOError:
        logging.error('Error reading local firmware file')
        return 1
    fw_file.extractall()
    fw_file.close()
    # Run the firmware install script
    call(['sh', deploy_script])
    # Remove the firmware archive
    call(['rm', fw_filename])
    # Remove the install script
    call(['rm', deploy_script])
    config = SafeConfigParser()
    config.read(self.config_file)
    # Update the firmware version in the config file
    config.set('device', 'firmware_version', new_version)
    try:
        conf_file = open(self.config_file, 'wb')
        try:
            config.write(conf_file)
        finally:
            conf_file.close()
    except IOError:
        logging.error('Error updating version in config file')
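The integrity check above expects the .chk file to hold the digest and the file name separated by whitespace, in the style of md5sum output; for example (values are illustrative):

d41d8cd98f00b204e9800998ecf8427e  sensa-1.2.0.zip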
Example 10: DownloadImageFromAPODPage
# Module to import: from urllib import URLopener [as alias]
# Or: from urllib.URLopener import retrieve [as alias]
def DownloadImageFromAPODPage(url):
    if "ap140302" in url:
        print "stop here"  # leftover debugging hook
    # Copy all of the content from the provided web page
    webpage = urlopen(url).read()
    global bDoWork
    global bCleanExtras
    global bVerified
    global strAPODPicturesFolder
    strAPODFileName = ""
    # Retrieve the titles and links with just Beautiful Soup
    soup = BeautifulSoup(webpage)
    for url in soup.findAll("a"):
        imgurl = url.get('href')
        if not ('http://' in imgurl):
            imgurl = 'http://apod.nasa.gov/' + url.get('href')
        if imgurl[len(imgurl) - 3:len(imgurl)] == "jpg":
            strAPODFileName = imgurl.strip().split('/')[-1]
            filename = strAPODPicturesFolder + strAPODFileName
            if bDoWork:
                bDoWork = False
                if (not os.path.isfile(filename)) and ('apod.nasa.gov' in imgurl):
                    image = URLopener()
                    image.retrieve(imgurl, filename)
                    sleep(lWaitTime)
                elif os.path.isfile(filename):
                    bVerified = True
                    if not bCleanExtras:
                        # If we are not cleaning extras we can stop here
                        break
            else:
                if os.path.isfile(filename):
                    # This is the logic to clean extra downloads/duplicates
                    os.remove(filename)
    # Seek the page title
    txtName = ""
    bForce = False
    for bTag in soup.findAll("title"):
        if txtName == "":
            txtName = bTag.text
            txtName = txtName.replace("APOD:", "").strip()
            if "\r" in txtName or "\n" in txtName:
                txtName = txtName.strip().replace("\r", ' ').replace("\n", " ").replace("   ", " ").replace("  ", " ")
                bForce = True
    for bTag in soup.findAll("b"):
        if txtName == "":
            txtName = bTag.text
            txtName = txtName.strip()
            if "\r" in txtName or "\n" in txtName:
                txtName = txtName.strip().replace("\r", ' ').replace("\n", " ").replace("   ", " ").replace("  ", " ")
                bForce = True
    # Load the explanation paragraph
    txtPName = ""
    for pTag in soup.findAll("p"):
        txtPName = pTag.text
        txtPName = txtPName.strip()
        if "Explanation:" in txtPName:
            iLoc = txtPName.find("Tomorrow's picture:")
            iLoc = iLoc - 1
            iLoc2 = txtPName.find("digg_url")
            if iLoc2 > 0:
                txtPName = txtPName[:iLoc2]
            iLoc2 = txtPName.find("APOD presents:")
            if iLoc2 > 0:
                txtPName = txtPName[:iLoc2]
            iLoc2 = txtPName.find("The Amateur Astronomers Association of New York Presents:")
            if iLoc2 > 0:
                txtPName = txtPName[:iLoc2]
            iLoc2 = txtPName.find("Presents:")
            if iLoc2 > 0:
                txtPName = txtPName[:iLoc2]
            iLoc2 = txtPName.find("What was that?:")
            if iLoc2 > 0:
#......... the rest of this example is omitted .........
Example 11: Poem
# Module to import: from urllib import URLopener [as alias]
# Or: from urllib.URLopener import retrieve [as alias]
for poemLink in poemLinks:
    try:
        pl = db_session.query(Poem).filter_by(poem=poemLink).one()
    except NoResultFound:
        p_obj = Poem(poem=poemLink)
        db_session.add(p_obj)
        print 'added poem %s' % poemLink
        db_session.commit()
        # Download and save the file to a temp location,
        # making sure it is not massive
        d = urllib.urlopen(poemLink)
        if int(d.info()['Content-Length']) > 25000000:  # arbitrary limit, could be better
            continue
        filename = urlparse(poemLink).path.split('/')[-1]
        try:
            opener.retrieve(poemLink, temp + filename)
        except IOError:
            continue
        # Open the file and convert it to mono at 8000 Hz
        poem = AudioSegment.from_mp3(temp + filename)
        poem = poem.set_channels(1)
        poem = poem.set_frame_rate(8000)
        # Erase the temp file
        os.remove(temp + filename)
        # Cut the poem into lines based on silence
        lines = cutbySilence(poem)
        # Number the lines
        line_num = 0
        for line in lines:
            if line.duration_seconds > 30:
                continue
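cutbySilence is not defined in the snippet; pydub ships a silence-splitting helper that a minimal version could delegate to. The thresholds below are guesses, not the author's values:

from pydub.silence import split_on_silence

def cutbySilence(audio):
    # Split wherever at least 700 ms of audio sits below -40 dBFS.
    return split_on_silence(audio, min_silence_len=700, silence_thresh=-40)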
Example 12: DownloadImageFromAPODPage
# Module to import: from urllib import URLopener [as alias]
# Or: from urllib.URLopener import retrieve [as alias]
def DownloadImageFromAPODPage(url):
    # Copy all of the content from the provided web page
    webpage = urlopen(url).read()
    print "-"
    print "URL: " + url
    global bDoWork
    global bCleanExtras
    global bVerified
    global strAPODPicturesFolder
    strAPODFileName = ""
    # Retrieve the titles and links with just Beautiful Soup
    soup = BeautifulSoup(webpage)
    for url in soup.findAll("a"):
        imgurl = url.get('href')
        if not ('http://' in imgurl):
            imgurl = 'http://apod.nasa.gov/' + url.get('href')
        if imgurl[len(imgurl) - 3:len(imgurl)] == "jpg":
            print "IMG: " + imgurl
            strAPODFileName = imgurl.strip().split('/')[-1]
            print "strAPODFileName = " + strAPODFileName
            filename = strAPODPicturesFolder + strAPODFileName
            if bDoWork:
                bDoWork = False
                if (not os.path.isfile(filename)) and ('apod.nasa.gov' in imgurl):
                    print "Downloading: " + filename
                    image = URLopener()
                    image.retrieve(imgurl, filename)
                    sleep(lWaitTime)
                elif os.path.isfile(filename):
                    print "Verified: " + filename
                    bVerified = True
                    if not bCleanExtras:
                        # If we are not cleaning extras we can stop here
                        print "Not Seeking Extras"
                        break
            else:
                if os.path.isfile(filename):
                    # This is the logic to clean extra downloads/duplicates
                    print "Deleting " + filename
                    os.remove(filename)
    txtName = ""
    for bTag in soup.findAll("b"):
        if txtName == "":
            txtName = bTag.text
            txtName = txtName.strip()
            print txtName
    txtPName = ""
    for pTag in soup.findAll("p"):
        txtPName = pTag.text
        txtPName = txtPName.strip()
        if "Explanation:" in txtPName:
            iLoc = txtPName.find("Tomorrow's picture:")
            iLoc = iLoc - 1
            if iLoc > 0 and (strAPODFileName != ""):
                txtPName = txtPName[0:iLoc].strip().replace('\n', ' ').replace('   ', ' ').replace('  ', ' ').replace('Explanation: ', '')
                if not os.path.isfile(strAPODDataFolder + strAPODFileName.replace('.jpg', '_Title.txt')):
                    print "Title: " + txtName
                    print "FN: " + strAPODFileName.replace('.jpg', '_Title.txt')
                    f = open(strAPODDataFolder + strAPODFileName.replace('.jpg', '_Title.txt'), 'w')
                    f.write(txtName.encode('utf8'))
                    f.close()
                if not os.path.isfile(strAPODDataFolder + strAPODFileName.replace('.jpg', '_Info.txt')):
                    print "Info Paragraph: " + txtPName
                    print "FN: " + strAPODFileName.replace('.jpg', '_Info.txt')
                    f = open(strAPODDataFolder + strAPODFileName.replace('.jpg', '_Info.txt'), 'w')
                    f.write(txtPName.encode('utf8'))
                    f.close()