本文整理汇总了Python中sunpy.util.scraper.Scraper类的典型用法代码示例。如果您正苦于以下问题：Python Scraper类的具体用法？Python Scraper怎么用？Python Scraper使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Scraper类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_filelist_url_missing_directory
def test_filelist_url_missing_directory():
    """Check that a missing remote directory produces an empty file list.

    Asserts the solution to ticket #2684: requesting data for the year 1960
    results in a 404, so no files are returned.
    """
    eve_pattern = ('http://lasp.colorado.edu/eve/data_access/'
                   'evewebdataproducts/level2/%Y/%j/')
    scraper = Scraper(eve_pattern)
    one_day_in_1960 = TimeRange('1960/01/01 00:00:00', '1960/01/02 00:00:00')
    found = scraper.filelist(one_day_in_1960)
    assert len(found) == 0
示例2: testURL_patternMilliseconds
def testURL_patternMilliseconds():
    """Check which file names follow a pattern that ends in the ``%e`` field."""
    scraper = Scraper('fd_%Y%m%d_%H%M%S_%e.fts')
    # NOTE: the positive check below has been seen to fail intermittently for
    # reasons that are not understood; comparing with `== True` reportedly
    # fails a little less often.
    assert scraper._URL_followsPattern('fd_20130410_231211_119.fts')
    # Names without the trailing field, or with extra text, must be rejected.
    for non_matching in ('fd_20130410_231211.fts.gz',
                         'fd_20130410_ar_231211.fts.gz'):
        assert not scraper._URL_followsPattern(non_matching)
示例3: _get_url_for_timerange
def _get_url_for_timerange(self, timerange, **kwargs):
    """
    Returns the url for Fermi/GBM data for the given date.

    Parameters
    ----------
    timerange : `sunpy.time.TimeRange`
        The time range for which to download the data.
    **kwargs
        ``detector``, when given, is validated through ``_check_detector``;
        otherwise the detector label defaults to ``'n5'``.
        ``resolution``, when given, is validated through ``_check_type``;
        otherwise the data type defaults to ``'cspec'``.

    Returns
    -------
    `str`:
        The url(s) for time of interest, as returned by ``Scraper.filelist``.
    """
    # Detector keyword is optional; fall back to detector 5.
    detector = _check_detector(kwargs['detector']) if 'detector' in kwargs else 'n5'

    # Resolution keyword is optional (either CSPEC or CTIME); fall back to CSPEC.
    resolution = _check_type(kwargs['resolution']) if 'resolution' in kwargs else 'cspec'

    url_template = ('https://heasarc.gsfc.nasa.gov/FTP/fermi/data/gbm/daily/'
                    '%Y/%m/%d/current/glg_{data_type}_{det}_%y%m%d_v00.pha')
    scraper = Scraper(url_template, data_type=resolution, det=detector)
    return scraper.filelist(timerange)
示例4: testFilesRange_sameDirectory_months_remote
def testFilesRange_sameDirectory_months_remote():
    """Expect two files for a range running from 2007-08-01 to 2007-09-10."""
    url_pattern = ('http://www.srl.caltech.edu/{spacecraft}/DATA/{instrument}/'
                   'Ahead/1minute/AeH%y%b.1m')
    scraper = Scraper(url_pattern, spacecraft='STEREO', instrument='HET')
    span = TimeRange(parse_time((2007, 8, 1)), parse_time((2007, 9, 10)))
    assert len(scraper.filelist(span)) == 2
示例5: _get_time_for_url
def _get_time_for_url(self, urls):
    """Return one `TimeRange` per URL, starting at the date found in the URL.

    The start of each range is extracted from the URL with a `Scraper` built
    on ``BASEURL``; the end is exactly one day later.
    """
    scraper = Scraper(BASEURL)
    # Hard coded full day as that's the normal.
    one_day = datetime.timedelta(days=1)
    return [TimeRange(start, start + one_day)
            for start in map(scraper._extractDateURL, urls)]
示例6: _get_time_for_url
def _get_time_for_url(self, urls):
freq = urls[0].split('/')[-1][0:3] # extract the frequency label
crawler = Scraper(BASEURL, freq=freq)
times = list()
for url in urls:
t0 = crawler._extractDateURL(url)
# hard coded full day as that's the normal.
times.append(TimeRange(t0, t0 + TimeDelta(1*u.day)))
return times
示例7: testDirectoryRange_Month
def testDirectoryRange_Month():
    """Count directories for a range that crosses from February into March.

    2008 has a 29 February and 2009 does not, so the same calendar range is
    expected to yield one directory more in 2008.
    """
    scraper = Scraper('%Y%m/%d/%j_%H.txt')
    for year, expected_count in ((2008, 12), (2009, 11)):
        begin = parse_time((year, 2, 20, 10))
        finish = parse_time((year, 3, 2, 5))
        span = TimeRange(begin, finish)
        assert len(scraper.range(span)) == expected_count
示例8: testDirectoryRange_Month
def testDirectoryRange_Month():
    """Count directories for a range that crosses from February into March.

    Uses plain `datetime.datetime` endpoints. 2008 has a 29 February and 2009
    does not, so 2008 is expected to yield one directory more.
    """
    scraper = Scraper("%Y%m/%d/%j_%H.txt")

    leap_span = TimeRange(datetime.datetime(2008, 2, 20, 10),
                          datetime.datetime(2008, 3, 2, 5))
    assert len(scraper.range(leap_span)) == 12

    regular_span = TimeRange(datetime.datetime(2009, 2, 20, 10),
                             datetime.datetime(2009, 3, 2, 5))
    assert len(scraper.range(regular_span)) == 11
示例9: testExtractDates_usingPattern
def testExtractDates_usingPattern():
    """Extract the same timestamp from URLs matching two related patterns."""
    cases = (
        # Standard pattern
        ('data/%Y/%m/%d/fits/swap/swap_00174_fd_%Y%m%d_%H%M%S.fts.gz',
         'data/2014/05/14/fits/swap/swap_00174_fd_20140514_200135.fts.gz'),
        # Not-full repeated pattern
        ('data/%Y/fits/swap/swap_00174_fd_%Y%m%d_%H%M%S.fts.gz',
         'data/2014/fits/swap/swap_00174_fd_20140514_200135.fts.gz'),
    )
    for pattern, url in cases:
        scraper = Scraper(pattern)
        expected = parse_time((2014, 5, 14, 20, 1, 35))
        assert scraper._extractDateURL(url) == expected
示例10: testFilesRange_sameDirectory_local
def testFilesRange_sameDirectory_local():
    """List local EIT files under ``rootdir`` for two time ranges."""
    # Fails due to an IsADirectoryError, wrapped in a URLError, after `requests`
    # tries to open a directory as a binary file.
    local_pattern = '/'.join(['file:/', rootdir,
                              'EIT', 'efz%Y%m%d.%H%M%S_s.fits'])
    scraper = Scraper(local_pattern)

    # Range expected to contain three files.
    begin = parse_time((2004, 3, 1, 4, 0))
    finish = parse_time((2004, 3, 1, 6, 30))
    populated = scraper.filelist(TimeRange(begin, finish))
    assert len(populated) == 3

    # Range expected to contain no files.
    begin = parse_time((2010, 1, 10, 20, 30))
    finish = parse_time((2010, 1, 20, 20, 30))
    unpopulated = scraper.filelist(TimeRange(begin, finish))
    assert len(unpopulated) == 0
示例11: testFilesRange_sameDirectory_local
def testFilesRange_sameDirectory_local():
    """List local EIT files under ``rootdir`` for two time ranges.

    Also prints the scraper's ``pattern`` and ``now`` attributes before the
    checks run.
    """
    path_parts = ['file:/', rootdir, 'EIT', 'efz%Y%m%d.%H%M%S_s.fits']
    scraper = Scraper('/'.join(path_parts))
    print(scraper.pattern)
    print(scraper.now)

    # (start tuple, end tuple, number of files expected in between)
    expectations = (
        ((2004, 3, 1, 4, 0), (2004, 3, 1, 6, 30), 3),
        ((2010, 1, 10, 20, 30), (2010, 1, 20, 20, 30), 0),
    )
    for start_parts, end_parts, expected_count in expectations:
        begin = parse_time(start_parts)
        finish = parse_time(end_parts)
        assert len(scraper.filelist(TimeRange(begin, finish))) == expected_count
示例12: testFilesRange_sameDirectory_remote
def testFilesRange_sameDirectory_remote():
    """List remote SWAP files on solarmonitor.org for two ranges on one day."""
    url_pattern = (
        "http://solarmonitor.org/data/%Y/%m/%d/"
        "fits/{instrument}/"
        "{instrument}_00174_fd_%Y%m%d_%H%M%S.fts.gz"
    )
    scraper = Scraper(url_pattern, instrument="swap")

    # Early part of the day: two files expected.
    morning = TimeRange(datetime.datetime(2014, 5, 14, 0, 0),
                        datetime.datetime(2014, 5, 14, 6, 30))
    assert len(scraper.filelist(morning)) == 2

    # Late part of the day: no files expected.
    evening = TimeRange(datetime.datetime(2014, 5, 14, 21, 0),
                        datetime.datetime(2014, 5, 14, 23, 30))
    assert len(scraper.filelist(evening)) == 0
示例13: testExtractDates_notSeparators_andSimilar
def testExtractDates_notSeparators_andSimilar():
    """Extract dates when literal text resembles a month and nothing separates fields.

    The pattern contains the literal ``Jun`` directly next to the ``%b`` month
    field, so the literal must not be mistaken for the month itself.
    """
    scraper = Scraper('data/%Y/Jun%b%d_%H%M%S')
    month_first_cases = (
        ('data/2014/JunJun14_200135', 6),
        ('data/2014/JunMay14_200135', 5),
    )
    for url, month in month_first_cases:
        expected = parse_time((2014, month, 14, 20, 1, 35))
        assert scraper._extractDateURL(url) == expected

    # and testing with the month afterwards
    scraper = Scraper('data/%Y/%dJun%b_%H%M%S')
    url = 'data/2014/14JunJun_200135'
    expected = parse_time((2014, 6, 14, 20, 1, 35))
    assert scraper._extractDateURL(url) == expected
示例14: testFilesRange_sameDirectory_remote
def testFilesRange_sameDirectory_remote():
    """List remote SWAP files on solarmonitor.org for two ranges on one day."""
    scraper = Scraper(
        'http://solarmonitor.org/data/%Y/%m/%d/'
        'fits/{instrument}/'
        '{instrument}_00174_fd_%Y%m%d_%H%M%S.fts.gz',
        instrument='swap',
    )
    # (start tuple, end tuple, number of files expected in between)
    expectations = (
        ((2014, 5, 14, 0, 0), (2014, 5, 14, 6, 30), 2),
        ((2014, 5, 14, 21, 0), (2014, 5, 14, 23, 30), 0),
    )
    for start_parts, end_parts, expected_count in expectations:
        begin = parse_time(start_parts)
        finish = parse_time(end_parts)
        span = TimeRange(begin, finish)
        assert len(scraper.filelist(span)) == expected_count
示例15: _get_url_for_timerange
def _get_url_for_timerange(self, timerange, **kwargs):
"""
Returns list of URLS corresponding to value of input timerange.
Parameters
----------
timerange: `sunpy.time.TimeRange`
time range for which data is to be downloaded.
Returns
-------
urls : list
list of URLs corresponding to the requested time range
"""
# We allow queries with no Wavelength but error here so that the query
# does not get passed to VSO and spit out garbage.
if 'wavelength' not in kwargs.keys() or not kwargs['wavelength']:
raise ValueError("Queries to NORH should specify either 17GHz or 34GHz as a Wavelength."
"see https://solar.nro.nao.ac.jp/norh/doc/manuale/node65.html")
else:
wavelength = kwargs['wavelength']
# If wavelength is a single value GenericClient will have made it a
# Quantity in the kwargs.
if not isinstance(wavelength, u.Quantity):
raise ValueError("Wavelength to NORH must be one value not {}.".format(wavelength))
wavelength = wavelength.to(u.GHz, equivalencies=u.spectral())
if wavelength == 34 * u.GHz:
freq = 'tcz'
elif wavelength == 17 * u.GHz:
freq = 'tca'
else:
raise ValueError("NORH Data can be downloaded for 17GHz or 34GHz,"
" see https://solar.nro.nao.ac.jp/norh/doc/manuale/node65.html")
# If start of time range is before 00:00, converted to such, so
# files of the requested time ranger are included.
# This is done because the archive contains daily files.
if timerange.start.strftime('%M-%S') != '00-00':
timerange = TimeRange(timerange.start.strftime('%Y-%m-%d'),
timerange.end)
norh = Scraper(BASEURL, freq=freq)
# TODO: warn user that some files may have not been listed, like for example:
# tca160504_224657 on ftp://solar-pub.nao.ac.jp/pub/nsro/norh/data/tcx/2016/05/
# as it doesn't follow pattern.
return norh.filelist(timerange)