本文整理汇总了Python中dlstats.utils.Downloader.get_filepath_and_response方法的典型用法代码示例。如果您正苦于以下问题:Python Downloader.get_filepath_and_response方法的具体用法?Python Downloader.get_filepath_and_response怎么用?Python Downloader.get_filepath_and_response使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类dlstats.utils.Downloader
的用法示例。
在下文中一共展示了Downloader.get_filepath_and_response方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _load_dsd
# 需要导入模块: from dlstats.utils import Downloader [as 别名]
# 或者: from dlstats.utils.Downloader import get_filepath_and_response [as 别名]
def _load_dsd(self):
    """Download and process the DSD (Data Structure Definition) for this dataset.

    There is one DSD per series group (roughly 400 of them); a single DSD
    may be shared by several datasets (~668 datasets in total).

    Falls back to ``_load_dsd_by_element`` when the server rejects the full
    DSD request (HTTP_ERROR_LONG_RESPONSE) or no local file was produced.
    """
    url = "http://www.bdm.insee.fr/series/sdmx/datastructure/INSEE/%s?references=children" % self.dsd_id
    download = Downloader(url=url,
                          filename="dsd-%s.xml" % self.dsd_id,
                          headers=SDMX_METADATA_HEADERS,
                          store_filepath=self.store_path,
                          use_existing_file=self.fetcher.use_existing_file,
                          client=self.fetcher.requests_client)
    filepath, response = download.get_filepath_and_response()
    if response:
        if response.status_code == HTTP_ERROR_LONG_RESPONSE:
            # Server refused the full DSD: load it element by element instead.
            self._load_dsd_by_element()
            return
        elif response.status_code >= 400:
            # FIX: raise_for_status() raises HTTPError itself for error codes
            # (and returns None otherwise), so it must not be wrapped in a
            # bare `raise`, which could become `raise None` -> TypeError.
            response.raise_for_status()
    if not os.path.exists(filepath):
        self._load_dsd_by_element()
        return
    self.fetcher.for_delete.append(filepath)
    self.xml_dsd.process(filepath)
    self._set_dataset()
示例2: _load_file
# 需要导入模块: from dlstats.utils import Downloader [as 别名]
# 或者: from dlstats.utils.Downloader import get_filepath_and_response [as 别名]
def _load_file(self):
    """Download the dataset zip archive and validate its release date.

    Raises:
        errors.RejectUpdatedDataset: when the remote Last-Modified date is
            not newer than the dataset's recorded last update.
    """
    filename = "data-%s.zip" % (self.dataset_code)
    download = Downloader(
        url=self.url,
        filename=filename,
        store_filepath=self.get_store_path(),
        use_existing_file=self.fetcher.use_existing_file,
    )
    self.filepath, response = download.get_filepath_and_response()
    if self.filepath:
        self.fetcher.for_delete.append(self.filepath)
    # FIX: response is None when use_existing_file reuses a cached local
    # file; only a live HTTP response carries the Last-Modified header
    # (the original crashed on response.headers in that case).
    if response is not None:
        release_date_str = response.headers["Last-Modified"]
        # Last-Modified: Tue, 05 Apr 2016 15:05:11 GMT
        self.release_date = clean_datetime(datetime.strptime(release_date_str, "%a, %d %b %Y %H:%M:%S GMT"))
        if self.dataset.last_update and self.dataset.last_update >= self.release_date:
            comments = "update-date[%s]" % self.release_date
            raise errors.RejectUpdatedDataset(
                provider_name=self.provider_name, dataset_code=self.dataset_code, comments=comments
            )
        self.dataset.last_update = self.release_date
示例3: _load
# 需要导入模块: from dlstats.utils import Downloader [as 别名]
# 或者: from dlstats.utils.Downloader import get_filepath_and_response [as 别名]
def _load(self):
    """Fetch the SDMX-plus XML file for this dataset and feed it to XMLData."""
    # TODO: DSD handling, once a real datastructure endpoint exists:
    #   url = "xxx/%s" % self.dataset_code
    #   download = Downloader(url=url,
    #                         filename="dataflow-%s.xml" % self.dataset_code)
    #   self.xml_dsd.process(download.get_filepath())
    data_url = "https://www.destatis.de/sddsplus/%s.xml" % self.dataset_code
    downloader = Downloader(url=data_url,
                            filename="data-%s.xml" % self.dataset_code)
    self.xml_data = XMLData(provider_name=self.provider_name,
                            dataset_code=self.dataset_code,
                            ns_tag_data=self.ns_tag_data,
                            #dimension_keys=self.xml_dsd.dimension_keys
                            )
    # TODO: finer-grained response / exception handling
    try:
        filepath, response = downloader.get_filepath_and_response()
    except requests.exceptions.HTTPError as err:
        logger.critical("AUTRE ERREUR HTTP : %s" % err.response.status_code)
        raise
    self.rows = self.xml_data.process(filepath)
示例4: _get_data_by_dimension
# 需要导入模块: from dlstats.utils import Downloader [as 别名]
# 或者: from dlstats.utils.Downloader import get_filepath_and_response [as 别名]
def _get_data_by_dimension(self):
    """Yield (row, err) pairs by downloading data one dimension value at a time.

    The SDMX key is split on the dimension chosen by ``select_dimension`` so
    each request stays within the server's response-size limits.  A final
    (None, None) sentinel is yielded when all values are exhausted.
    """
    self.xml_data = XMLData(
        provider_name=self.provider_name,
        dataset_code=self.dataset_code,
        xml_dsd=self.xml_dsd,
        dsd_id=self.dsd_id,
        frequencies_supported=FREQUENCIES_SUPPORTED,
    )
    dimension_keys, dimensions = self._get_dimensions_from_dsd()
    position, _key, dimension_values = select_dimension(dimension_keys, dimensions)
    count_dimensions = len(dimension_keys)
    for dimension_value in dimension_values:
        key = get_key_for_dimension(count_dimensions, position, dimension_value)
        # http://sdw-wsrest.ecb.int/service/data/IEAQ/A............
        url = "http://sdw-wsrest.ecb.int/service/data/%s/%s" % (self.dataset_code, key)
        if not self._is_good_url(url, good_codes=[200, HTTP_ERROR_NOT_MODIFIED]):
            print("bypass url[%s]" % url)
            continue
        headers = SDMX_DATA_HEADERS
        filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
        download = Downloader(
            url=url,
            filename=filename,
            store_filepath=self.store_path,
            headers=headers,
            use_existing_file=self.fetcher.use_existing_file,
            # client=self.fetcher.requests_client
        )
        filepath, response = download.get_filepath_and_response()
        if not filepath or not os.path.exists(filepath):
            continue
        self.fetcher.for_delete.append(filepath)
        # BUG FIX: the original chained `if response:` / `elif response and
        # status == NO_RESULT:` / `elif response and status >= 400:`, so the
        # NO_RESULT skip and the >=400 raise were unreachable whenever
        # response was truthy.  Handle the status inside the response branch.
        if response:
            self._add_url_cache(url, response.status_code)
            if response.status_code == HTTP_ERROR_NO_RESULT:
                continue
            elif response.status_code >= 400:
                # raise_for_status() raises HTTPError itself; no bare `raise`.
                response.raise_for_status()
        for row, err in self.xml_data.process(filepath):
            yield row, err
    yield None, None
示例5: _get_data_by_dimension
# 需要导入模块: from dlstats.utils import Downloader [as 别名]
# 或者: from dlstats.utils.Downloader import get_filepath_and_response [as 别名]
def _get_data_by_dimension(self):
    """Yield (row, err) pairs by downloading data one dimension value at a time.

    The split dimension is chosen with choice="avg", except for
    IPC-2015-COICOP which needs choice="max" to keep each request small
    enough.  A final (None, None) sentinel is yielded when done.
    """
    dimension_keys, dimensions = self._get_dimensions_from_dsd()
    choice = "avg"
    if self.dataset_code in ["IPC-2015-COICOP"]:
        choice = "max"
    position, _key, dimension_values = select_dimension(dimension_keys,
                                                        dimensions,
                                                        choice=choice)
    count_dimensions = len(dimension_keys)
    logger.info("choice[%s] - filterkey[%s] - count[%s] - provider[%s] - dataset[%s]" % (choice, _key, len(dimension_values), self.provider_name, self.dataset_code))
    for dimension_value in dimension_values:
        # For each value of the split dimension, build one URL key.
        key = get_key_for_dimension(count_dimensions, position, dimension_value)
        url = "http://www.bdm.insee.fr/series/sdmx/data/%s/%s" % (self.dataset_code, key)
        if self._is_good_url(url) is False:
            logger.warning("bypass not good url[%s]" % url)
            continue
        filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
        download = Downloader(url=url,
                              filename=filename,
                              store_filepath=self.store_path,
                              use_existing_file=self.fetcher.use_existing_file,
                              #NOT USE FOR INSEE client=self.fetcher.requests_client
                              )
        filepath, response = download.get_filepath_and_response()
        # idiom fix: was `if not response is None`
        if response is not None:
            self._add_url_cache(url, response.status_code)
        if filepath and os.path.exists(filepath):
            self.fetcher.for_delete.append(filepath)
        elif not filepath or not os.path.exists(filepath):
            continue
        if response and response.status_code == HTTP_ERROR_NO_RESULT:
            continue
        elif response and response.status_code >= 400:
            # raise_for_status() raises HTTPError itself; no bare `raise`.
            response.raise_for_status()
        for row, err in self.xml_data.process(filepath):
            yield row, err
    #self.dataset.update_database(save_only=True)
    yield None, None
示例6: _load
# 需要导入模块: from dlstats.utils import Downloader [as 别名]
# 或者: from dlstats.utils.Downloader import get_filepath_and_response [as 别名]
def _load(self):
    """Load the ECB dataflow, DSD and data stream for this dataset.

    Three HTTP round-trips: the dataflow (to discover the DSD id), the DSD
    itself (dimensions/attributes), then the data file handed to XMLData.

    Raises:
        Exception: when no DSD id can be resolved from the dataflow.
        requests.exceptions.HTTPError: re-raised after logging when the
            data download fails.
    """
    url = "http://sdw-wsrest.ecb.int/service/dataflow/ECB/%s" % self.dataset_code
    download = Downloader(url=url,
                          filename="dataflow-%s.xml" % self.dataset_code,
                          headers=SDMX_METADATA_HEADERS)
    self.xml_dsd.process(download.get_filepath())
    self.dsd_id = self.xml_dsd.dsd_id
    if not self.dsd_id:
        msg = "DSD ID not found for provider[%s] - dataset[%s]" % (self.provider_name,
                                                                   self.dataset_code)
        raise Exception(msg)
    url = "http://sdw-wsrest.ecb.int/service/datastructure/ECB/%s?references=children" % self.dsd_id
    download = Downloader(url=url,
                          filename="dsd-%s.xml" % self.dataset_code,
                          headers=SDMX_METADATA_HEADERS)
    self.xml_dsd.process(download.get_filepath())
    self.dataset.name = self.xml_dsd.dataset_name
    # Flatten the parsed DSD dimensions/attributes into plain ordered dicts
    # for the code lists.
    dimensions = OrderedDict()
    for key, item in self.xml_dsd.dimensions.items():
        dimensions[key] = item["dimensions"]
    self.dimension_list.set_dict(dimensions)
    attributes = OrderedDict()
    for key, item in self.xml_dsd.attributes.items():
        attributes[key] = item["values"]
    self.attribute_list.set_dict(attributes)
    url = "http://sdw-wsrest.ecb.int/service/data/%s" % self.dataset_code
    download = Downloader(url=url,
                          filename="data-%s.xml" % self.dataset_code,
                          headers=SDMX_DATA_HEADERS)
    self.xml_data = XMLData(provider_name=self.provider_name,
                            dataset_code=self.dataset_code,
                            dimension_keys=self.xml_dsd.dimension_keys)
    #TODO: response and exception
    try:
        filepath, response = download.get_filepath_and_response()
    except requests.exceptions.HTTPError as err:
        # Log message is French for "other HTTP error"; kept verbatim.
        logger.critical("AUTRE ERREUR HTTP : %s" % err.response.status_code)
        raise
    self.rows = self.xml_data.process(filepath)
示例7: _get_data_by_dimension
# 需要导入模块: from dlstats.utils import Downloader [as 别名]
# 或者: from dlstats.utils.Downloader import get_filepath_and_response [as 别名]
def _get_data_by_dimension(self):
    """Yield (row, err) pairs, one download per value of the split dimension.

    Ends with a (None, None) sentinel.
    """
    dimension_keys, dimensions = get_dimensions_from_dsd(self.xml_dsd,
                                                         self.provider_name,
                                                         self.dataset_code)
    choice = "max" if self.dataset_code in ["IPC-2015-COICOP"] else "avg"
    position, _key, dimension_values = select_dimension(dimension_keys,
                                                        dimensions,
                                                        choice=choice)
    count_dimensions = len(dimension_keys)
    for dimension_value in dimension_values:
        # Build the SDMX key: the chosen dimension is fixed to this value,
        # every other slot stays a "." wildcard.
        key = "".join(dimension_value if i == position else "."
                      for i in range(count_dimensions))
        url = "http://www.bdm.insee.fr/series/sdmx/data/%s/%s" % (self.dataset_code, key)
        filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
        downloader = Downloader(url=url,
                                filename=filename,
                                store_filepath=self.store_path,
                                #client=self.fetcher.requests_client
                                )
        filepath, response = downloader.get_filepath_and_response()
        if filepath:
            self.fetcher.for_delete.append(filepath)
        if response.status_code == HTTP_ERROR_NO_RESULT:
            continue
        elif response.status_code >= 400:
            raise response.raise_for_status()
        for row, err in self.xml_data.process(filepath):
            yield row, err
    #self.dataset.update_database(save_only=True)
    yield None, None
示例8: _get_data_by_dimension
# 需要导入模块: from dlstats.utils import Downloader [as 别名]
# 或者: from dlstats.utils.Downloader import get_filepath_and_response [as 别名]
def _get_data_by_dimension(self):
    """Yield (row, err) pairs, downloading data one dimension value at a time.

    The SDMX key is split on the dimension chosen by select_dimension
    (choice="max": the dimension with the most values) so each request
    stays small.  4xx responses skip the key; 5xx aborts the run.  A final
    (None, None) sentinel is yielded when done.
    """
    dimension_keys, dimensions = get_dimensions_from_dsd(self.xml_dsd,
                                                         self.provider_name,
                                                         self.dataset_code)
    position, _key, dimension_values = select_dimension(dimension_keys, dimensions, choice="max")
    count_dimensions = len(dimension_keys)
    for dimension_value in dimension_values:
        '''Pour chaque valeur de la dimension, generer une key d'url'''
        # (French: "for each dimension value, generate a URL key")
        local_count = 0  # series yielded for this key
        sdmx_key = []
        for i in range(count_dimensions):
            if i == position:
                sdmx_key.append(dimension_value)
            else:
                sdmx_key.append(".")
        key = "".join(sdmx_key)
        url = "%s/%s" % (self._get_url_data(), key)
        filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
        download = Downloader(url=url,
                              filename=filename,
                              store_filepath=self.store_path,
                              client=self.fetcher.requests_client)
        filepath, response = download.get_filepath_and_response()
        if filepath:
            self.fetcher.for_delete.append(filepath)
        # 4xx: skip this key; 5xx: abort the whole run.
        if response.status_code >= 400 and response.status_code < 500:
            continue
        elif response.status_code >= 500:
            raise response.raise_for_status()
        for row, err in self.xml_data.process(filepath):
            yield row, err
            local_count += 1
        if local_count >= 2999:
            # NOTE(review): 2999 looks like a server-side series cap — confirm.
            logger.warning("TODO: VRFY - series > 2999 for provider[IMF] - dataset[%s] - key[%s]" % (self.dataset_code, key))
    #self.dataset.update_database(save_only=True)
    yield None, None
示例9: _get_data_by_dimension
# 需要导入模块: from dlstats.utils import Downloader [as 别名]
# 或者: from dlstats.utils.Downloader import get_filepath_and_response [as 别名]
def _get_data_by_dimension(self):
    """Yield (row, err) pairs per value of the split dimension.

    4xx responses skip the key; 5xx aborts the run.  Ends with a
    (None, None) sentinel.
    """
    self.xml_data = XMLData(provider_name=self.provider_name,
                            dataset_code=self.dataset_code,
                            xml_dsd=self.xml_dsd,
                            dsd_id=self.dataset_code,
                            frequencies_supported=FREQUENCIES_SUPPORTED)
    dimension_keys, dimensions = self._get_dimensions_from_dsd()
    position, _key, dimension_values = select_dimension(dimension_keys, dimensions, choice="max")
    count_dimensions = len(dimension_keys)
    for dimension_value in dimension_values:
        # Build the SDMX key: fix the chosen dimension, wildcard the rest.
        sdmx_key = []
        for i in range(count_dimensions):
            if i == position:
                sdmx_key.append(dimension_value)
            else:
                sdmx_key.append(".")
        key = "".join(sdmx_key)
        url = "%s/%s" % (self._get_url_data(), key)
        filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
        download = Downloader(url=url,
                              filename=filename,
                              store_filepath=self.store_path,
                              client=self.fetcher.requests_client
                              )
        filepath, response = download.get_filepath_and_response()
        if filepath:
            self.fetcher.for_delete.append(filepath)
        # Guard: response may be None when a cached file is reused — TODO
        # confirm against Downloader; the original dereferenced it blindly.
        if response is not None:
            if 400 <= response.status_code < 500:
                continue
            elif response.status_code >= 500:
                # raise_for_status() raises HTTPError itself; no bare `raise`.
                response.raise_for_status()
        for row, err in self.xml_data.process(filepath):
            yield row, err
    #self.dataset.update_database(save_only=True)
    yield None, None
示例10: _load
# 需要导入模块: from dlstats.utils import Downloader [as 别名]
# 或者: from dlstats.utils.Downloader import get_filepath_and_response [as 别名]
def _load(self):
    """Fetch the INSEE DSD and data for this dataset and prepare row parsing."""
    self.dsd_id = self.dataset_code
    dsd_url = "http://www.bdm.insee.fr/series/sdmx/datastructure/INSEE/%s?references=children" % self.dsd_id
    dsd_download = Downloader(url=dsd_url,
                              filename="dsd-%s.xml" % self.dataset_code,
                              headers=SDMX_METADATA_HEADERS)
    self.xml_dsd.process(dsd_download.get_filepath())
    self.dataset.name = self.xml_dsd.dataset_name
    # Collapse the parsed DSD structures into plain ordered mappings.
    dimensions = OrderedDict(
        (code, item["dimensions"]) for code, item in self.xml_dsd.dimensions.items()
    )
    self.dimension_list.set_dict(dimensions)
    attributes = OrderedDict(
        (code, item["values"]) for code, item in self.xml_dsd.attributes.items()
    )
    self.attribute_list.set_dict(attributes)
    data_url = "http://www.bdm.insee.fr/series/sdmx/data/%s" % self.dataset_code
    data_download = Downloader(url=data_url,
                               filename="data-%s.xml" % self.dataset_code,
                               headers=SDMX_DATA_HEADERS)
    self.xml_data = XMLData(provider_name=self.provider_name,
                            dataset_code=self.dataset_code,
                            dimension_keys=self.xml_dsd.dimension_keys)
    # TODO: finer-grained response / exception handling
    try:
        filepath, response = data_download.get_filepath_and_response()
    except requests.exceptions.HTTPError as err:
        logger.critical("AUTRE ERREUR HTTP : %s" % err.response.status_code)
        raise
    self.rows = self.xml_data.process(filepath)
示例11: load_datas
# 需要导入模块: from dlstats.utils import Downloader [as 别名]
# 或者: from dlstats.utils.Downloader import get_filepath_and_response [as 别名]
def load_datas(self):
    """Download the dataset zip, record its release date and open the archive."""
    archive_name = "data-%s.zip" % (self.dataset_code)
    downloader = Downloader(url=self.url,
                            filename=archive_name,
                            store_filepath=self.fetcher.store_path,
                            use_existing_file=self.fetcher.use_existing_file,
                            #client=self.fetcher.requests_client
                            )
    filepath, response = downloader.get_filepath_and_response()
    if response:
        # Header format — Last-Modified: Tue, 05 Apr 2016 15:05:11 GMT
        last_modified = response.headers['Last-Modified']
        self.release_date = datetime.strptime(last_modified,
                                              "%a, %d %b %Y %H:%M:%S GMT")
        self._is_updated(self.release_date)
    self.zipfile = zipfile.ZipFile(filepath)
    self.excel_filenames = iter(self.zipfile.namelist())
示例12: _get_data_by_dimension
# 需要导入模块: from dlstats.utils import Downloader [as 别名]
# 或者: from dlstats.utils.Downloader import get_filepath_and_response [as 别名]
def _get_data_by_dimension(self):
    """Yield (row, err) pairs per dimension value, with If-Modified-Since caching.

    Sends the dataset's stored Last-Modified value as If-Modified-Since and
    skips unmodified (304) or empty keys; stores the fresh Last-Modified
    header back into the dataset metadata.  Ends with a (None, None)
    sentinel.
    """
    self.xml_data = XMLData(provider_name=self.provider_name,
                            dataset_code=self.dataset_code,
                            xml_dsd=self.xml_dsd,
                            frequencies_supported=FREQUENCIES_SUPPORTED)
    dimension_keys, dimensions = get_dimensions_from_dsd(self.xml_dsd,
                                                         self.provider_name,
                                                         self.dataset_code)
    position, _key, dimension_values = select_dimension(dimension_keys, dimensions)
    count_dimensions = len(dimension_keys)
    for dimension_value in dimension_values:
        # Build the SDMX key: fix the chosen dimension, wildcard the rest.
        sdmx_key = []
        for i in range(count_dimensions):
            if i == position:
                sdmx_key.append(dimension_value)
            else:
                sdmx_key.append(".")
        key = "".join(sdmx_key)
        url = "http://sdw-wsrest.ecb.int/service/data/%s/%s" % (self.dataset_code, key)
        # BUG FIX: copy the shared module-level headers dict — the original
        # aliased SDMX_DATA_HEADERS and then mutated it, so If-Modified-Since
        # leaked into every subsequent request using that constant.
        headers = dict(SDMX_DATA_HEADERS)
        last_modified = None
        if self.dataset.metadata and "Last-Modified" in self.dataset.metadata:
            last_modified = self.dataset.metadata["Last-Modified"]
            headers["If-Modified-Since"] = last_modified
        filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
        download = Downloader(url=url,
                              filename=filename,
                              store_filepath=self.store_path,
                              headers=headers,
                              client=self.fetcher.requests_client)
        filepath, response = download.get_filepath_and_response()
        if filepath:
            self.fetcher.for_delete.append(filepath)
        if response.status_code == HTTP_ERROR_NOT_MODIFIED:
            msg = "Reject dataset updated for provider[%s] - dataset[%s] - update-date[%s]"
            logger.warning(msg % (self.provider_name, self.dataset_code, last_modified))
            continue
        elif response.status_code == HTTP_ERROR_NO_RESULT:
            continue
        elif response.status_code >= 400:
            # raise_for_status() raises HTTPError itself; no bare `raise`.
            response.raise_for_status()
        if "Last-Modified" in response.headers:
            if not self.dataset.metadata:
                self.dataset.metadata = {}
            self.dataset.metadata["Last-Modified"] = response.headers["Last-Modified"]
        for row, err in self.xml_data.process(filepath):
            yield row, err
    yield None, None