本文整理汇总了 Python 中 superdesk.errors.ParserError 类的典型用法代码示例。如果您正苦于以下问题：Python ParserError 类的具体用法？Python ParserError 怎么用？Python ParserError 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 ParserError 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: parse_message
def parse_message(self, tree, provider):
    """Build an item dict from a parsed NITF document tree.

    :param tree: element tree of the document; must contain ``head/docdata``
    :param provider: ingest provider, only forwarded to the error factory
    :return: dict with the fields read from ``tree``
    :raises ParserError: wraps any exception raised while reading the tree
    """
    item = {}
    try:
        docdata = tree.find("head/docdata")

        # Items are text unless something later says otherwise.
        item["type"] = ITEM_CLASS_TEXT

        doc_id = docdata.find("doc-id").get("id-string")
        item["guid"] = doc_id
        item["uri"] = doc_id
        item["urgency"] = docdata.find("urgency").get("ed-urg", "5")
        item["pubstatus"] = docdata.attrib.get("management-status", "usable")

        # Both creation timestamps are derived from the same date.issue element.
        issue_el = docdata.find("date.issue")
        item["firstcreated"] = get_norm_datetime(issue_el)
        item["versioncreated"] = get_norm_datetime(issue_el)
        item["expiry"] = get_norm_datetime(docdata.find("date.expire"))

        item["subject"] = get_subjects(tree)
        item["body_html"] = get_content(tree)
        item["place"] = get_places(docdata)
        item["keywords"] = get_keywords(docdata)

        ed_msg = docdata.find("ed-msg")
        if ed_msg is not None:
            item["ednote"] = ed_msg.attrib.get("info")

        # A missing hl1 element raises here and is reported as a ParserError.
        headline_el = tree.find("body/body.head/hedline/hl1")
        item["headline"] = super().trim_headline(headline_el.text)

        # Optional elements fall back to an empty string.
        optional_fields = (
            ("abstract", "body/body.head/abstract"),
            ("dateline", "body/body.head/dateline/location/city"),
        )
        for field, path in optional_fields:
            node = tree.find(path)
            item[field] = "" if node is None else node.text

        item["byline"] = get_byline(tree)
        parse_meta(tree, item)

        # Only fills word_count when parse_meta did not already set it.
        counted = get_word_count(item["body_html"])
        item.setdefault("word_count", counted)
        return item
    except Exception as ex:
        raise ParserError.nitfParserError(ex, provider)
示例2: parse_message
def parse_message(self, tree, provider):
    """Turn a parsed NITF document tree into an item dict.

    :param tree: element tree of the document; must contain ``head/docdata``
    :param provider: ingest provider, only forwarded to the error factory
    :return: dict with the fields read from ``tree``
    :raises ParserError: wraps any exception raised while reading the tree
    """
    item = {}
    try:
        docdata = tree.find('head/docdata')

        # Default content type.
        item['type'] = ITEM_CLASS_TEXT

        identifier = docdata.find('doc-id').get('id-string')
        item['guid'] = identifier
        item['uri'] = identifier
        item['urgency'] = docdata.find('urgency').get('ed-urg', '5')
        item['pubstatus'] = docdata.attrib.get('management-status', 'usable')

        # date.issue feeds both the created and the version timestamp.
        issued = docdata.find('date.issue')
        item['firstcreated'] = get_norm_datetime(issued)
        item['versioncreated'] = get_norm_datetime(issued)
        item['expiry'] = get_norm_datetime(docdata.find('date.expire'))

        item['subject'] = get_subjects(tree)
        item['body_html'] = get_content(tree)
        item['place'] = get_places(docdata)
        item['keywords'] = get_keywords(docdata)

        note_el = docdata.find('ed-msg')
        if note_el is not None:
            item['ednote'] = note_el.attrib.get('info')

        # The headline is mandatory: a missing hl1 ends up as a ParserError.
        item['headline'] = tree.find('body/body.head/hedline/hl1').text

        abstract_el = tree.find('body/body.head/abstract')
        if abstract_el is None:
            item['abstract'] = ''
        else:
            item['abstract'] = abstract_el.text

        city_el = tree.find('body/body.head/dateline/location/city')
        if city_el is None:
            item['dateline'] = ''
        else:
            item['dateline'] = city_el.text

        item['byline'] = get_byline(tree)
        parse_meta(tree, item)

        # Keep a word_count set by parse_meta; otherwise count the body.
        body_words = get_word_count(item['body_html'])
        item.setdefault('word_count', body_words)
        return item
    except Exception as ex:
        raise ParserError.nitfParserError(ex, provider)
示例3: _update
def _update(self, provider):
    """Yield items parsed from the files found in the provider's path.

    This is a generator. Every regular file in the configured path is moved
    via ``move_file``: flagged as success when it was parsed or when it is
    not newer than the provider's ``last_updated``, flagged as failure when
    handling it raised.

    :param provider: provider dict; ``config.path`` names the directory
    :return: yields one single-item list per parsed file
    :raises ParserError: wraps any exception raised while handling a file
    """
    self.provider = provider
    self.path = provider.get('config', {}).get('path', None)
    if not self.path:
        logger.info('No path')
        return []

    for filename in get_sorted_files(self.path, sort_by=FileSortAttributes.created):
        try:
            filepath = os.path.join(self.path, filename)
            if not os.path.isfile(filepath):
                continue

            modified = os.lstat(filepath).st_mtime
            last_updated = datetime.fromtimestamp(modified, tz=utc)
            if not self.is_latest_content(last_updated, provider.get('last_updated')):
                # Stale files are moved away without being parsed.
                self.move_file(self.path, filename, provider=provider, success=True)
                continue

            item = self.parser.parse_file(filepath, provider)
            dpa_derive_dateline(item)
            self.move_file(self.path, filename, provider=provider, success=True)
            yield [item]
        except Exception as ex:
            self.move_file(self.path, filename, provider=provider, success=False)
            raise ParserError.parseFileError('DPA', filename, ex, provider)
示例4: _test
def _test(self, provider):
    """Test connection.

    Fetches the feed and raises when feedparser flags the result as bozo.

    :param provider: provider dict; its ``config`` is passed to ``_fetch_data``
    :raises ParserError: carrying feedparser's ``bozo_exception``
    """
    xml = self._fetch_data(provider.get('config', {}), provider)
    parsed = feedparser.parse(xml)
    if not parsed.bozo:
        return
    raise ParserError.parseMessageError(parsed.bozo_exception, provider)
示例5: parse
def parse(self, file_path, provider=None):
    """Parse an ANPA wire file into an item dict.

    The file is read in binary mode and split into lines. Header fields are
    picked out of the first two lines with regular expressions, the creation
    date is searched for in the fourth line from the end, and the story is
    whatever sits between a 0x02 and a 0x03 byte after the two header lines.
    A field whose pattern does not match is simply left out of the result.

    :param file_path: path of the file to read
    :param provider: not used by this method
    :return: dict with the parsed fields
    :raises ParserError: wraps any exception raised while reading or parsing
    """
    try:
        item = {ITEM_TYPE: CONTENT_TYPE.TEXT}
        with open(file_path, 'rb') as f:
            lines = [line for line in f]
        # First header line: only the 4-digit number (group 2) is kept.
        m = re.match(b'\x16\x16\x01([a-z])([0-9]{4})\x1f([a-z-]+)', lines[0], flags=re.I)
        if m:
            item['provider_sequence'] = m.group(2).decode()
        # Second header line: group 1 -> priority, group 2 -> category,
        # group 10 (the trailing 4-digit number) -> word count.
        m = re.match(
            b'([a-z]) ([a-z])(\x13|\x14)(\x11|\x12) (am-|pm-|bc-)([a-z-]+)(.*) '
            b'([0-9]{1,2})-([0-9]{1,2}) ([0-9]{4})',
            lines[1], flags=re.I)
        if m:
            item['priority'] = self.map_priority(m.group(1).decode())
            item['anpa_category'] = [{'qcode': m.group(2).decode()}]
            item['word_count'] = int(m.group(10).decode())
            # A 0x12 in the fourth group switches the item to preformatted.
            if m.group(4) == b'\x12':
                item[ITEM_TYPE] = CONTENT_TYPE.PREFORMATTED
        # Created date: looked up in the fourth line from the end; the value is
        # suffixed with GMT in the file, hence the UTC tzinfo.
        m = re.search(b'\x03([a-z]+)-([a-z]+)-([0-9]+-[0-9]+-[0-9]+ [0-9]{2}[0-9]{2})GMT', lines[-4], flags=re.I)
        if m:
            item['firstcreated'] = datetime.strptime(m.group(3).decode(), '%m-%d-%y %H%M').replace(tzinfo=utc)
        # Content: everything after the two header lines, between 0x02 and 0x03.
        body = b''.join(lines[2:])
        m = re.match(b'\x02(.*)\x03', body, flags=re.M + re.S)
        if m:
            text = m.group(1).decode().split('\n')
            # Body text: the lines that start with a tab.
            body_lines = [l.strip() for l in text if l.startswith('\t')]
            item['body_text'] = '\n'.join(body_lines)
            # Content metadata: the lines that start with '^'.
            header_lines = [l.strip('^<= ') for l in text if l.startswith('^')]
            if len(header_lines) > 3:
                item['headline'] = header_lines[1]
                item['byline'] = header_lines[-2]
            # Slugline: taken from the first metadata line.
            if len(header_lines) > 1:
                m = re.match('[A-Z]{2}-[A-Z]{2}--([a-z-0-9]+)', header_lines[0], flags=re.I)
                if m:
                    item['slugline'] = m.group(1)
            # Editor's note: when several lines match, the last one wins.
            for line in header_lines:
                m = re.search("EDITOR'S NOTE _(.*)", line)
                if m:
                    item['ednote'] = m.group(1).strip()
        return item
    except Exception as ex:
        raise ParserError.anpaParseFileError(file_path, ex)
示例6: _update
def _update(self, provider):
    """Fetch the provider's feed and build items for its new entries.

    An entry counts as new when its ``updated_parsed`` time is later than the
    provider's ``last_updated`` (the epoch is used when that key is absent).

    :param provider: data provider instance
    :return: a list containing a single list with the new content items
    :rtype: list
    :raises IngestApiError: re-raised unchanged when fetching fails with it
    :raises ParserError: for any other error while fetching or parsing
    """
    config = provider.get('config', {})

    if config.get('auth_required'):
        self.auth_info = dict(
            username=config.get('username', ''),
            password=config.get('password', ''),
        )

    try:
        feed = feedparser.parse(self._fetch_data(config, provider))
    except IngestApiError:
        raise
    except Exception as ex:
        raise ParserError.parseMessageError(ex, provider)

    # Compare as naive datetimes; a provider without last_updated is treated
    # as last updated at the epoch, so every entry is newer.
    provider_updated = provider.get('last_updated', utcfromtimestamp(0))
    provider_updated = provider_updated.replace(tzinfo=None)

    aliases = config.get('field_aliases')
    collected = []

    for entry in feed.entries:
        entry_updated = utcfromtimestamp(timegm(entry.updated_parsed))
        if entry_updated <= provider_updated:
            continue

        item = self._create_item(entry, aliases)
        self.add_timestamps(item)

        # Entries with image links become: the picture items, the text item,
        # and a package tying them together. Entries without images are
        # added as a plain text item, whatever other media they reference.
        image_urls = self._extract_image_links(entry)
        if image_urls:
            image_items = self._create_image_items(image_urls, item)
            collected.extend(image_items)
            collected.append(item)
            collected.append(self._create_package(item, image_items))
        else:
            collected.append(item)

    return [collected]
示例7: _update
def _update(self, provider):
    """Yield items parsed from the XML files found in the provider's path.

    This is a generator. Every regular file in the configured path is moved
    via ``move_file``: flagged as success when it was parsed or when it is
    not newer than the provider's ``last_updated``, flagged as failure when
    handling it raised. ``ingest:update`` is pushed once the loop finishes.

    :param provider: provider dict; ``config.path`` names the directory
    :return: yields one single-item list per parsed file; nothing at all
        when no path is configured
    :raises ParserError: when the file content is not well-formed XML
    :raises ProviderError: for any other unexpected error
    """
    self.provider = provider
    self.path = provider.get('config', {}).get('path', None)
    if not self.path:
        return

    for filename in get_sorted_files(self.path, sort_by=FileSortAttributes.created):
        try:
            filepath = os.path.join(self.path, filename)
            if not os.path.isfile(filepath):
                continue

            stat = os.lstat(filepath)
            last_updated = datetime.fromtimestamp(stat.st_mtime, tz=utc)
            if self.is_latest_content(last_updated, provider.get('last_updated')):
                # Read everything and close the handle first, so the file is
                # not held open while it is moved or while the generator is
                # suspended at the yield.
                with open(filepath, 'r') as f:
                    content = f.read()
                item = self.parser.parse_message(etree.fromstring(content), provider)
                self.add_timestamps(item)
                self.move_file(self.path, filename, provider=provider, success=True)
                yield [item]
            else:
                self.move_file(self.path, filename, provider=provider, success=True)
        except etreeParserError as ex:
            # logger.exception attaches the active traceback itself; the file
            # name is the only lazy %-argument the message needs.
            logger.exception("Ingest Type: AFP - File: %s could not be processed", filename)
            self.move_file(self.path, filename, provider=provider, success=False)
            raise ParserError.newsmlOneParserError(ex, provider)
        except ParserError:
            # Best effort: set the file aside as failed and carry on with the
            # next one instead of aborting the whole run.
            self.move_file(self.path, filename, provider=provider, success=False)
        except Exception as ex:
            self.move_file(self.path, filename, provider=provider, success=False)
            raise ProviderError.ingestError(ex, provider)

    push_notification('ingest:update')
示例8: parse
def parse(self, xml, provider=None):
    """Build an item dict from a NewsML 1 document tree.

    :param xml: root element of the document
    :param provider: ingest provider, only forwarded to the error factory
    :return: whatever ``populate_fields`` returns for the collected item
    :raises ParserError: wraps any exception raised while reading the tree
    """
    item = {}
    try:
        self.root = xml

        source_el = xml.find('NewsItem/NewsComponent/AdministrativeMetadata/Source')
        if source_el is not None:
            item['original_source'] = source_el.find('Party').get('FormalName', '')

        transmission_el = xml.find('NewsEnvelope/TransmissionId')
        if transmission_el is not None:
            item['ingest_provider_sequence'] = transmission_el.text

        # map_priority receives None when the envelope has no Priority.
        priority_el = xml.find('NewsEnvelope/Priority')
        priority_text = None if priority_el is None else priority_el.text
        item['priority'] = self.map_priority(priority_text)

        self.parse_news_identifier(item, xml)
        self.parse_newslines(item, xml)
        self.parse_news_management(item, xml)

        language_els = xml.findall('NewsItem/NewsComponent/DescriptiveMetadata/Language')
        if language_els is not None:
            language = self.parse_attributes_as_dictionary(language_els)
            item['language'] = language[0]['FormalName'] if language else ''

        keyword_els = xml.findall('NewsItem/NewsComponent/DescriptiveMetadata/Property')
        item['keywords'] = self.parse_attribute_values(keyword_els, 'Keyword')

        # Subjects are gathered from three sibling element types, in this order.
        subjects = []
        for subject_path in (
            'NewsItem/NewsComponent/DescriptiveMetadata/SubjectCode/SubjectDetail',
            'NewsItem/NewsComponent/DescriptiveMetadata/SubjectCode/SubjectMatter',
            'NewsItem/NewsComponent/DescriptiveMetadata/SubjectCode/Subject',
        ):
            subjects.extend(xml.findall(subject_path))
        item['subject'] = self.format_subjects(subjects)

        # Serialise body.content and strip its own wrapper tags.
        content_el = xml.find('NewsItem/NewsComponent/ContentItem/DataContent/nitf/body/body.content')
        markup = etree.tostring(content_el, encoding='unicode')
        item['body_html'] = markup.replace('<body.content>', '').replace('</body.content>', '')

        property_els = xml.findall('NewsItem/NewsComponent/ContentItem/Characteristics/Property')
        word_values = self.parse_attribute_values(property_els, 'Words')
        item['word_count'] = word_values[0] if word_values else None

        usage_el = xml.find('NewsItem/NewsComponent/RightsMetadata/UsageRights/UsageType')
        if usage_el is not None:
            item.setdefault('usageterms', usage_el.text)

        genre_els = xml.findall('NewsItem/NewsComponent/DescriptiveMetadata/Genre')
        if genre_els is not None:
            item['genre'] = [{'name': el.get('FormalName')} for el in genre_els]

        return self.populate_fields(item)
    except Exception as ex:
        raise ParserError.newsmlOneParserError(ex, provider)
示例9: parse
def parse(self, xml, provider=None):
    """Map ``xml`` onto a new item dict using ``do_mapping``.

    :param xml: parsed document handed to ``do_mapping``
    :param provider: ingest provider, only forwarded to the error factory
    :return: the item dict, which starts out with the text content type
    :raises ParserError: wraps any exception raised by the mapping
    """
    # Text is the default type.
    item = {ITEM_TYPE: CONTENT_TYPE.TEXT}
    try:
        self.do_mapping(item, xml, namespaces=NS)
    except Exception as ex:
        raise ParserError.parseMessageError(ex, provider)
    else:
        return item
示例10: parse_message
def parse_message(self, tree, provider):
    """Parse NewsMessage.

    Runs ``parse_item`` on every child of every ``itemSet`` element found
    directly under ``tree``.

    :param tree: root element of the message; also stored as ``self.root``
    :param provider: ingest provider, only forwarded to the error factory
    :return: list of parsed items, in document order
    :raises ParserError: wraps any exception raised while parsing
    """
    try:
        self.root = tree
        return [
            self.parse_item(item_tree)
            for item_set in tree.findall(self.qname("itemSet"))
            for item_tree in item_set
        ]
    except Exception as ex:
        raise ParserError.newsmlTwoParserError(ex, provider)
示例11: parse_file
def parse_file(self, filename, provider):
    """Parse one file from the provider's configured path.

    :param filename: file name relative to the provider's ``config.path``
    :param provider: provider dict
    :return: single-item list with the parsed item, or an empty list when the
        provider has no path configured
    :raises ParserError: wraps any exception raised along the way
    """
    try:
        base_dir = provider.get('config', {}).get('path', None)
        if base_dir:
            full_path = os.path.join(base_dir, filename)
            return [self.parser.parse_file(full_path, provider)]
        return []
    except Exception as ex:
        raise ParserError.parseFileError('Teletype', filename, ex, provider)
示例12: parse
def parse(self, xml, provider=None):
    """Build an item dict from a parsed NITF document tree.

    Urgency, expiry, genre, editorial note, headline and dateline are only
    set when the matching element is present.

    :param xml: element tree of the document; must contain ``head/docdata``
    :param provider: ingest provider, only forwarded to the error factory
    :return: dict with the fields read from ``xml``
    :raises ParserError: wraps any exception raised while reading the tree
    """
    item = {}
    try:
        docdata = xml.find('head/docdata')

        # Default content type.
        item[ITEM_TYPE] = CONTENT_TYPE.TEXT

        doc_id = docdata.find('doc-id').get('id-string')
        item['guid'] = doc_id
        item['uri'] = doc_id

        urgency_el = docdata.find('urgency')
        if urgency_el is not None:
            item['urgency'] = int(urgency_el.get('ed-urg', '5'))

        status = docdata.attrib.get('management-status', 'usable')
        item['pubstatus'] = status.lower()

        # date.issue feeds both the created and the version timestamp.
        issue_el = docdata.find('date.issue')
        item['firstcreated'] = get_norm_datetime(issue_el)
        item['versioncreated'] = get_norm_datetime(issue_el)

        expire_el = docdata.find('date.expire')
        if expire_el is not None:
            item['expiry'] = get_norm_datetime(expire_el)

        item['subject'] = get_subjects(xml)
        item['body_html'] = get_content(xml)
        item['place'] = get_places(docdata)
        item['keywords'] = get_keywords(docdata)

        # Genre: keep the entries of the 'genre' vocabulary whose name equals
        # the document's tobject.property.type.
        property_el = xml.find('head/tobject/tobject.property')
        if property_el is not None:
            genre = property_el.get('tobject.property.type')
            genre_map = superdesk.get_resource_service('vocabularies').find_one(req=None, _id='genre')
            if genre_map is not None:
                item['genre'] = [x for x in genre_map.get('items', []) if x['name'] == genre]

        note_el = docdata.find('ed-msg')
        if note_el is not None:
            item['ednote'] = note_el.attrib.get('info')

        # Headline: prefer hl1, fall back to the document title.
        hl1_el = xml.find('body/body.head/hedline/hl1')
        if hl1_el is not None:
            item['headline'] = hl1_el.text
        else:
            title_el = xml.find('head/title')
            if title_el is not None:
                item['headline'] = title_el.text

        # Abstract: prefer the nested <p>, then the abstract element itself.
        abstract_el = xml.find('body/body.head/abstract/p')
        if abstract_el is None:
            abstract_el = xml.find('body/body.head/abstract')
        item['abstract'] = '' if abstract_el is None else abstract_el.text

        city_el = xml.find('body/body.head/dateline/location/city')
        if city_el is not None:
            self.set_dateline(item, city=city_el.text)

        item['byline'] = get_byline(xml)
        parse_meta(xml, item)

        # Keep a word_count set by parse_meta; otherwise count the body.
        body_words = get_word_count(item['body_html'])
        item.setdefault('word_count', body_words)
        return item
    except Exception as ex:
        raise ParserError.nitfParserError(ex, provider)
示例13: test_parse_message_error_save_data
def test_parse_message_error_save_data(self):
    """parseMessageError with ``data`` logs one error naming a file that holds the data."""
    payload = 'some data'
    with assert_raises(ParserError):
        try:
            raise Exception("Err message")
        except Exception as ex:
            raise ParserError.parseMessageError(ex, self.provider, data=payload)

    logged_errors = self.mock_logger_handler.messages['error']
    self.assertEqual(len(logged_errors), 1)

    message = logged_errors[0]
    self.assertIn('file=', message)

    # The log message carries the path of the dump after 'file='.
    saved_path = message.split('file=')[1]
    with open(saved_path, 'r') as saved:
        self.assertEqual(payload, saved.read())
示例14: test_raise_parseMessageError
def test_raise_parseMessageError(self):
    """parseMessageError yields a ParserError with code 1001 and logs it once."""
    with assert_raises(ParserError) as error_context:
        ex = Exception("Testing parseMessageError")
        raise ParserError.parseMessageError(ex, self.provider)

    exception = error_context.exception
    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(exception.code, 1001)
    self.assertEqual(exception.message, "Message could not be parsed")
    self.assertIsNotNone(exception.system_exception)
    self.assertEqual(exception.system_exception.args[0], "Testing parseMessageError")

    logged_errors = self.mock_logger_handler.messages['error']
    self.assertEqual(len(logged_errors), 1)
    self.assertEqual(logged_errors[0],
                     "ParserError Error 1001 - Message could not be parsed: "
                     "Testing parseMessageError on channel TestProvider")
示例15: _update
def _update(self, provider, update):
    """Fetch the provider's JSON items and parse each with its feed parser.

    :param provider: provider dict; its ``config`` is passed to ``_fetch_data``
    :param update: not used by this method
    :return: list of parsed items, in the order they were fetched
    :raises ParserError: wraps the first error raised while choosing a parser
        or parsing an item; the offending item is attached as ``data``
    """
    fetched = self._fetch_data(provider.get('config', {}), provider)
    results = []
    for raw in fetched:
        try:
            results.append(self.get_feed_parser(provider, raw).parse(raw))
        except Exception as ex:
            raise ParserError.parseMessageError(ex, provider, data=raw)
    return results