本文整理汇总了Python中reporter.Reporter.new_article方法的典型用法代码示例。如果您正苦于以下问题:Python Reporter.new_article方法的具体用法?Python Reporter.new_article怎么用?Python Reporter.new_article使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类reporter.Reporter
的用法示例。
在下文中一共展示了Reporter.new_article方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: Checker
# 需要导入模块: from reporter import Reporter [as 别名]
# 或者: from reporter.Reporter import new_article [as 别名]
class Checker(object):
    """Walk an input directory, parse every file as RDF and either print
    its TriG serialisation or store it under the output directory.

    Progress is reported through two Reporter objects: 'repok' for
    informational messages and 'reper' for errors.
    """

    def __init__(self, input_dir, output_dir=None, tmp_dir=None):
        """Prepare the checker.

        :param input_dir: directory tree containing the RDF files to check.
        :param output_dir: where converted files are written; when None the
            serialisation is only reported, not stored.
        :param tmp_dir: temporary directory forwarded to Storer.load.
        """
        self.input_dir = input_dir
        self.output_dir = output_dir
        self.tmp_dir = tmp_dir
        self.storer = Storer()
        self.name = self.__class__.__name__
        self.repok = Reporter(prefix="[%s - INFO] " % self.name)
        self.repok.new_article()
        self.reper = Reporter(prefix="[%s - ERROR] " % self.name)
        self.reper.new_article()

    def process(self):
        """Check every file under input_dir, reporting per-file results."""
        for cur_dir, cur_subdir, cur_files in os.walk(self.input_dir):
            for cur_file in cur_files:
                # Start a fresh report section for each file.
                self.repok.new_article()
                self.reper.new_article()
                cur_rdf_path = cur_dir + os.sep + cur_file
                try:
                    self.repok.add_sentence("Processing '%s'" % cur_rdf_path)
                    g = self.storer.load(cur_rdf_path, tmp_dir=self.tmp_dir)
                    if self.output_dir is None:
                        self.repok.add_sentence(
                            "The RDF graph has been converted in TRIG as follows:\n%s"
                            % g.serialize(format="trig"))
                    else:
                        if not os.path.exists(self.output_dir):
                            os.makedirs(self.output_dir)
                        output_file = self.output_dir + os.sep + "converted_" + cur_file + ".ttl"
                        # BUG FIX: the original interpolated a 2-tuple into a
                        # format string with a single "%s", which raised
                        # TypeError after serialising and mis-reported the
                        # file as broken. Serialize first, then report.
                        g.serialize(output_file, format="trig")
                        self.repok.add_sentence(
                            "The RDF graph has been stored in %s" % output_file)
                except Exception:
                    # BUG FIX: the original never interpolated the path into
                    # the "%s" placeholder of the error message.
                    self.reper.add_sentence(
                        "The file '%s' doesn't contain RDF statements" % cur_rdf_path,
                        False)
示例2: Reporter
# 需要导入模块: from reporter import Reporter [as 别名]
# 或者: from reporter.Reporter import new_article [as 别名]
import os
import re
import rdflib
import shutil
import json
from reporter import Reporter
from rdflib import Graph, ConjunctiveGraph, URIRef, Literal
from rdflib.namespace import RDF, Namespace, RDFS
import csv
from support import find_paths
from datetime import datetime
# URL of the JSON-LD context document used by the OpenCitations Corpus.
context_path = "https://w3id.org/oc/corpus/context.json"
# Reporters collecting informational and error messages for this script.
repok = Reporter(True, prefix="[fix_prov.py: INFO] ")
reperr = Reporter(True, prefix="[fix_prov.py: ERROR] ")
repok.new_article()
reperr.new_article()
# Cache for parsed JSON-LD context documents, keyed by their URL.
context_json = {}
# RDF namespaces used when rewriting provenance statements.
PROV = Namespace("http://www.w3.org/ns/prov#")
OCO = Namespace("https://w3id.org/oc/ontology/")
DCTERMS = Namespace("http://purl.org/dc/terms/")
CITO = Namespace("http://purl.org/spar/cito/")
DATACITE = Namespace("http://purl.org/spar/datacite/")
FRBR = Namespace("http://purl.org/vocab/frbr/core#")
LITERAL = Namespace("http://www.essepuntato.it/2010/06/literalreification/")
def create_citation_update_part(cur_subj_g, is_f=True):
query_part = u""
is_first = is_f
示例3: GraphSet
# 需要导入模块: from reporter import Reporter [as 别名]
# 或者: from reporter.Reporter import new_article [as 别名]
class GraphSet(object):
# Labels
labels = {
"ar": "agent role",
"be": "bibliographic entry",
"br": "bibliographic resource",
"id": "identifier",
"ra": "responsible agent",
"re": "resource embodiment"
}
def __init__(self, base_iri, context_path, info_dir):
    """Create an empty graph set whose entity IRIs hang off base_iri.

    :param base_iri: base IRI under which per-type graphs are published.
    :param context_path: path/URL of the JSON-LD context to use.
    :param info_dir: directory holding the per-type counter files.
    """
    self.r_count = 0
    # One rdflib graph per subject entity.
    self.g = []
    # URIRef -> the graph in self.g that describes it.
    self.entity_g = {}
    # URIRef -> the corresponding graph entity object.
    self.res_to_entity = {}
    self.base_iri = base_iri
    self.context_path = context_path
    self.cur_name = "SPACIN " + self.__class__.__name__
    self.info_dir = info_dir
    # The URL layout below is relied upon by other classes and must not
    # change; only the value of base_iri is meant to vary.
    for short in ("ar", "be", "br", "id", "ra", "re"):
        setattr(self, "g_" + short, base_iri + short + "/")
        setattr(self, short + "_info_path", info_dir + short + ".txt")
    self.reperr = Reporter(True)
    self.reperr.new_article()
    self.repok = Reporter(True)
    self.repok.new_article()
def res_count(self):
    """Return the number of resources minted so far by this graph set."""
    return self.r_count
def get_entity(self, res):
    """Return the graph entity registered for res, or None when unknown."""
    return self.res_to_entity.get(res)
# Add resources related to bibliographic entities
def add_ar(self, resp_agent, source_agent=None, source=None, res=None):
    """Mint (or wrap, when res is given) an agent role ('ar') entity."""
    return self._add(graph_url=self.g_ar, main_type=GraphEntity.role_in_time,
                     res=res, resp_agent=resp_agent, source_agent=source_agent,
                     source=source, info_file_path=self.ar_info_path,
                     short_name="ar")
def add_be(self, resp_agent, source_agent=None, source=None, res=None):
    """Mint (or wrap, when res is given) a bibliographic entry ('be') entity."""
    return self._add(graph_url=self.g_be,
                     main_type=GraphEntity.bibliographic_reference,
                     res=res, resp_agent=resp_agent, source_agent=source_agent,
                     source=source, info_file_path=self.be_info_path,
                     short_name="be")
def add_br(self, resp_agent, source_agent=None, source=None, res=None):
    """Mint (or wrap, when res is given) a bibliographic resource ('br') entity."""
    return self._add(graph_url=self.g_br, main_type=GraphEntity.expression,
                     res=res, resp_agent=resp_agent, source_agent=source_agent,
                     source=source, info_file_path=self.br_info_path,
                     short_name="br")
def add_id(self, resp_agent, source_agent=None, source=None, res=None):
    """Mint (or wrap, when res is given) an identifier ('id') entity."""
    return self._add(graph_url=self.g_id, main_type=GraphEntity.identifier,
                     res=res, resp_agent=resp_agent, source_agent=source_agent,
                     source=source, info_file_path=self.id_info_path,
                     short_name="id")
def add_ra(self, resp_agent, source_agent=None, source=None, res=None):
    """Mint (or wrap, when res is given) a responsible agent ('ra') entity."""
    return self._add(graph_url=self.g_ra, main_type=GraphEntity.agent,
                     res=res, resp_agent=resp_agent, source_agent=source_agent,
                     source=source, info_file_path=self.ra_info_path,
                     short_name="ra")
def add_re(self, resp_agent, source_agent=None, source=None, res=None):
    """Mint (or wrap, when res is given) a resource embodiment ('re') entity."""
    return self._add(graph_url=self.g_re, main_type=GraphEntity.manifestation,
                     res=res, resp_agent=resp_agent, source_agent=source_agent,
                     source=source, info_file_path=self.re_info_path,
                     short_name="re")
def _add(self, graph_url, main_type, res, resp_agent, source_agent,
source, info_file_path, short_name, list_of_entities=[]):
cur_g = Graph(identifier=graph_url)
self._set_ns(cur_g)
self.g += [cur_g]
# This is the case when 'res_or_resp_agent' is a resource. It allows one to create
# the graph entity starting from and existing URIRef, without incrementing anything
# at the graph set level. However, a new graph is created and reserved for such resource
# and it is added to the graph set.
if res is not None:
return self._generate_entity(cur_g, res=res, resp_agent=resp_agent,
source_agent=source_agent, source=source,
list_of_entities=list_of_entities)
# This is the case when 'res_or_resp_agent' is actually a string representing the name
# of the responsible agent. In this case, a new individual will be created.
else:
#.........这里部分代码省略.........
示例4: FormatProcessor
# 需要导入模块: from reporter import Reporter [as 别名]
# 或者: from reporter.Reporter import new_article [as 别名]
class FormatProcessor(object):
    """This class is the abstract one for any kind of processors."""

    # NOTE(review): "[A-z]" also matches "[", "]", "^", "_" and "`" besides
    # letters — presumably "[A-Za-z]" was intended, but both patterns are
    # kept verbatim to preserve the existing matching behaviour.
    doi_pattern = "[^A-z0-9\.]([0-9]+\.[0-9]+(\.[0-9]+)*/[^%\"# \?<>{}\^\[\]`\|\\\+]+)"
    http_pattern = "(https?://([A-z]|[0-9]|%|&|\?|/|\.|_|~|-|:)+)"

    def __init__(self, base_iri, context_base, info_dir, entries, agent_id=None):
        """Initialise the processor from an 'entries' mapping.

        Optional fields (doi, pmid, pmcid, url, curator, source,
        source_provider) default to None when absent; 'doi' and 'url' are
        lowercased. The 'references' field is required.
        """
        def optional(key, transform=None):
            # Fetch an optional entry, applying 'transform' when present;
            # replaces the original eight copy-pasted if/else chains.
            if key in entries:
                value = entries[key]
                return transform(value) if transform is not None else value
            return None

        self.doi = optional("doi", lambda v: v.lower())
        self.pmid = optional("pmid")
        self.pmcid = optional("pmcid")
        self.url = optional("url", lambda v: v.lower())
        self.curator = optional("curator")
        self.source = optional("source")
        self.source_provider = optional("source_provider")
        self.entries = entries["references"]
        self.name = "SPACIN " + self.__class__.__name__
        self.g_set = GraphSet(base_iri, context_base, info_dir)
        self.id = agent_id
        self.repok = Reporter(prefix="[%s - INFO] " % self.name)
        self.repok.new_article()
        self.reperr = Reporter(prefix="[%s - ERROR] " % self.name)
        self.reperr.new_article()

    def process(self):
        """Run the processor; implemented in the subclasses."""
        pass

    def graph_set(self):
        """Return the GraphSet populated by this processor."""
        return self.g_set

    def graphs(self):
        """Return the graphs of the underlying GraphSet."""
        return self.g_set.graphs()

    def message(self, mess):
        """Return 'mess' rendered as a plain string message."""
        return "%s" % mess

    @staticmethod
    def clean_entry(entry):
        """Percent-encode 'entry' after replacing each ':' with ','."""
        return quote(sa(re.sub(":", ",", entry)))

    @staticmethod
    def extract_data(string, pattern):
        """Return group 1 of the first match of 'pattern' in 'string',
        or None when 'string' is None or nothing matches."""
        if string is not None:
            result = re.search(pattern, string)
            if result:
                return result.group(1)

    @staticmethod
    def extract_doi(string):
        """Extract a DOI from 'string', stripping one trailing '.' or ','."""
        if string is not None:
            result = FormatProcessor.extract_data(string, FormatProcessor.doi_pattern)
            if result:
                result = re.sub("(\.|,)?$", "", result)
            return result

    @staticmethod
    def extract_url(string):
        """Extract an HTTP(S) URL from 'string', trimming a trailing '/'
        or '.' and removing backslashes."""
        if string is not None:
            result = FormatProcessor.extract_data(string, FormatProcessor.http_pattern)
            if result:
                result = re.sub("\\\\", "", re.sub("/?\.?$", "", result))
            return result
示例5: Reporter
# 需要导入模块: from reporter import Reporter [as 别名]
# 或者: from reporter.Reporter import new_article [as 别名]
import glob
import json
from reporter import Reporter
if __name__ == "__main__":
arg_parser = argparse.ArgumentParser(
"find_prov_issues.py",
description="This script allows one to identify the provenance item ids that have some issues.")
arg_parser.add_argument("-i", "--input_dir", dest="i_dir", required=True,
help="The id directory where to look for issues.")
arg_parser.add_argument("-o", "--output_file", dest="o_file", required=True,
help="The file where to write the results.")
args = arg_parser.parse_args()
rep = Reporter(True)
rep.new_article()
for cur_dir in glob.glob(args.i_dir + os.sep + "[0-9]*/[0-9]*/prov/"):
se_file = cur_dir + os.sep + "se.json"
ca_file = cur_dir + os.sep + "ca.json"
with open(se_file) as f, open(ca_file) as g:
cur_se = json.load(f)
cur_ca = json.load(g)
for item in cur_se["@graph"]:
generated = []
se_generated_by = []
se_invalidated_by = []
for cur_graph in item["@graph"]:
cur_generated = cur_graph["generated"]
示例6: Storer
# 需要导入模块: from reporter import Reporter [as 别名]
# 或者: from reporter.Reporter import new_article [as 别名]
class Storer(object):
def __init__(self, graph_set=None, repok=None, reperr=None,
             context_map=None, dir_split=0, n_file_item=1):
    """Prepare a storer for RDF graphs.

    :param graph_set: optional GraphSet whose graphs seed self.g.
    :param repok: Reporter for informational messages (created if None).
    :param reperr: Reporter for error messages (created if None).
    :param context_map: mapping context URL -> context file path; each
        path is replaced in place with the parsed JSON content.
    :param dir_split: directory-splitting factor for stored files.
    :param n_file_item: number of items per stored file.
    """
    self.dir_split = dir_split
    self.n_file_item = n_file_item
    # BUG FIX: the original used a mutable default ({}) which the loop
    # below then mutated, leaking parsed contexts across all Storer
    # instances created without an explicit context_map.
    self.context_map = {} if context_map is None else context_map
    # Replace each context file path with its parsed JSON content
    # (intentionally mutates a caller-supplied mapping, as before).
    for context_url in self.context_map:
        context_file_path = self.context_map[context_url]
        with open(context_file_path) as f:
            self.context_map[context_url] = json.load(f)
    if graph_set is None:
        self.g = []
    else:
        self.g = graph_set.graphs()
    if repok is None:
        self.repok = Reporter(prefix="[Storer: INFO] ")
    else:
        self.repok = repok
    if reperr is None:
        self.reperr = Reporter(prefix="[Storer: ERROR] ")
    else:
        self.reperr = reperr
    self.preface_query = ""
def store_all(self, base_dir, base_iri, context_path, tmp_dir=None, g_set=None, override=False):
    """Store every graph (self.g plus g_set) as files under base_dir.

    Returns the list of file paths that were written; entries may be
    None for graphs that could not be stored.
    """
    # FIX: avoid the mutable default argument g_set=[] (shared between
    # calls); None now stands for "no extra graphs".
    for g in (g_set if g_set is not None else []):
        self.g += [g]
    self.repok.new_article()
    self.reperr.new_article()
    self.repok.add_sentence("Starting the process")
    # Accumulate graphs per destination file, then write each file once.
    processed_graphs = {}
    for cur_g in self.g:
        processed_graphs = self.store(cur_g, base_dir, base_iri, context_path, tmp_dir,
                                      override, processed_graphs, False)
    stored_graph_path = []
    for cur_file_path in processed_graphs:
        stored_graph_path += [cur_file_path]
        self.__store_in_file(processed_graphs[cur_file_path], cur_file_path, context_path)
    return stored_graph_path
def upload_and_store(self, base_dir, triplestore_url, base_iri, context_path,
                     tmp_dir=None, g_set=None, override=False):
    """Store all graphs on disk and, if all succeeded, upload them to the
    triplestore; otherwise mark the stored files with '.notuploaded'."""
    # FIX: avoid the mutable default argument g_set=[]; normalise to a
    # fresh list before delegating to store_all.
    stored_graph_path = self.store_all(base_dir, base_iri, context_path, tmp_dir,
                                       [] if g_set is None else g_set, override)
    # Some graphs were not stored properly, so none will be uploaded to
    # the triplestore, but we highlight those that could have been
    # uploaded by marking them with a ".notuploaded" file.
    if None in stored_graph_path:
        for file_path in stored_graph_path:
            # FIX: skip the None entries (failed stores); the original
            # created a literal "None.notuploaded" marker for them.
            if file_path is not None:
                # Create a marker for the file not uploaded in the triplestore
                open("%s.notuploaded" % file_path, "w").close()
                self.reperr.add_sentence("[6] "
                                         "The statements of in the JSON-LD file '%s' were not "
                                         "uploaded into the triplestore." % file_path)
    else:  # All the files have been stored
        self.upload_all(self.g, triplestore_url, base_dir)
def __query(self, query_string, triplestore_url, n_statements=None, base_dir=None):
    """Send a SPARQL Update to the triplestore at triplestore_url.

    Returns True on success, False when query_string is empty or the
    update fails. On failure, if base_dir is given, the query is dumped
    into '<base_dir>/tp_err/<timestamp>_not_uploaded.txt' so it can be
    inspected or replayed later.
    """
    if query_string != "":
        try:
            tp = SPARQLWrapper(triplestore_url)
            tp.setMethod('POST')
            tp.setQuery(query_string)
            tp.query()
            # n_statements is only used to make the log message more
            # informative; it does not affect the update itself.
            if n_statements is None:
                self.repok.add_sentence(
                    "Triplestore updated by means of a SPARQL Update query.")
            else:
                self.repok.add_sentence(
                    "Triplestore updated with %s more RDF statements." % n_statements)
            return True
        except Exception as e:
            self.reperr.add_sentence("[1] "
                                     "Graph was not loaded into the "
                                     "triplestore due to communication problems: %s" % str(e))
            if base_dir is not None:
                # Persist the failed query under a timestamped file name.
                tp_err_dir = base_dir + os.sep + "tp_err"
                if not os.path.exists(tp_err_dir):
                    os.makedirs(tp_err_dir)
                cur_file_err = tp_err_dir + os.sep + \
                    datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f_not_uploaded.txt')
                with io.open(cur_file_err, "w", encoding="utf-8") as f:
                    f.write(query_string)
    return False
def upload_all(self, all_g, triplestore_url, base_dir):
result = True
#.........这里部分代码省略.........