当前位置: 首页>>代码示例>>Python>>正文


Python Reporter.new_article方法代码示例

本文整理汇总了Python中reporter.Reporter.new_article方法的典型用法代码示例。如果您正苦于以下问题:Python Reporter.new_article方法的具体用法?Python Reporter.new_article怎么用?Python Reporter.new_article使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在reporter.Reporter的用法示例。


在下文中一共展示了Reporter.new_article方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: Checker

# 需要导入模块: from reporter import Reporter [as 别名]
# 或者: from reporter.Reporter import new_article [as 别名]
class Checker(object):
    """Walk 'input_dir', load every file found as an RDF graph and either
    report its TriG serialisation (when 'output_dir' is None) or store the
    serialisation as a file in 'output_dir'."""

    def __init__(self, input_dir, output_dir=None, tmp_dir=None):
        self.input_dir = input_dir    # directory tree scanned for RDF files
        self.output_dir = output_dir  # destination dir; None = report only
        self.tmp_dir = tmp_dir        # working dir handed to Storer.load
        self.storer = Storer()
        self.name = self.__class__.__name__
        self.repok = Reporter(prefix="[%s - INFO] " % self.name)
        self.repok.new_article()
        self.reper = Reporter(prefix="[%s - ERROR] " % self.name)
        self.reper.new_article()

    def process(self):
        """Process every file under 'input_dir'; files that cannot be loaded
        as RDF are reported through the error reporter instead of aborting."""
        for cur_dir, cur_subdir, cur_files in os.walk(self.input_dir):
            for cur_file in cur_files:
                self.repok.new_article()
                self.reper.new_article()
                cur_rdf_path = cur_dir + os.sep + cur_file
                try:
                    self.repok.add_sentence("Processing '%s'" % cur_rdf_path)
                    g = self.storer.load(cur_rdf_path, tmp_dir=self.tmp_dir)
                    if self.output_dir is None:
                        self.repok.add_sentence("The RDF graph has been converted in TRIG as follows:\n%s"
                                                % g.serialize(format="trig"))
                    else:
                        if not os.path.exists(self.output_dir):
                            os.makedirs(self.output_dir)
                        output_file = self.output_dir + os.sep + "converted_" + cur_file + ".ttl"
                        # BUG FIX: the original interpolated a two-item tuple into a
                        # single-'%s' format string, which raises TypeError. Serialize
                        # first (for its side effect of writing the file), then report
                        # only the destination path.
                        g.serialize(output_file, format="trig")
                        self.repok.add_sentence("The RDF graph has been stored in %s" % output_file)
                except Exception:
                    # BUG FIX: the original never interpolated cur_rdf_path, so the
                    # message contained a literal '%s'.
                    self.reper.add_sentence(
                        "The file '%s' doesn't contain RDF statements" % cur_rdf_path, False)
开发者ID:essepuntato,项目名称:opencitations,代码行数:34,代码来源:checker.py

示例2: Reporter

# 需要导入模块: from reporter import Reporter [as 别名]
# 或者: from reporter.Reporter import new_article [as 别名]
import os
import re
import rdflib
import shutil
import json
from reporter import Reporter
from rdflib import Graph, ConjunctiveGraph, URIRef, Literal
from rdflib.namespace import RDF, Namespace, RDFS
import csv
from support import find_paths
from datetime import datetime

# JSON-LD context document shared by all OpenCitations Corpus resources.
context_path = "https://w3id.org/oc/corpus/context.json"
# Script-wide reporters for INFO and ERROR messages.
# NOTE(review): the first positional argument is presumably a "print to
# console" flag — confirm against the Reporter class.
repok = Reporter(True, prefix="[fix_prov.py: INFO] ")
reperr = Reporter(True, prefix="[fix_prov.py: ERROR] ")
repok.new_article()
reperr.new_article()
# Cache for loaded JSON-LD context documents, keyed by context URL.
context_json = {}
# RDF namespaces used when building/querying provenance graphs.
PROV = Namespace("http://www.w3.org/ns/prov#")
OCO = Namespace("https://w3id.org/oc/ontology/")
DCTERMS = Namespace("http://purl.org/dc/terms/")
CITO = Namespace("http://purl.org/spar/cito/")
DATACITE = Namespace("http://purl.org/spar/datacite/")
FRBR = Namespace("http://purl.org/vocab/frbr/core#")
LITERAL = Namespace("http://www.essepuntato.it/2010/06/literalreification/")


def create_citation_update_part(cur_subj_g, is_f=True):
    query_part = u""
    is_first = is_f
    
开发者ID:essepuntato,项目名称:opencitations,代码行数:32,代码来源:fix_prov_to_clashing_updates.py

示例3: GraphSet

# 需要导入模块: from reporter import Reporter [as 别名]
# 或者: from reporter.Reporter import new_article [as 别名]
class GraphSet(object):
    # Labels
    labels = {
        "ar": "agent role",
        "be": "bibliographic entry",
        "br": "bibliographic resource",
        "id": "identifier",
        "ra": "responsible agent",
        "re": "resource embodiment"
    }

    def __init__(self, base_iri, context_path, info_dir):
        """Set up an empty graph set rooted at 'base_iri'."""
        # Counter of the resources minted so far.
        self.r_count = 0
        # One rdflib.Graph per subject entity.
        self.g = []
        # Maps a URIRef to the graph (in self.g) that describes it.
        self.entity_g = {}
        # Maps a URIRef to its related graph entity.
        self.res_to_entity = {}
        self.base_iri = base_iri
        self.context_path = context_path
        self.cur_name = "SPACIN " + self.__class__.__name__

        # Graph base URLs, one per entity kind. This URL structure is quite
        # important for the other classes developed and should not be changed;
        # the only part that can change is the value of base_iri.
        for short_name in ("ar", "be", "br", "id", "ra", "re"):
            setattr(self, "g_" + short_name, base_iri + short_name + "/")

        # Local counter-file paths, one per entity kind.
        self.info_dir = info_dir
        for short_name in ("ar", "be", "br", "id", "ra", "re"):
            setattr(self, short_name + "_info_path", info_dir + short_name + ".txt")

        self.reperr = Reporter(True)
        self.reperr.new_article()
        self.repok = Reporter(True)
        self.repok.new_article()

    def res_count(self):
        """Return the number of resources minted so far by this graph set."""
        return self.r_count

    def get_entity(self, res):
        """Return the graph entity registered for the resource 'res',
        or None when 'res' is unknown to this graph set."""
        return self.res_to_entity.get(res)

    # Add resources related to bibliographic entities
    def add_ar(self, resp_agent, source_agent=None, source=None, res=None):
        """Add an agent role ('ar') entity; if 'res' is given, the existing
        URIRef is wrapped instead of minting a new one (see _add)."""
        return self._add(
            self.g_ar, GraphEntity.role_in_time, res, resp_agent,
            source_agent, source, self.ar_info_path, "ar")

    def add_be(self, resp_agent, source_agent=None, source=None, res=None):
        """Add a bibliographic entry ('be') entity; if 'res' is given, the
        existing URIRef is wrapped instead of minting a new one (see _add)."""
        return self._add(
            self.g_be, GraphEntity.bibliographic_reference, res, resp_agent,
            source_agent, source, self.be_info_path, "be")

    def add_br(self, resp_agent, source_agent=None, source=None, res=None):
        """Add a bibliographic resource ('br') entity; if 'res' is given, the
        existing URIRef is wrapped instead of minting a new one (see _add)."""
        return self._add(self.g_br, GraphEntity.expression, res, resp_agent,
                         source_agent, source, self.br_info_path, "br")

    def add_id(self, resp_agent, source_agent=None, source=None, res=None):
        """Add an identifier ('id') entity; if 'res' is given, the existing
        URIRef is wrapped instead of minting a new one (see _add)."""
        return self._add(self.g_id, GraphEntity.identifier, res, resp_agent,
                         source_agent, source, self.id_info_path, "id")

    def add_ra(self, resp_agent, source_agent=None, source=None, res=None):
        """Add a responsible agent ('ra') entity; if 'res' is given, the
        existing URIRef is wrapped instead of minting a new one (see _add)."""
        return self._add(self.g_ra, GraphEntity.agent, res, resp_agent,
                         source_agent, source, self.ra_info_path, "ra")

    def add_re(self, resp_agent, source_agent=None, source=None, res=None):
        """Add a resource embodiment ('re') entity; if 'res' is given, the
        existing URIRef is wrapped instead of minting a new one (see _add)."""
        return self._add(
            self.g_re, GraphEntity.manifestation, res, resp_agent,
            source_agent, source, self.re_info_path, "re")

    def _add(self, graph_url, main_type, res, resp_agent, source_agent,
             source, info_file_path, short_name, list_of_entities=[]):
        cur_g = Graph(identifier=graph_url)
        self._set_ns(cur_g)
        self.g += [cur_g]

        # This is the case when 'res_or_resp_agent' is a resource. It allows one to create
        # the graph entity starting from and existing URIRef, without incrementing anything
        # at the graph set level. However, a new graph is created and reserved for such resource
        # and it is added to the graph set.
        if res is not None:
            return self._generate_entity(cur_g, res=res, resp_agent=resp_agent,
                                         source_agent=source_agent, source=source,
                                         list_of_entities=list_of_entities)
        # This is the case when 'res_or_resp_agent' is actually a string representing the name
        # of the responsible agent. In this case, a new individual will be created.
        else:
#.........这里部分代码省略.........
开发者ID:essepuntato,项目名称:opencitations,代码行数:103,代码来源:graphlib.py

示例4: FormatProcessor

# 需要导入模块: from reporter import Reporter [as 别名]
# 或者: from reporter.Reporter import new_article [as 别名]
class FormatProcessor(object):
    """This class is the abstract one for any kind of processors.

    FIX: the original placed this string *after* the class attributes, so it
    was a dead expression and never bound to __doc__; it is now a real class
    docstring.
    """

    # Regex capturing a DOI-like token preceded by a non-alphanumeric separator.
    doi_pattern = "[^A-z0-9\.]([0-9]+\.[0-9]+(\.[0-9]+)*/[^%\"# \?<>{}\^\[\]`\|\\\+]+)"
    # Regex capturing an HTTP(S) URL.
    http_pattern = "(https?://([A-z]|[0-9]|%|&|\?|/|\.|_|~|-|:)+)"

    def __init__(self, base_iri, context_base, info_dir, entries, agent_id=None):
        """Cache the metadata fields found in 'entries' (missing keys become
        None) and set up the graph set and the INFO/ERROR reporters.

        'entries' must contain at least the key "references"."""
        # DOIs and URLs are case-insensitive for matching purposes, so they
        # are normalised to lower case; the other fields are kept verbatim.
        self.doi = entries["doi"].lower() if "doi" in entries else None
        self.pmid = entries.get("pmid")
        self.pmcid = entries.get("pmcid")
        self.url = entries["url"].lower() if "url" in entries else None
        self.curator = entries.get("curator")
        self.source = entries.get("source")
        self.source_provider = entries.get("source_provider")

        self.entries = entries["references"]
        self.name = "SPACIN " + self.__class__.__name__
        self.g_set = GraphSet(base_iri, context_base, info_dir)
        self.id = agent_id
        self.repok = Reporter(prefix="[%s - INFO] " % self.name)
        self.repok.new_article()
        self.reperr = Reporter(prefix="[%s - ERROR] " % self.name)
        self.reperr.new_article()

    def process(self):
        """Run the processor; implemented in the subclasses."""
        pass  # Implemented in the subclasses

    def graph_set(self):
        """Return the GraphSet populated by this processor."""
        return self.g_set

    def graphs(self):
        """Return the rdflib graphs held by the underlying graph set."""
        return self.g_set.graphs()

    def message(self, mess):
        """Return 'mess' rendered as a plain string."""
        return "%s" % mess

    @staticmethod
    def clean_entry(entry):
        """Return 'entry' with ':' replaced by ',' and then URL-quoted."""
        return quote(sa(re.sub(":", ",", entry)))

    @staticmethod
    def extract_data(string, pattern):
        """Return the first group of 'pattern' matched in 'string', or None
        when 'string' is None or no match is found."""
        if string is not None:
            result = re.search(pattern, string)
            if result:
                return result.group(1)

    @staticmethod
    def extract_doi(string):
        """Return the first DOI found in 'string' (trailing '.' or ','
        stripped), or None."""
        if string is not None:
            result = FormatProcessor.extract_data(string, FormatProcessor.doi_pattern)
            if result:
                result = re.sub("(\.|,)?$", "", result)

            return result

    @staticmethod
    def extract_url(string):
        """Return the first HTTP(S) URL found in 'string' (trailing '/' or
        '.' and any backslashes stripped), or None."""
        if string is not None:
            result = FormatProcessor.extract_data(string, FormatProcessor.http_pattern)
            if result:
                result = re.sub("\\\\", "", re.sub("/?\.?$", "", result))

            return result

示例5: Reporter

# 需要导入模块: from reporter import Reporter [as 别名]
# 或者: from reporter.Reporter import new_article [as 别名]
import glob
import json
from reporter import Reporter

if __name__ == "__main__":
    arg_parser = argparse.ArgumentParser(
        "find_prov_issues.py",
        description="This script allows one to identify the provenance item ids that have some issues.")
    arg_parser.add_argument("-i", "--input_dir", dest="i_dir", required=True,
                            help="The id directory where to look for issues.")
    arg_parser.add_argument("-o", "--output_file", dest="o_file", required=True,
                            help="The file where to write the results.")
    args = arg_parser.parse_args()
    
    rep = Reporter(True)
    rep.new_article()
    
    for cur_dir in glob.glob(args.i_dir + os.sep + "[0-9]*/[0-9]*/prov/"):
        se_file = cur_dir + os.sep + "se.json"
        ca_file = cur_dir + os.sep + "ca.json"
        
        with open(se_file) as f, open(ca_file) as g:
            cur_se = json.load(f)
            cur_ca = json.load(g)
            for item in cur_se["@graph"]:
                generated = []
                se_generated_by = []
                se_invalidated_by = []
                
                for cur_graph in item["@graph"]:
                    cur_generated = cur_graph["generated"]
开发者ID:essepuntato,项目名称:opencitations,代码行数:33,代码来源:find_prov_issues.py

示例6: Storer

# 需要导入模块: from reporter import Reporter [as 别名]
# 或者: from reporter.Reporter import new_article [as 别名]
class Storer(object):

    def __init__(self, graph_set=None, repok=None, reperr=None,
                 context_map={}, dir_split=0, n_file_item=1):
        self.dir_split = dir_split
        self.n_file_item = n_file_item
        self.context_map = context_map
        for context_url in context_map:
            context_file_path = context_map[context_url]
            with open(context_file_path) as f:
                context_json = json.load(f)
                self.context_map[context_url] = context_json

        if graph_set is None:
            self.g = []
        else:
            self.g = graph_set.graphs()
        if repok is None:
            self.repok = Reporter(prefix="[Storer: INFO] ")
        else:
            self.repok = repok
        if reperr is None:
            self.reperr = Reporter(prefix="[Storer: ERROR] ")
        else:
            self.reperr = reperr
        self.preface_query = ""

    def store_all(self, base_dir, base_iri, context_path, tmp_dir=None, g_set=None, override=False):
        """Store every graph held by this storer (plus those in 'g_set')
        under 'base_dir', and return the list of file paths written.

        Entries in the returned list may be None when storing a graph
        failed (see upload_and_store)."""
        # FIX: 'g_set=None' replaces the original mutable default ([]), the
        # classic shared-default-argument pitfall.
        if g_set is None:
            g_set = []
        self.g += list(g_set)

        self.repok.new_article()
        self.reperr.new_article()

        self.repok.add_sentence("Starting the process")

        # self.store accumulates {file_path: graph} across the calls.
        processed_graphs = {}
        for cur_g in self.g:
            processed_graphs = self.store(cur_g, base_dir, base_iri, context_path, tmp_dir,
                                          override, processed_graphs, False)

        stored_graph_path = []
        for cur_file_path in processed_graphs:
            stored_graph_path += [cur_file_path]
            self.__store_in_file(processed_graphs[cur_file_path], cur_file_path, context_path)

        return stored_graph_path

    def upload_and_store(self, base_dir, triplestore_url, base_iri, context_path,
                         tmp_dir=None, g_set=None, override=False):
        """Store all graphs on disk and, only if every file was written
        correctly, upload them to the triplestore at 'triplestore_url'."""
        # FIX: 'g_set=None' replaces the original mutable default ([]).
        if g_set is None:
            g_set = []

        stored_graph_path = self.store_all(base_dir, base_iri, context_path, tmp_dir, g_set, override)

        # Some graphs were not stored properly, then no one will be uploaded
        # to the triplestore, but we highlight those that could be added in
        # principle by mentioning them with a ".notuploaded" marker
        if None in stored_graph_path:
            for file_path in stored_graph_path:
                # FIX: skip the None entries themselves — the original would
                # have created a file literally named "None.notuploaded".
                if file_path is None:
                    continue
                # Create a marker for the file not uploaded in the triplestore
                open("%s.notuploaded" % file_path, "w").close()
                self.reperr.add_sentence("[6] "
                                         "The statements of in the JSON-LD file '%s' were not "
                                         "uploaded into the triplestore." % file_path)
        else:  # All the files have been stored
            self.upload_all(self.g, triplestore_url, base_dir)

    def __query(self, query_string, triplestore_url, n_statements=None, base_dir=None):
        """Run 'query_string' as a SPARQL Update (POST) against
        'triplestore_url' and return True on success, False otherwise.

        On failure the error is reported and, when 'base_dir' is given, the
        failed query is dumped into a timestamped file under
        '<base_dir>/tp_err'."""
        if query_string == "":
            return False

        try:
            endpoint = SPARQLWrapper(triplestore_url)
            endpoint.setMethod('POST')
            endpoint.setQuery(query_string)
            endpoint.query()

            if n_statements is None:
                message = "Triplestore updated by means of a SPARQL Update query."
            else:
                message = "Triplestore updated with %s more RDF statements." % n_statements
            self.repok.add_sentence(message)
            return True

        except Exception as e:
            self.reperr.add_sentence("[1] "
                                     "Graph was not loaded into the "
                                     "triplestore due to communication problems: %s" % str(e))
            if base_dir is not None:
                err_dir = base_dir + os.sep + "tp_err"
                if not os.path.exists(err_dir):
                    os.makedirs(err_dir)
                err_file_path = err_dir + os.sep + \
                    datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f_not_uploaded.txt')
                with io.open(err_file_path, "w", encoding="utf-8") as out_file:
                    out_file.write(query_string)

        return False

    def upload_all(self, all_g, triplestore_url, base_dir):
        result = True
#.........这里部分代码省略.........
开发者ID:essepuntato,项目名称:opencitations,代码行数:103,代码来源:storer.py


注:本文中的reporter.Reporter.new_article方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。