

Python HTCondorUtils Class Code Examples

This article collects typical usage examples of the Python HTCondorUtils class, gathered from open-source projects. If you are wondering what HTCondorUtils does, or how to use it, the curated examples below should help.


Fifteen code examples of the HTCondorUtils class are shown below, ordered by popularity.
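
Most of the examples revolve around a handful of helpers: HTCondorUtils.quote/unquote for building ClassAd string literals, HTCondorUtils.AuthenticatedSubprocess for running schedd actions under a user's proxy, and HTCondorUtils.readEvents for iterating over HTCondor job-log events. For orientation, here is a minimal sketch of what quote and unquote plausibly do; the actual CRABServer implementation may differ (for instance by delegating to classad.quote/classad.unquote):

def quote(value):
    # Wrap a value as a ClassAd string literal, escaping backslashes and quotes.
    return '"%s"' % str(value).replace('\\', '\\\\').replace('"', '\\"')

def unquote(value):
    # Inverse of quote(): strip the surrounding quotes and unescape.
    if value.startswith('"') and value.endswith('"'):
        value = value[1:-1]
    return value.replace('\\"', '"').replace('\\\\', '\\')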

Example 1: killAll

    def killAll(self):

        # We need to keep the ROOT DAG on hold until the periodic remove kicks in.
        # See DagmanSubmitter.py#L390 (dagAd["PeriodicRemove"])
        # This is needed in case the user wants to resubmit.
        rootConst = "TaskType =?= \"ROOT\" && CRAB_ReqName =?= %s" % HTCondorUtils.quote(self.workflow)
        # Holding the DAG job does not remove the already-queued node jobs;
        # that must be done separately.
        # --------------------------------------
        # From HTCondor documentation
        # http://research.cs.wisc.edu/htcondor/manual/v8.3/2_10DAGMan_Applications.html#SECTION003107000000000000000
        # --------------------------------------
        # After placing the condor_dagman job on hold, no new node jobs will be submitted,
        # and no PRE or POST scripts will be run. Any node jobs already in the HTCondor queue
        # will continue undisturbed. If the condor_dagman job is left on hold, it will remain
        # in the HTCondor queue after all of the currently running node jobs are finished.
        # --------------------------------------
        # TODO: Remove jobConst query when htcondor ticket is solved
        # https://htcondor-wiki.cs.wisc.edu/index.cgi/tktview?tn=5175
        jobConst = "TaskType =!= \"ROOT\" && CRAB_ReqName =?= %s" % HTCondorUtils.quote(self.workflow)

        with HTCondorUtils.AuthenticatedSubprocess(self.proxy) as (parent, rpipe):
            if not parent:
                with self.schedd.transaction() as tsc:
                    self.schedd.act(htcondor.JobAction.Hold, rootConst)
                    self.schedd.act(htcondor.JobAction.Remove, jobConst)
        results = rpipe.read()
        if results != "OK":
            raise TaskWorkerException("The CRAB3 server backend could not kill the task because the Grid scheduler answered with an error\n"\
                                      "This is probably a temporary glitch, please try it again and contact an expert if the error persist\n"+\
                                      "Error reason %s" % results)
Developer: nizamyusli, Project: CRABServer, Lines: 31, Source: DagmanKiller.py
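
The AuthenticatedSubprocess context manager used above forks a child process that performs the schedd action under the user's proxy and reports the outcome back over a pipe; the parent skips the body and reads the child's verdict afterwards. A minimal sketch of that fork-and-pipe pattern (hypothetical; the real class additionally switches the HTCondor security context to the supplied X509 proxy):

import os

class AuthenticatedSubprocessSketch(object):
    # Hypothetical illustration of HTCondorUtils.AuthenticatedSubprocess.
    def __init__(self, proxy):
        self.proxy = proxy

    def __enter__(self):
        r, w = os.pipe()
        self.pid = os.fork()
        if self.pid:                      # parent: only read the child's verdict
            os.close(w)
            return True, os.fdopen(r, 'r')
        os.close(r)                       # child: run the body, then report
        self.wpipe = os.fdopen(w, 'w')
        return False, None

    def __exit__(self, exc_type, exc_val, exc_tb):
        if not self.pid:                  # child: report "OK" or the error, then exit
            self.wpipe.write("OK" if exc_type is None else str(exc_val))
            self.wpipe.close()
            os._exit(0)
        return False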

Example 2: getScheddObj

 def getScheddObj(self, name):
     """
     Return a tuple (schedd, address) containing an object representing the
     remote schedd and its corresponding address.
     Still required for OLD tasks. Remove it later TODO
     """
     info = name.split("_")
     if len(info) > 3:
         name = info[2]
     else:
         name = self.getSchedd()
     if name == "localhost":
         schedd = htcondor.Schedd()
         with open(htcondor.param['SCHEDD_ADDRESS_FILE']) as fd:
             address = fd.read().split("\n")[0]
     else:
         info = name.split(":")
         pool = "localhost"
         if len(info) == 3:
             pool = info[1]
         htcondor.param['COLLECTOR_HOST'] = self.getCollector(pool)
         coll = htcondor.Collector()
         schedds = coll.query(htcondor.AdTypes.Schedd, 'regexp(%s, Name)' % HTCondorUtils.quote(info[0]))
         self.scheddAd = ""
         if not schedds:
             self.scheddAd = self.getCachedCollectorOutput(info[0])
         else:
             self.cacheCollectorOutput(info[0], schedds[0])
             self.scheddAd = self.getCachedCollectorOutput(info[0])
         address = self.scheddAd['MyAddress']
         schedd = htcondor.Schedd(self.scheddAd)
     return schedd, address
Developer: mialiu149, Project: CRABServer, Lines: 32, Source: HTCondorLocator.py

Example 3: getScheddObj

 def getScheddObj(self, name):
     """
     Return a tuple (schedd, address) containing an object representing the
     remote schedd and its corresponding address.
     """
     info = name.split("_")
     if len(info) > 3:
         name = info[2]
     else:
         name = self.getSchedd()
     if name == "localhost":
         schedd = htcondor.Schedd()
         with open(htcondor.param['SCHEDD_ADDRESS_FILE']) as fd:
             address = fd.read().split("\n")[0]
     else:
         info = name.split(":")
         pool = "localhost"
         if len(info) == 3:
             pool = info[1]
         htcondor.param['COLLECTOR_HOST'] = self.getCollector(pool)
         coll = htcondor.Collector()
         schedds = coll.query(htcondor.AdTypes.Schedd, 'regexp(%s, Name)' % HTCondorUtils.quote(info[0]))
         if not schedds:
             raise Exception("Unable to locate schedd %s" % info[0])
         self.scheddAd = schedds[0]
         address = self.scheddAd['MyAddress']
         schedd = htcondor.Schedd(self.scheddAd)
     return schedd, address
Developer: khurtado, Project: CRABServer, Lines: 28, Source: HTCondorLocator.py
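
For comparison, the same lookup can be written directly against the htcondor bindings. A minimal standalone sketch, assuming a reachable collector and that classad.quote is available in your version of the bindings:

import classad
import htcondor

def locate_schedd(collector_host, schedd_name):
    # Ask the collector for the schedd's ad, then build a Schedd object from it.
    htcondor.param['COLLECTOR_HOST'] = collector_host
    coll = htcondor.Collector()
    ads = coll.query(htcondor.AdTypes.Schedd,
                     'Name =?= %s' % classad.quote(schedd_name))
    if not ads:
        raise Exception("Unable to locate schedd %s" % schedd_name)
    return htcondor.Schedd(ads[0]), ads[0]['MyAddress']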

Example 4: executeInternal

    def executeInternal(self, apmon, *args, **kwargs):
        # Marco: I guess these value errors only happen for development instances
        if 'task' not in kwargs:
            raise ValueError("No task specified.")
        self.task = kwargs['task']
        if 'tm_taskname' not in self.task:
            raise ValueError("No taskname specified")
        self.workflow = self.task['tm_taskname']
        if 'user_proxy' not in self.task:
            raise ValueError("No proxy provided")
        self.proxy = self.task['user_proxy']

        self.logger.info("About to kill workflow: %s." % self.workflow)

        self.workflow = str(self.workflow)
        if not WORKFLOW_RE.match(self.workflow):
            raise Exception("Invalid workflow name.")

        # Query HTCondor for information about running jobs and update Dashboard appropriately
        if self.task['tm_collector']:
            self.backendurls['htcondorPool'] = self.task['tm_collector']
        loc = HTCondorLocator.HTCondorLocator(self.backendurls)

        address = ""
        try:
            self.schedd, address = loc.getScheddObjNew(self.task['tm_schedd'])
        except Exception as exp:
            msg  = "The CRAB server backend was not able to contact the Grid scheduler."
            msg += " Please try again later."
            msg += " If the error persists send an e-mail to %s." % (FEEDBACKMAIL)
            msg += " Message from the scheduler: %s" % (str(exp))
            self.logger.exception("%s: %s" % (self.workflow, msg))
            raise TaskWorkerException(msg)

        try:
            hostname = socket.getfqdn()
        except:
            hostname = ''

        const = 'CRAB_ReqName =?= %s && TaskType=?="Job"' % HTCondorUtils.quote(self.workflow)
        try:
            for ad in list(self.schedd.xquery(const, ['CRAB_Id', 'CRAB_Retry'])):
                if ('CRAB_Id' not in ad) or ('CRAB_Retry' not in ad):
                    continue
                jobid = str(ad.eval('CRAB_Id'))
                jobretry = str(ad.eval('CRAB_Retry'))
                jinfo = {'broker': hostname,
                         'bossId': jobid,
                         'StatusValue': 'killed',
                        }
                insertJobIdSid(jinfo, jobid, self.workflow, jobretry)
                self.logger.info("Sending kill info to Dashboard: %s" % str(jinfo))
                apmon.sendToML(jinfo)
        except:
            self.logger.exception("Failed to notify Dashboard of job kills") #warning

        # Note that we can not send kills for jobs not in queue at this time; we'll need the
        # DAG FINAL node to be fixed and the node status to include retry number.
        return self.killAll(const)
Developer: belforte, Project: CRABServer, Lines: 59, Source: DagmanKiller.py
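
The xquery call above streams only the projected attributes of each matching job ad, which keeps the query cheap on a busy schedd. A minimal standalone sketch against a local schedd (the task name is hypothetical; xquery is the classic-bindings streaming query this code base relies on):

import htcondor

schedd = htcondor.Schedd()  # the example above targets a remote schedd instead
const = 'CRAB_ReqName =?= "my_task" && TaskType =?= "Job"'
for ad in schedd.xquery(const, ['CRAB_Id', 'CRAB_Retry']):
    print("%s %s" % (ad.get('CRAB_Id'), ad.get('CRAB_Retry')))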

Example 5: killJobs

 def killJobs(self, ids):
     ad = classad.ClassAd()
     ad['foo'] = ids
     const = "CRAB_ReqName =?= %s && member(CRAB_Id, %s)" % (HTCondorUtils.quote(self.workflow), ad.lookup("foo").__repr__())
     with HTCondorUtils.AuthenticatedSubprocess(self.proxy) as (parent, rpipe):
         if not parent:
             self.schedd.act(htcondor.JobAction.Remove, const)
     results = rpipe.read()
     if results != "OK":
         raise TaskWorkerException("The CRAB3 server backend could not kill jobs [%s]. because the Grid scheduler answered with an error\n" % ", ".join(ids)+\
                                   "This is probably a temporary glitch, please try it again and contact an expert if the error persist\n"+\
                                   "Error reason %s" % results)
Developer: nizamyusli, Project: CRABServer, Lines: 12, Source: DagmanKiller.py
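
The two lines building the constraint deserve a note: assigning the Python list to a throwaway ClassAd attribute and taking repr() of the looked-up expression is a convenient way to render a ClassAd list literal for member(). A small illustration, assuming the classad bindings render the expression as { 1,3,7 }:

import classad

ad = classad.ClassAd()
ad['foo'] = [1, 3, 7]
# repr() of the ExprTree yields ClassAd syntax, e.g. "{ 1,3,7 }"
const = "member(CRAB_Id, %s)" % repr(ad.lookup("foo"))
print(const)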

Example 6: killAll

    def killAll(self):

        # Search for and hold the DAG
        rootConst = "TaskType =?= \"ROOT\" && CRAB_ReqName =?= %s" % HTCondorUtils.quote(self.workflow)

        with HTCondorUtils.AuthenticatedSubprocess(self.proxy) as (parent, rpipe):
            if not parent:
                self.schedd.act(htcondor.JobAction.Hold, rootConst)
        results = rpipe.read()
        if results != "OK":
            raise TaskWorkerException("The CRAB3 server backend could not kill the task because the Grid scheduler answered with an error\n"\
                                      "This is probably a temporary glitch, please try it again and contact an expert if the error persist\n"+\
                                      "Error reason %s" % results)
Developer: khurtado, Project: CRABServer, Lines: 13, Source: DagmanKiller.py

Example 7: duplicateCheck

    def duplicateCheck(self, task):
        """
        Look to see if the task we are about to submit is already in the schedd.
        If so, assume that this task in TaskWorker was run successfully, but killed
        before it could update the frontend.
        """
        workflow = task["tm_taskname"]

        if task["tm_collector"]:
            self.backendurls["htcondorPool"] = task["tm_collector"]
        loc = HTCondorLocator.HTCondorLocator(self.backendurls)

        schedd = ""
        try:
            self.logger.debug("Duplicate check is getting the schedd obj. Collector is: %s", task["tm_collector"])
            schedd, dummyAddress = loc.getScheddObjNew(task["tm_schedd"])
            self.logger.debug("Got schedd obj for %s ", task["tm_schedd"])
        except Exception as exp:
            msg = "The CRAB server backend was not able to contact the Grid scheduler."
            msg += " Please try again later."
            msg += " If the error persists send an e-mail to %s." % (FEEDBACKMAIL)
            msg += " Message from the scheduler: %s" % (str(exp))
            self.logger.exception("%s: %s", workflow, msg)
            raise TaskWorkerException(msg)

        rootConst = (
            'TaskType =?= "ROOT" && CRAB_ReqName =?= %s && (isUndefined(CRAB_Attempt) || CRAB_Attempt == 0)'
            % HTCondorUtils.quote(workflow)
        )

        self.logger.debug("Duplicate check is querying the schedd: %s", rootConst)
        results = list(schedd.xquery(rootConst, []))
        self.logger.debug("Schedd queried %s", results)

        if not results:
            # Task not already in schedd
            return None

        configreq = {"workflow": workflow, "status": "SUBMITTED", "subresource": "success"}
        self.logger.warning(
            "Task %s already submitted to HTCondor; pushing information centrally: %s", workflow, str(configreq)
        )
        data = urllib.urlencode(configreq)
        self.server.post(self.resturi, data=data)

        # Note that we don't re-send Dashboard jobs; we assume this is a rare occurrence and
        # don't want to upset any info already in the Dashboard.

        return Result.Result(task=task, result=(-1))
Developer: AndresTanasijczuk, Project: CRABServer, Lines: 49, Source: DagmanSubmitter.py

Example 8: killJobs

 def killJobs(self, ids):
     ad = classad.ClassAd()
     ad['foo'] = ids
     const = "CRAB_ReqName =?= %s && member(CRAB_Id, %s)" % (HTCondorUtils.quote(self.workflow), ad.lookup("foo").__repr__())
     with HTCondorUtils.AuthenticatedSubprocess(self.proxy) as (parent, rpipe):
         if not parent:
             self.schedd.act(htcondor.JobAction.Remove, const)
     results = rpipe.read()
     if results != "OK":
         msg  = "The CRAB server backend was not able to kill these jobs %s," % (ids)
         msg += " because the Grid scheduler answered with an error."
         msg += " This is probably a temporary glitch. Please try again later."
         msg += " If the error persists send an e-mail to %s." % (FEEDBACKMAIL)
         msg += " Error reason: %s" % (results)
         raise TaskWorkerException(msg)
Developer: dciangot, Project: CRABServer, Lines: 15, Source: DagmanKiller.py

Example 9: getScheddObjNew

 def getScheddObjNew(self, schedd):
     """
     Return a tuple (schedd, address) containing an object representing the
     remote schedd and its corresponding address.
     """
     htcondor.param['COLLECTOR_HOST'] = self.getCollector().encode('ascii', 'ignore')
     coll = htcondor.Collector()
     schedds = coll.query(htcondor.AdTypes.Schedd, 'regexp(%s, Name)' % HTCondorUtils.quote(schedd.encode('ascii', 'ignore')))
     self.scheddAd = ""
     if not schedds:
         self.scheddAd = self.getCachedCollectorOutput(schedd)
     else:
         self.cacheCollectorOutput(schedd, schedds[0])
         self.scheddAd = self.getCachedCollectorOutput(schedd)
     address = self.scheddAd['MyAddress']
     scheddObj = htcondor.Schedd(self.scheddAd)
     return scheddObj, address
Developer: jmarra13, Project: CRABServer, Lines: 17, Source: HTCondorLocator.py

Example 10: getScheddObjNew

 def getScheddObjNew(self, schedd):
     """
     Return a tuple (schedd, address) containing an object representing the
     remote schedd and its corresponding address.
     """
     htcondor.param['COLLECTOR_HOST'] = self.getCollector().encode('ascii', 'ignore')
     coll = htcondor.Collector()
     schedds = coll.query(htcondor.AdTypes.Schedd, 'Name=?=%s' % HTCondorUtils.quote(schedd.encode('ascii', 'ignore')),
                          ["AddressV1", "CondorPlatform", "CondorVersion", "Machine", "MyAddress", "Name", "MyType", "ScheddIpAddr", "RemoteCondorSetup"])
     self.scheddAd = ""
     if not schedds:
         self.scheddAd = self.getCachedCollectorOutput(schedd)
     else:
         self.cacheCollectorOutput(schedd, schedds[0])
         self.scheddAd = self.getCachedCollectorOutput(schedd)
     address = self.scheddAd['MyAddress']
     scheddObj = htcondor.Schedd(self.scheddAd)
     return scheddObj, address
Developer: mmascher, Project: CRABServer, Lines: 18, Source: HTCondorLocator.py
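
Examples 9 and 10 fall back to cacheCollectorOutput/getCachedCollectorOutput (not shown here) so that the locator can survive a collector outage by reusing the last known-good schedd ad. A hypothetical in-memory version of that pair, for illustration only (the real HTCondorLocator presumably persists the ads somewhere durable):

class CollectorCacheSketch(object):
    _cache = {}

    def cacheCollectorOutput(self, key, ad):
        # Remember the last good ad returned by the collector for this schedd.
        self._cache[key] = ad

    def getCachedCollectorOutput(self, key):
        # Fall back to the cached ad; fail loudly if we never saw one.
        try:
            return self._cache[key]
        except KeyError:
            raise Exception("No cached collector output for %s" % key)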

Example 11: bootstrap

def bootstrap():
    print("Entering TaskManagerBootstrap with args: %s" % sys.argv)
    command = sys.argv[1]
    if command == "POSTJOB":
        return PostJob.PostJob().execute(*sys.argv[2:])
    elif command == "PREJOB":
        return PreJob.PreJob().execute(*sys.argv[2:])
    elif command == "PREDAG":
        return PreDAG.PreDAG().execute(*sys.argv[2:])

    infile, outfile = sys.argv[2:]

    adfile = os.environ["_CONDOR_JOB_AD"]
    print("Parsing classad")
    with open(adfile, "r") as fd:
        ad = classad.parseOld(fd)
    print("..done")
    in_args = []
    if infile != "None":
        with open(infile, "r") as fd:
            in_args = pickle.load(fd)

    config = Configuration.Configuration()
    config.section_("Services")
    config.Services.DBSUrl = 'https://cmsweb.cern.ch/dbs/prod/phys03/DBSWriter/'
    
    ad['tm_taskname'] = ad.eval("CRAB_Workflow")
    ad['tm_split_algo'] = ad.eval("CRAB_SplitAlgo")
    ad['tm_dbs_url'] = ad.eval("CRAB_DBSURL")
    ad['tm_input_dataset'] = ad.eval("DESIRED_CMSDataset")
    ad['tm_outfiles'] = HTCondorUtils.unquote(ad.eval("CRAB_AdditionalOutputFiles"))
    ad['tm_tfile_outfiles'] = HTCondorUtils.unquote(ad.eval("CRAB_TFileOutputFiles"))
    ad['tm_edm_outfiles'] = HTCondorUtils.unquote(ad.eval("CRAB_EDMOutputFiles"))
    ad['tm_site_whitelist'] = HTCondorUtils.unquote(ad.eval("CRAB_SiteWhitelist"))
    ad['tm_site_blacklist'] = HTCondorUtils.unquote(ad.eval("CRAB_SiteBlacklist"))
    ad['tm_job_type'] = 'Analysis'
    print("TaskManager got this raw ad")
    print(ad)
    pure_ad = {}
    for key in ad:
        try:
            pure_ad[key] = ad.eval(key)
            if isinstance(pure_ad[key], classad.Value):
                del pure_ad[key]
            if isinstance(pure_ad[key], list):
                pure_ad[key] = [i.eval() for i in pure_ad[key]]
        except:
            pass
    ad = pure_ad
    ad['CRAB_AlgoArgs'] = json.loads(ad["CRAB_AlgoArgs"])
    ad['tm_split_args'] = ad["CRAB_AlgoArgs"]
    ad['tarball_location'] = os.environ.get('CRAB_TARBALL_LOCATION', '')
    print("TaskManagerBootstrap got this ad:")
    pprint.pprint(ad)

    results = task.execute(in_args, task=ad).result

    print(results)
    with open(outfile, "w") as fd:
        pickle.dump(results, fd)

    return 0
Developer: belforte, Project: CRABServer, Lines: 62, Source: TaskManagerBootstrap.py
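
bootstrap() reads the job's ClassAd from the file pointed to by _CONDOR_JOB_AD, which HTCondor writes in the "old" attribute = value syntax understood by classad.parseOld. A tiny illustration with a hypothetical two-attribute ad (parseOld also accepts a file object, as used above):

import classad

raw = 'CRAB_Workflow = "140101_000000:user_crab_mytask"\nCRAB_SplitAlgo = "FileBased"\n'
ad = classad.parseOld(raw)
print(ad.eval("CRAB_Workflow"))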

Example 12: duplicateCheck (excerpt)

        workflow = task['tm_taskname']

        if task['tm_collector']:
            self.backendurls['htcondorPool'] = task['tm_collector']
        loc = HTCondorLocator.HTCondorLocator(self.backendurls)

        address = ""
        schedd = ""
        try:
            schedd, address = loc.getScheddObjNew(task['tm_schedd'])
        except Exception as exp:
            msg = "%s: The CRAB3 server backend is not able to contact the Grid scheduler. Please retry later. Message from the scheduler: %s" % (workflow, str(exp))
            self.logger.exception(msg)
            raise TaskWorkerException(msg)

        rootConst = 'TaskType =?= "ROOT" && CRAB_ReqName =?= %s && (isUndefined(CRAB_Attempt) || CRAB_Attempt == 0)' % HTCondorUtils.quote(workflow)

        results = list(schedd.xquery(rootConst, []))

        if not results:
            # Task not already in schedd
            return None

        configreq = {'workflow': workflow,
                     'status': "SUBMITTED",
                     'jobset': "-1",
                     'subresource': 'success',
                    }
        self.logger.warning("Task %s already submitted to HTCondor; pushing information centrally: %s" % (workflow, str(configreq)))
        data = urllib.urlencode(configreq)
        self.server.post(self.resturi, data=data)
Developer: mialiu149, Project: CRABServer, Lines: 31, Source: DagmanSubmitter.py

Example 13: executeInternal

    def executeInternal(self, apmon, *args, **kw):
        # Marco: I guess these value errors only happen for development instances
        if 'task' not in kw:
            raise ValueError("No task specified.")
        self.task = kw['task']
        if 'tm_taskname' not in self.task:
            raise ValueError("No taskname specified")
        self.workflow = self.task['tm_taskname']
        if 'user_proxy' not in self.task:
            raise ValueError("No proxy provided")
        self.proxy = self.task['user_proxy']

        try:
            self.killTransfers(apmon)
        except:
            self.logger.exception("Failed to kill transfers; suppressing error until functionality is confirmed") #TODO send a warning?

        self.logger.info("About to kill workflow: %s." % self.workflow)

        self.workflow = str(self.workflow)
        if not WORKFLOW_RE.match(self.workflow):
            raise Exception("Invalid workflow name.")

        # Query HTCondor for information about running jobs and update Dashboard appropriately
        if self.task['tm_collector']:
            self.backendurls['htcondorPool'] = self.task['tm_collector']
        loc = HTCondorLocator.HTCondorLocator(self.backendurls)

        address = ""
        try:
            self.schedd, address = loc.getScheddObjNew(self.task['tm_schedd'])
        except Exception as exp:
            msg = ("%s: The CRAB3 server backend is not able to contact Grid scheduler. Please, retry later. Message from the scheduler: %s") % (self.workflow, str(exp))
            self.logger.exception(msg)
            raise TaskWorkerException(msg)

        ad = classad.ClassAd()
        ad['foo'] = self.task['kill_ids']
        try:
            hostname = socket.getfqdn()
        except:
            hostname = ''

        const = "CRAB_ReqName =?= %s && member(CRAB_Id, %s)" % (HTCondorUtils.quote(self.workflow), ad.lookup("foo").__repr__())
        try:
            for ad in list(self.schedd.xquery(const, ['CRAB_Id', 'CRAB_Retry'])):
                if ('CRAB_Id' not in ad) or ('CRAB_Retry' not in ad):
                    continue
                jobid = str(ad.eval('CRAB_Id'))
                jobretry = str(ad.eval('CRAB_Retry'))
                jinfo = {'jobId': ("%s_https://glidein.cern.ch/%s/%s_%s" % (jobid, jobid, self.workflow.replace("_", ":"), jobretry)),
                         'sid': "https://glidein.cern.ch/%s%s" % (jobid, self.workflow.replace("_", ":")),
                         'broker': hostname,
                         'bossId': jobid,
                         'StatusValue' : 'killed',
                        }
                self.logger.info("Sending kill info to Dashboard: %s" % str(jinfo))
                apmon.sendToML(jinfo)
        except:
            self.logger.exception("Failed to notify Dashboard of job kills") #warning

        # Note that we can not send kills for jobs not in queue at this time; we'll need the
        # DAG FINAL node to be fixed and the node status to include retry number.

        if self.task['kill_all']:
            return self.killAll()
        else:
            return self.killJobs(self.task['kill_ids'])
Developer: nizamyusli, Project: CRABServer, Lines: 68, Source: DagmanKiller.py

Example 14: parseJobLog

def parseJobLog(fp, nodes, node_map):
    count = 0
    for event in HTCondorUtils.readEvents(fp):
        count += 1
        eventtime = time.mktime(time.strptime(event["EventTime"], "%Y-%m-%dT%H:%M:%S"))
        if event["MyType"] == "SubmitEvent":
            m = node_name_re.match(event["LogNotes"])
            if m:
                node = m.groups()[0]
                proc = event["Cluster"], event["Proc"]
                info = nodes.setdefault(node, NODE_DEFAULTS)  # NB: shares one mutable NODE_DEFAULTS across nodes; Example 15 uses copy.deepcopy()
                info["State"] = "idle"
                info["JobIds"].append("%d.%d" % proc)
                info["RecordedSite"] = False
                info["SubmitTimes"].append(eventtime)
                info["TotalUserCpuTimeHistory"].append(0)
                info["TotalSysCpuTimeHistory"].append(0)
                info["WallDurations"].append(0)
                info["ResidentSetSize"].append(0)
                info["Retries"] = len(info["SubmitTimes"]) - 1
                node_map[proc] = node
        elif event["MyType"] == "ExecuteEvent":
            node = node_map[event["Cluster"], event["Proc"]]
            nodes[node]["StartTimes"].append(eventtime)
            nodes[node]["State"] = "running"
            nodes[node]["RecordedSite"] = False
        elif event["MyType"] == "JobTerminatedEvent":
            node = node_map[event["Cluster"], event["Proc"]]
            nodes[node]["EndTimes"].append(eventtime)
            nodes[node]["WallDurations"][-1] = nodes[node]["EndTimes"][-1] - nodes[node]["StartTimes"][-1]
            insertCpu(event, nodes[node])
            if event["TerminatedNormally"]:
                if event["ReturnValue"] == 0:
                    nodes[node]["State"] = "transferring"
                else:
                    nodes[node]["State"] = "cooloff"
            else:
                nodes[node]["State"] = "cooloff"
        elif event["MyType"] == "PostScriptTerminatedEvent":
            m = node_name2_re.match(event["DAGNodeName"])
            if m:
                node = m.groups()[0]
                if event["TerminatedNormally"]:
                    if event["ReturnValue"] == 0:
                        nodes[node]["State"] = "finished"
                    elif event["ReturnValue"] == 2:
                        nodes[node]["State"] = "failed"
                    else:
                        nodes[node]["State"] = "cooloff"
                else:
                    nodes[node]["State"] = "cooloff"
        elif (
            event["MyType"] == "ShadowExceptionEvent"
            or event["MyType"] == "JobReconnectFailedEvent"
            or event["MyType"] == "JobEvictedEvent"
        ):
            node = node_map[event["Cluster"], event["Proc"]]
            if nodes[node]["State"] != "idle":
                nodes[node]["EndTimes"].append(eventtime)
                if nodes[node]["WallDurations"] and nodes[node]["EndTimes"] and nodes[node]["StartTimes"]:
                    nodes[node]["WallDurations"][-1] = nodes[node]["EndTimes"][-1] - nodes[node]["StartTimes"][-1]
                nodes[node]["State"] = "idle"
                insertCpu(event, nodes[node])
                nodes[node]["TotalUserCpuTimeHistory"].append(0)
                nodes[node]["TotalSysCpuTimeHistory"].append(0)
                nodes[node]["WallDurations"].append(0)
                nodes[node]["ResidentSetSize"].append(0)
                nodes[node]["SubmitTimes"].append(-1)
                nodes[node]["JobIds"].append(nodes[node]["JobIds"][-1])
                nodes[node]["Restarts"] += 1
        elif event["MyType"] == "JobAbortedEvent":
            node = node_map[event["Cluster"], event["Proc"]]
            if nodes[node]["State"] == "idle" or nodes[node]["State"] == "held":
                nodes[node]["StartTimes"].append(-1)
                if not nodes[node]["RecordedSite"]:
                    nodes[node]["SiteHistory"].append("Unknown")
            nodes[node]["State"] = "killed"
            insertCpu(event, nodes[node])
        elif event["MyType"] == "JobHeldEvent":
            node = node_map[event["Cluster"], event["Proc"]]
            if nodes[node]["State"] == "running":
                nodes[node]["EndTimes"].append(eventtime)
                if nodes[node]["WallDurations"] and nodes[node]["EndTimes"] and nodes[node]["StartTimes"]:
                    nodes[node]["WallDurations"][-1] = nodes[node]["EndTimes"][-1] - nodes[node]["StartTimes"][-1]
                insertCpu(event, nodes[node])
                nodes[node]["TotalUserCpuTimeHistory"].append(0)
                nodes[node]["TotalSysCpuTimeHistory"].append(0)
                nodes[node]["WallDurations"].append(0)
                nodes[node]["ResidentSetSize"].append(0)
                nodes[node]["SubmitTimes"].append(-1)
                nodes[node]["JobIds"].append(nodes[node]["JobIds"][-1])
                nodes[node]["Restarts"] += 1
            nodes[node]["State"] = "held"
        elif event["MyType"] == "JobReleaseEvent":
            node = node_map[event["Cluster"], event["Proc"]]
            nodes[node]["State"] = "idle"
        elif event["MyType"] == "JobAdInformationEvent":
            node = node_map[event["Cluster"], event["Proc"]]
            if (
                (not nodes[node]["RecordedSite"])
#......... remainder of the code omitted .........
Developer: emaszs, Project: CRABServer, Lines: 101, Source: cache_status.py
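
Both parseJobLog variants fold job-log events into per-node history lists keyed by the fields the parser touches. The authoritative NODE_DEFAULTS lives in cache_status.py; a plausible shape, reconstructed from those fields, would be:

# Reconstructed from the fields used above; the real definition is in cache_status.py.
NODE_DEFAULTS = {
    'Retries': 0,
    'Restarts': 0,
    'State': 'unsubmitted',
    'RecordedSite': False,
    'JobIds': [],
    'SubmitTimes': [],
    'StartTimes': [],
    'EndTimes': [],
    'WallDurations': [],
    'TotalUserCpuTimeHistory': [],
    'TotalSysCpuTimeHistory': [],
    'ResidentSetSize': [],
    'SiteHistory': [],
}

Because the values are mutable lists, the copy.deepcopy(NODE_DEFAULTS) in Example 15 below is the safe way to seed setdefault; as written, Example 14 shares a single dict across all nodes.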

Example 15: parseJobLog

def parseJobLog(fp, nodes, nodeMap):
    count = 0
    for event in HTCondorUtils.readEvents(fp):
        count += 1
        eventtime = time.mktime(time.strptime(event['EventTime'], "%Y-%m-%dT%H:%M:%S"))
        if event['MyType'] == 'SubmitEvent':
            m = nodeNameRe.match(event['LogNotes'])
            if m:
                node = m.groups()[0]
                proc = event['Cluster'], event['Proc']
                info = nodes.setdefault(node, copy.deepcopy(NODE_DEFAULTS))
                info['State'] = 'idle'
                info['JobIds'].append("%d.%d" % proc)
                info['RecordedSite'] = False
                info['SubmitTimes'].append(eventtime)
                info['TotalUserCpuTimeHistory'].append(0)
                info['TotalSysCpuTimeHistory'].append(0)
                info['WallDurations'].append(0)
                info['ResidentSetSize'].append(0)
                info['Retries'] = len(info['SubmitTimes'])-1
                nodeMap[proc] = node
        elif event['MyType'] == 'ExecuteEvent':
            node = nodeMap[event['Cluster'], event['Proc']]
            nodes[node]['StartTimes'].append(eventtime)
            nodes[node]['State'] = 'running'
            nodes[node]['RecordedSite'] = False
        elif event['MyType'] == 'JobTerminatedEvent':
            node = nodeMap[event['Cluster'], event['Proc']]
            nodes[node]['EndTimes'].append(eventtime)
            # At times HTCondor does not log the ExecuteEvent and there is no StartTime.
            if nodes[node]['StartTimes']:
                nodes[node]['WallDurations'][-1] = nodes[node]['EndTimes'][-1] - nodes[node]['StartTimes'][-1]
            else:
                nodes[node]['WallDurations'][-1] = 0
            insertCpu(event, nodes[node])
            if event['TerminatedNormally']:
                if event['ReturnValue'] == 0:
                    nodes[node]['State'] = 'transferring'
                else:
                    nodes[node]['State'] = 'cooloff'
            else:
                nodes[node]['State'] = 'cooloff'
        elif event['MyType'] == 'PostScriptTerminatedEvent':
            m = nodeName2Re.match(event['DAGNodeName'])
            if m:
                node = m.groups()[0]
                if event['TerminatedNormally']:
                    if event['ReturnValue'] == 0:
                        nodes[node]['State'] = 'finished'
                    elif event['ReturnValue'] == 2:
                        nodes[node]['State'] = 'failed'
                    else:
                        nodes[node]['State'] = 'cooloff'
                else:
                    nodes[node]['State'] = 'cooloff'
        elif event['MyType'] == 'ShadowExceptionEvent' or event["MyType"] == "JobReconnectFailedEvent" or event['MyType'] == 'JobEvictedEvent':
            node = nodeMap[event['Cluster'], event['Proc']]
            if nodes[node]['State'] != 'idle':
                nodes[node]['EndTimes'].append(eventtime)
                if nodes[node]['WallDurations'] and nodes[node]['EndTimes'] and nodes[node]['StartTimes']:
                    nodes[node]['WallDurations'][-1] = nodes[node]['EndTimes'][-1] - nodes[node]['StartTimes'][-1]
                nodes[node]['State'] = 'idle'
                insertCpu(event, nodes[node])
                nodes[node]['TotalUserCpuTimeHistory'].append(0)
                nodes[node]['TotalSysCpuTimeHistory'].append(0)
                nodes[node]['WallDurations'].append(0)
                nodes[node]['ResidentSetSize'].append(0)
                nodes[node]['SubmitTimes'].append(-1)
                nodes[node]['JobIds'].append(nodes[node]['JobIds'][-1])
                nodes[node]['Restarts'] += 1
        elif event['MyType'] == 'JobAbortedEvent':
            node = nodeMap[event['Cluster'], event['Proc']]
            if nodes[node]['State'] == "idle" or nodes[node]['State'] == "held":
                nodes[node]['StartTimes'].append(-1)
                if not nodes[node]['RecordedSite']:
                    nodes[node]['SiteHistory'].append("Unknown")
            nodes[node]['State'] = 'killed'
            insertCpu(event, nodes[node])
        elif event['MyType'] == 'JobHeldEvent':
            node = nodeMap[event['Cluster'], event['Proc']]
            if nodes[node]['State'] == 'running':
                nodes[node]['EndTimes'].append(eventtime)
                if nodes[node]['WallDurations'] and nodes[node]['EndTimes'] and nodes[node]['StartTimes']:
                    nodes[node]['WallDurations'][-1] = nodes[node]['EndTimes'][-1] - nodes[node]['StartTimes'][-1]
                insertCpu(event, nodes[node])
                nodes[node]['TotalUserCpuTimeHistory'].append(0)
                nodes[node]['TotalSysCpuTimeHistory'].append(0)
                nodes[node]['WallDurations'].append(0)
                nodes[node]['ResidentSetSize'].append(0)
                nodes[node]['SubmitTimes'].append(-1)
                nodes[node]['JobIds'].append(nodes[node]['JobIds'][-1])
                nodes[node]['Restarts'] += 1
            nodes[node]['State'] = 'held'
        elif event['MyType'] == 'JobReleaseEvent':
            node = nodeMap[event['Cluster'], event['Proc']]
            nodes[node]['State'] = 'idle'
        elif event['MyType'] == 'JobAdInformationEvent':
            node = nodeMap[event['Cluster'], event['Proc']]
            if (not nodes[node]['RecordedSite']) and ('JOBGLIDEIN_CMSSite' in event) and not event['JOBGLIDEIN_CMSSite'].startswith("$$"):
                nodes[node]['SiteHistory'].append(event['JOBGLIDEIN_CMSSite'])
#......... remainder of the code omitted .........
Developer: belforte, Project: CRABServer, Lines: 101, Source: cache_status.py


Note: the HTCondorUtils class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. Copyright of the code snippets remains with the original authors; consult each project's license before distributing or reusing them. Do not reproduce without permission.