Skip to content

Commit

Permalink
Merge pull request #141 from PanDAWMS/next
Browse files Browse the repository at this point in the history
3.8.1.66
  • Loading branch information
PalNilsson authored Sep 9, 2024
2 parents 568c7fc + e1e6571 commit 4a45bc2
Show file tree
Hide file tree
Showing 85 changed files with 4,497 additions and 3,322 deletions.
2 changes: 1 addition & 1 deletion PILOTVERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.7.9.1
3.8.1.66
3 changes: 1 addition & 2 deletions doc/components/info/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
http://www.apache.org/licenses/LICENSE-2.0

Authors:
- Paul Nilsson, [email protected], 2018
- Paul Nilsson, [email protected], 2018-24

info components
===============
Expand All @@ -23,6 +23,5 @@ info components
infoservice
jobdata
jobinfo
jobinfoservice
queuedata
storagedata
19 changes: 0 additions & 19 deletions doc/components/info/jobinfoservice.rst

This file was deleted.

3 changes: 1 addition & 2 deletions doc/components/resource/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
http://www.apache.org/licenses/LICENSE-2.0

Authors:
- Paul Nilsson, [email protected], 2018-2019
- Paul Nilsson, [email protected], 2018-24

resource components
===================
Expand All @@ -19,5 +19,4 @@ resource components
bnl
generic
nersc
summit
titan
19 changes: 0 additions & 19 deletions doc/components/resource/summit.rst

This file was deleted.

54 changes: 41 additions & 13 deletions pilot.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
# under the License.
#
# Authors:
# - Mario Lassnig, [email protected], 2016-2017
# - Mario Lassnig, [email protected], 2016-17
# - Daniel Drizhuk, [email protected], 2017
# - Paul Nilsson, [email protected], 2017-2024
# - Paul Nilsson, [email protected], 2017-24

"""This is the entry point for the PanDA Pilot, executed with 'python3 pilot.py <args>'."""

Expand All @@ -39,29 +39,30 @@
from pilot.common.exception import PilotException
from pilot.info import infosys
from pilot.util.auxiliary import (
convert_signal_to_exit_code,
pilot_version_banner,
shell_exit_code,
convert_signal_to_exit_code
)
from pilot.util.config import config
from pilot.util.constants import (
get_pilot_version,
SUCCESS,
FAILURE,
ERRNO_NOJOBS,
PILOT_START_TIME,
FAILURE,
PILOT_END_TIME,
SERVER_UPDATE_NOT_DONE,
PILOT_MULTIJOB_START_TIME,
PILOT_START_TIME,
SERVER_UPDATE_NOT_DONE,
SUCCESS,
)
from pilot.util.cvmfs import (
cvmfs_diagnostics,
get_last_update,
is_cvmfs_available,
get_last_update
)
from pilot.util.filehandling import (
get_pilot_work_dir,
mkdirs,
store_base_urls
)
from pilot.util.harvester import (
is_harvester_mode,
Expand All @@ -72,6 +73,7 @@
get_panda_server,
https_setup,
send_update,
update_local_oidc_token_info
)
from pilot.util.loggingsupport import establish_logging
from pilot.util.networking import dump_ipv6_info
Expand Down Expand Up @@ -116,8 +118,11 @@ def main() -> int:
https_setup(args, get_pilot_version())
args.amq = None

# update the OIDC token if necessary
update_local_oidc_token_info(args.url, args.port)

# let the server know that the worker has started
if args.update_server:
if args.update_server and args.workerpilotstatusupdate:
send_worker_status(
"started", args.queue, args.url, args.port, logger, "IPv6"
) # note: assuming IPv6, fallback in place
Expand Down Expand Up @@ -160,6 +165,9 @@ def main() -> int:
)
logger.debug(f'PILOT_RUCIO_SITENAME={os.environ.get("PILOT_RUCIO_SITENAME")}')

#os.environ['RUCIO_ACCOUNT'] = 'atlpilo1'
#logger.warning(f"enforcing RUCIO_ACCOUNT={os.environ.get('RUCIO_ACCOUNT')}")

# store the site name as set with a pilot option
environ[
"PILOT_SITENAME"
Expand All @@ -171,6 +179,8 @@ def main() -> int:
f"pilot.workflow.{args.workflow}", globals(), locals(), [args.workflow], 0
)

# check if real-time logging is requested for this queue
#rtloggingtype
# update the pilot heartbeat file
update_pilot_heartbeat(time.time())

Expand All @@ -182,7 +192,7 @@ def main() -> int:
exitcode = None

# let the server know that the worker has finished
if args.update_server:
if args.update_server and args.workerpilotstatusupdate:
send_worker_status(
"finished",
args.queue,
Expand Down Expand Up @@ -357,15 +367,20 @@ def get_args() -> Any:
required=False, # From v 2.2.1 the site name is internally set
help="OBSOLETE: site name (e.g., AGLT2_TEST)",
)

# graciously stop pilot process after hard limit
arg_parser.add_argument(
"-j",
"--joblabel",
dest="job_label",
default="ptest",
help="Job prod/source label (default: ptest)",
)
arg_parser.add_argument(
"-g",
"--baseurls",
dest="baseurls",
default="",
help="Comma separated list of base URLs for validation of trf download",
)

# pilot version tag; PR or RC
arg_parser.add_argument(
Expand All @@ -385,6 +400,15 @@ def get_args() -> Any:
help="Disable server updates",
)

arg_parser.add_argument(
"-k",
"--noworkerpilotstatusupdate",
dest="workerpilotstatusupdate",
action="store_false",
default=True,
help="Disable updates to updateWorkerPilotStatus",
)

arg_parser.add_argument(
"-t",
"--noproxyverification",
Expand Down Expand Up @@ -842,7 +866,7 @@ def send_worker_status(
port: str,
logger: Any,
internet_protocol_version: str,
) -> None:
):
"""
Send worker info to the server to let it know that the worker has started or finished.
Expand Down Expand Up @@ -956,6 +980,10 @@ def list_zombies():
# set environment variables (to be replaced with singleton implementation)
set_environment_variables()

# store base URLs in a file if set
if args.baseurls:
store_base_urls(args.baseurls)

# execute main function
trace = main()

Expand Down
14 changes: 11 additions & 3 deletions pilot/api/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,15 @@ class StagingClient:
# list of allowed schemas to be used for transfers from REMOTE sites
remoteinput_allowed_schemas = ['root', 'gsiftp', 'dcap', 'srm', 'storm', 'https']

def __init__(self, infosys_instance: Any = None, acopytools: dict = None, logger: Any = None,
default_copytools: str = 'rucio', trace_report: dict = None, ipv: str = 'IPv6', workdir: str = ""):
def __init__(self,
infosys_instance: Any = None,
acopytools: dict = None,
logger: Any = None,
default_copytools: str = 'rucio',
trace_report: dict = None,
ipv: str = 'IPv6',
workdir: str = "",
altstageout: str = None):
"""
Set default/init values.
Expand All @@ -106,6 +113,7 @@ def __init__(self, infosys_instance: Any = None, acopytools: dict = None, logger
self.infosys = infosys_instance or infosys
self.ipv = ipv
self.workdir = workdir
self.altstageout = altstageout

if isinstance(acopytools, str):
acopytools = {'default': [acopytools]} if acopytools else {}
Expand Down Expand Up @@ -221,7 +229,7 @@ def print_replicas(self, replicas: list, label: str = 'unsorted'):
"""
number = 1
maxnumber = 10
self.logger.info(f'{label} list of replicas: (max {maxnumber})')
self.logger.debug(f'{label} list of replicas: (max {maxnumber})')
for pfn, xdat in replicas:
self.logger.debug(f"{number}. "
f"lfn={pfn}, "
Expand Down
6 changes: 6 additions & 0 deletions pilot/common/errorcodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,9 @@ class ErrorCodes:
LOGCREATIONTIMEOUT = 1376
CVMFSISNOTALIVE = 1377
LSETUPTIMEDOUT = 1378
PREEMPTION = 1379
ARCPROXYFAILURE = 1380
ARCPROXYLIBFAILURE = 1381

_error_messages = {
GENERALERROR: "General pilot error, consult batch log",
Expand Down Expand Up @@ -320,6 +323,9 @@ class ErrorCodes:
LOGCREATIONTIMEOUT: "Log file creation timed out",
CVMFSISNOTALIVE: "CVMFS is not responding",
LSETUPTIMEDOUT: "Lsetup command timed out during remote file open",
PREEMPTION: "Job was preempted",
ARCPROXYFAILURE: "General arcproxy failure",
ARCPROXYLIBFAILURE: "Arcproxy failure while loading shared libraries",
}

put_error_codes = [1135, 1136, 1137, 1141, 1152, 1181]
Expand Down
Loading

0 comments on commit 4a45bc2

Please sign in to comment.