From a9ca5e10e3414b460ee776f85259650904a99eeb Mon Sep 17 00:00:00 2001 From: Noah Biederbeck Date: Fri, 21 Apr 2023 15:06:23 +0200 Subject: [PATCH 1/6] Fixes #29. --- lapalma.py | 35 +++++++++++++++++++++++++++++++++++ lapalma.sh | 18 ------------------ 2 files changed, 35 insertions(+), 18 deletions(-) create mode 100755 lapalma.py delete mode 100755 lapalma.sh diff --git a/lapalma.py b/lapalma.py new file mode 100755 index 0000000..1d1613b --- /dev/null +++ b/lapalma.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 +from argparse import ArgumentParser +from subprocess import run + +parser = ArgumentParser( + description="Copy data files from the cluster to locally create plots.", +) +parser.add_argument( + "--hostname", + help="The hostname as configured in you ~/.ssh/config, e.g. `cp01`.", + required=True, +) +parser.add_argument( + "--remote-path", + help="Path to your directory on the cluster, " + "e.g. `/fefs/aswg/workspace//lst-agn-analysis/`. " + "Can be absolute (start with `/`) otherwise it is relative to the home.", + required=True, +) +args = parser.parse_args() + +filelist = [ + "build/dl1-datachecks-masked.h5", +] +files = "{" + ",".join(filelist) + "}" + + +def main(): + rsync = "rsync -auh --info=progress2 --exclude-from=.gitignore" + cmd = f"{rsync} '{args.hostname}:{args.remote_path}/{files}' ." + run(cmd, shell=True, capture_output=True, check=True) + + +if __name__ == "__main__": + main() diff --git a/lapalma.sh b/lapalma.sh deleted file mode 100755 index 3e355bb..0000000 --- a/lapalma.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -RSYNC="rsync -auh --info=progress2 --exclude-from=.gitignore " -LOCAL=. -REMOTE=mrk421 -HOST=cp01 - -case "$1" in -pull) - $RSYNC $HOST:$REMOTE/ $LOCAL - ;; -push) - $RSYNC $LOCAL/ $HOST:$REMOTE - ;; -*) - echo "'push' or 'pull'?" - exit 1 - ;; -esac From 9d46e5fa0068a85db1cd45db6711ef37e3990083 Mon Sep 17 00:00:00 2001 From: Noah Biederbeck Date: Fri, 21 Apr 2023 15:11:39 +0200 Subject: [PATCH 2/6] Add the copy-helper-script section to README --- README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 4020922..68f61fc 100644 --- a/README.md +++ b/README.md @@ -80,15 +80,14 @@ It is related to https://github.com/nbiederbeck/lst-agn-analysis/issues/26 If you have run snakemake on the cluster, you can create the plots and tex files locally (using your own matplotlibrc for example). We separate the calculation of metrics and the plotting to make sure you can finetune plots later on without needing to run the expensive steps on the local machine. The tables for that are saved as either `fits.gz` or `h5`. - -For the data-selection plots, you need to download `build/dl1-datacheck-masked.h5`, e.g.: +We create a helper script for that that uses `rsync`. Check how to use it: ``` -mkdir -p build -scp :/lst-data-selection/build/dl1-datachecks-masked.h5 build/ +./lapalma.py --help ``` -Afterwards: +For the data-selection plots, you need to download `build/dl1-datacheck-masked.h5`, which is done via the above helper script. +Run it with your settings and afterwards: ``` make -f local.mk From 69fcb1c7ac1ba0e80baf6baf74985dcef72c2d93 Mon Sep 17 00:00:00 2001 From: Noah Biederbeck Date: Fri, 5 May 2023 12:24:16 +0200 Subject: [PATCH 3/6] Copy script copies everything in the build directory with exceptions --- README.md | 11 ----------- lapalma.py | 47 ++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 68f61fc..0c83aaa 100644 --- a/README.md +++ b/README.md @@ -85,14 +85,3 @@ We create a helper script for that that uses `rsync`. Check how to use it: ``` ./lapalma.py --help ``` - -For the data-selection plots, you need to download `build/dl1-datacheck-masked.h5`, which is done via the above helper script. -Run it with your settings and afterwards: - -``` -make -f local.mk -``` - -DEV-TODO: Do the same for the other plots (https://github.com/nbiederbeck/lst-agn-analysis/issues/29) -If you do some `cp **/*.fits.gz` shenanigans, beware that the dl3 files are saved with -the extension `fits.gz` as well. diff --git a/lapalma.py b/lapalma.py index 1d1613b..c73c4ab 100755 --- a/lapalma.py +++ b/lapalma.py @@ -3,7 +3,7 @@ from subprocess import run parser = ArgumentParser( - description="Copy data files from the cluster to locally create plots.", + description="Copy built data files from the cluster to locally create plots.", ) parser.add_argument( "--hostname", @@ -13,22 +13,51 @@ parser.add_argument( "--remote-path", help="Path to your directory on the cluster, " - "e.g. `/fefs/aswg/workspace//lst-agn-analysis/`. " - "Can be absolute (start with `/`) otherwise it is relative to the home.", + "e.g. `/fefs/aswg/workspace//lst-agn-analysis/build`. " + "Can be absolute (start with `/`) otherwise it is relative to the home. " + "Get the absoulte path of any directory with " + "`realpath ` on the cluster.", required=True, ) +parser.add_argument( + "--exclude", + help="Patterns to additionally exclude from copying. Comma separated string. " + "When you use globs (`*`), remember to quote it in single quotes, " + "e.g. `--exclude='*.pdf,*.png'`.", + default=None, + type=str, +) +parser.add_argument( + "--rsync-args", + help="Custom command line arguments for rsync. Check `man rsync` for info. " + "Try `--rsync-args='-nv'` for a verbose dry-run.", + default="", + type=str, +) args = parser.parse_args() -filelist = [ - "build/dl1-datachecks-masked.h5", +exclude_patterns = [ + "dl1_*.h5", + "dl2_*.h5", + "dl3_*.fits.gz", + "*.log", + "logs/*", + "models/model*", + "*.pdf", ] -files = "{" + ",".join(filelist) + "}" def main(): - rsync = "rsync -auh --info=progress2 --exclude-from=.gitignore" - cmd = f"{rsync} '{args.hostname}:{args.remote_path}/{files}' ." - run(cmd, shell=True, capture_output=True, check=True) + rsync = "rsync -auh --info=progress2 " + rsync += args.rsync_args + " " + for pat in exclude_patterns: + rsync += f"--exclude='{pat}' " + if args.exclude is not None: + for pat in args.exclude.split(","): + rsync += f"--exclude='{pat}' " + cmd = f"{rsync} '{args.hostname}:{args.remote_path}' ." + print(cmd) + run(cmd, shell=True, capture_output=False, check=True) if __name__ == "__main__": From 5e59615e7a1be85e968ff26239f73b236a69ce5b Mon Sep 17 00:00:00 2001 From: Noah Biederbeck Date: Fri, 5 May 2023 12:29:37 +0200 Subject: [PATCH 4/6] strip trailing slash, otherwise it copies not the folder but the contents --- lapalma.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lapalma.py b/lapalma.py index c73c4ab..a022c8a 100755 --- a/lapalma.py +++ b/lapalma.py @@ -55,7 +55,7 @@ def main(): if args.exclude is not None: for pat in args.exclude.split(","): rsync += f"--exclude='{pat}' " - cmd = f"{rsync} '{args.hostname}:{args.remote_path}' ." + cmd = f"{rsync} '{args.hostname}:{args.remote_path.rstrip('/')}' ." print(cmd) run(cmd, shell=True, capture_output=False, check=True) From 5eb169f78be2d64b1d376b637d64bcb1482c0b84 Mon Sep 17 00:00:00 2001 From: Noah Biederbeck Date: Fri, 5 May 2023 12:30:15 +0200 Subject: [PATCH 5/6] rename script --- README.md | 2 +- lapalma.py => copy-from-cluster.py | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename lapalma.py => copy-from-cluster.py (100%) diff --git a/README.md b/README.md index 0c83aaa..2fda238 100644 --- a/README.md +++ b/README.md @@ -83,5 +83,5 @@ run the expensive steps on the local machine. The tables for that are saved as e We create a helper script for that that uses `rsync`. Check how to use it: ``` -./lapalma.py --help +./copy-from-cluster.py --help ``` diff --git a/lapalma.py b/copy-from-cluster.py similarity index 100% rename from lapalma.py rename to copy-from-cluster.py From 9800dbd24b991861fea5fa8c45fb679cd65e5a53 Mon Sep 17 00:00:00 2001 From: Noah Biederbeck Date: Fri, 5 May 2023 14:13:12 +0200 Subject: [PATCH 6/6] also exclude dl4 --- copy-from-cluster.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/copy-from-cluster.py b/copy-from-cluster.py index a022c8a..8c3b789 100755 --- a/copy-from-cluster.py +++ b/copy-from-cluster.py @@ -37,12 +37,19 @@ args = parser.parse_args() exclude_patterns = [ + # run files "dl1_*.h5", "dl2_*.h5", "dl3_*.fits.gz", + # DL4 Datasets + "phaobs_*.fits", + "dl4/*/datasets.fits.gz", + # log files "*.log", "logs/*", + # models "models/model*", + # plots "*.pdf", ]