#author("2023-08-27T12:42:36+00:00","default:sysosa","sysosa")
#author("2025-10-24T16:58:43+00:00","default:sysosa","sysosa")
slurmでcryoSPARCを運用する場合.

&color(darkorange){cluster_info.json};と&color(darkorchid){cluster_script.sh};は下記コマンドで取得できます.

#code(nonumber){{
cryosparcm cluster example slurm
}}
cluster_info.json内にある「qstat_code_cmd_tpl」のコマンド行が微妙でエラーになります
どうもパイプ(|)がダメらしくて下記に変更します

***&color(darkorange){cluster_info.json}; [#lae79ece]
中身はこんな感じ
#code(nonumber){{
"qstat_code_cmd_tpl": "squeue -j {{ cluster_job_id } } --format=%T | sed -n 2p",

"qstat_code_cmd_tpl": "squeue --noheader -j {{ cluster_job_id } } --format=%T",
}}
&size(10){Pukiwikiの表記のため一部表記を変えてます};

#code(nonumber){{
{
    "name" : "node01",
    "worker_bin_path" : "/home/cryosparc/cryosparc_worker/bin/cryosparcw",
    "cache_path" : "/scratch/cryosparc",
    "send_cmd_tpl" : "{{ command } }",
    "qsub_cmd_tpl" : "sbatch {{ script_path_abs } }",
    "qstat_cmd_tpl" : "squeue -j {{ cluster_job_id } }",
    "qstat_code_cmd_tpl": "squeue --noheader -j {{ cluster_job_id } } --format=%T",
    "qdel_cmd_tpl" : "scancel {{ cluster_job_id } }",
    "transfer_cmd_tpl" : "cp {{ src_path } } {{ dest_path } }",
    "qinfo_cmd_tpl" : "sinfo"
}
}}
&size(10){Pukiwikiの表記のため一部表記を変えてます};

ただ、「qstat_code_cmd_tpl」のコマンド行が微妙でエラーになる場合がある. そのため
#code(nonumber){{
"qstat_code_cmd_tpl": "squeue -j {{ cluster_job_id } } --format=%T | sed -n 2p",

"qstat_code_cmd_tpl": "squeue --noheader -j {{ cluster_job_id } } --format=%T",
}}
とした方がいいかも.

あとcluster_script.shはデフォのままかな  &size(10){Pukiwikiの表記のため一部表記を変えてます};


***&color(darkorchid){cluster_script.sh}; [#uc13e0c9]
中身はこんな感じ

#code(nonumber){{
#!/usr/bin/env bash
#### cryoSPARC cluster submission script template for SLURM
## Available variables:
## {{ run_cmd } }            - the complete command string to run the job
## {{ num_cpu } }            - the number of CPUs needed
## {{ num_gpu } }            - the number of GPUs needed.
##                            Note: The code will use this many GPUs starting from dev id 0.
##                                  The cluster scheduler has the responsibility
##                                  of setting CUDA_VISIBLE_DEVICES or otherwise ensuring that the
##                                  job uses the correct cluster-allocated GPUs.
## {{ ram_gb } }             - the amount of RAM needed in GB
## {{ job_dir_abs } }        - absolute path to the job directory
## {{ project_dir_abs } }    - absolute path to the project dir
## {{ job_log_path_abs } }   - absolute path to the log file for the job
## {{ worker_bin_path } }    - absolute path to the cryosparc worker command
## {{ run_args } }           - arguments to be passed to cryosparcw run
## {{ project_uid } }        - uid of the project
## {{ job_uid } }            - uid of the job
## {{ job_creator } }        - name of the user that created the job (may contain spaces)
## {{ cryosparc_username } } - cryosparc username of the user that created the job (usually an email)
##
## What follows is a simple SLURM script:

#SBATCH --job-name cryosparc_{{ project_uid } }_{{ job_uid } }
#SBATCH -n {{ num_cpu } }
#SBATCH --gres=gpu:{{ num_gpu } }
#SBATCH --partition=gpu
#SBATCH --mem={{ (ram_gb*1000)|int } }MB
#SBATCH --output={{ job_log_path_abs } }
#SBATCH --error={{ job_log_path_abs } }

# Build a comma-separated list of GPU indices that currently have no
# compute processes, so CUDA_VISIBLE_DEVICES only exposes idle devices.
# NOTE(review): assumes at most 16 GPUs per node (indices 0-15).
available_devs=""
for devidx in $(seq 0 15); do
    if [[ -z $(nvidia-smi -i "$devidx" --query-compute-apps=pid --format=csv,noheader) ]]; then
        if [[ -z "$available_devs" ]]; then
            available_devs=$devidx
        else
            available_devs=$available_devs,$devidx
        fi
    fi
done
export CUDA_VISIBLE_DEVICES=$available_devs

{{ run_cmd } }

}}

&size(10){Pukiwikiの表記のため一部表記を変えてます};



1

トップ   編集 差分 履歴 添付 複製 名前変更 リロード   新規 一覧 検索 最終更新   ヘルプ   最終更新のRSS