ジョブスケジューラー TORQUE の使用を前提に relion のsubmit画面を変更してみた
TORQUEは、使用するノード数と、その1ノード内で使用するプロセッサ数(ppn: processors per node)の指定が必須
なので、relionのsubmit画面に使用するノード数を指定できるようにした。

前提

間違ってるかもしれないけど、

  • TORQUEは、ユーザからのnode(台数)とppn(core数)の要求(+メモリ量も)に見合ったサイトを見つけて、そこにジョブを投げる。
  • TORQUEは、環境変数PBS_NODEFILEにて実行可能なnodeを提示する
  • TORQUEは、割り当てられたnode自身(OS)を制御して、リソースを確保することはない。ジョブを投げ込むだけ。
  • TORQUEは、ジョブに対してのみ停止したり、nice値を変更することはできる。

そして、

  • mpirunは、指定されたプロセス数でジョブを起動させ、プロセス間通信で互いにデータを融通して計算を進める。
  • mpirunは、指定された複数のnode(筐体)でプロセスを実行できて、そのプロセスの総数は-npで与えられる

qsub.shサンプル

Relionをジョブスケジュールソフト TORQUE で使用する際の qsubスクリプト が下記サイトで提示されている。
http://www2.mrc-lmb.cam.ac.uk/relion/index.php/TORQUE_template_script_example

こちらでは、使用するノードと各ノード当たりのcoreとthreadを指定できるように下記改修を加えている。
そのためqsub.shも変更になっている

#!/bin/bash
#PBS -q XXXqueueXXX
#PBS -l nodes=XXXnodesXXX:ppn=XXXcoresXXX
#PBS -j oe
#
# TORQUE submission template for RELION.
# Every placeholder token in this file is substituted by the RELION GUI before
# the script is handed to qsub; the script prints a summary of the requested
# resources and then launches the RELION command through mpirun.
#
# Run from the directory the job was submitted from; give up if it is unreachable
# rather than running the job in the wrong place.
cd "$PBS_O_WORKDIR" || exit 1
# Exported so that mpirun and the programs it starts inherit these settings
# (a plain assignment is only inherited when the variable was already exported).
export PATH=/Appl/relion-2.0.1/bin:/usr/lib64/openmpi/bin:$PATH
export LD_LIBRARY_PATH=/Appl/relion-2.0.1/lib:/usr/lib64/openmpi/lib:$LD_LIBRARY_PATH
#
# Unique host names listed in the node file, joined by spaces (for the summary only)
host=$(sort -u "$PBS_NODEFILE" | tr "\n" " ")
echo "-------------------------------------------"
echo "スタート"`date`
echo "-------------------------------------------"
echo ""
echo "Queue 名称                              "XXXqueueXXX
echo "実行ノード数                             XXXnodesXXX ($host)"
echo "  +--1ノードのプログラム数[MPI procs]  "XXXmpinodesXXX
echo "       +-- 1MPIのスレッド数[threads]   "XXXthreadsXXX
echo "                    (1ノード使用コア数 XXXcoresXXX)"
echo ""
echo "全MPI数( XXXnodesXXX x XXXmpinodesXXX ) = XXXmpiprocXXX"
echo ""
echo "全使用コア数( XXXnodesXXX x XXXcoresXXX ) --> XXXsumXXX"
echo ""
#
# GPU handling for relion-2 "relion_refine_mpi":
# when that program is run with a --gpu flag, one extra MPI process is requested.
cmd="XXXcommandXXX"
# First word of the command line = the program to run (no globbing, no echo option parsing)
read -r run _ <<< "$cmd"
np=XXXmpiprocXXX
if [[ "${run##*/}" == "relion_refine_mpi" && $cmd == *--gpu* ]]; then
   np=$((np + 1))
   echo "GPU使用のためnp数を変更しました: XXXmpiprocXXX --> $np"
fi
echo ""
# $cmd is intentionally left unquoted: it must be split into program + arguments.
mpirun --bind-to none --mca plm_rsh_agent rsh -machinefile "$PBS_NODEFILE" --map-by node -np $np $cmd
#
echo "-------------------------------------------"
echo "完了"`date`
echo "-------------------------------------------"

*フラグ「--gpu」があって、relion_refine_mpiを使う場合、「-np」に渡す値が1つ増える

ソース改修 relion-2.1-beta-0

  • gui_jobwindow.cpp
    --- gui_jobwindow.cpp.orig      2017-06-15 10:46:01.609241683 +0900
    +++ gui_jobwindow.cpp   2017-06-15 15:08:45.979177765 +0900
    @@ -219,6 +219,7 @@
     
            if (myjob.joboptions.find("nr_mpi") != myjob.joboptions.end())
         {
    +               if (myjob.joboptions.find("nr_nodes") != myjob.joboptions.end()) place("nr_nodes", TOGGLE_LEAVE_ACTIVE);
                    place("nr_mpi", TOGGLE_LEAVE_ACTIVE);
            has_parallel = true;
         }
    @@ -254,11 +255,6 @@
     
            place("qsubscript");
     
    -       place("min_dedicated");
    -       if (do_allow_change_minimum_dedicated)
    -               guientries["min_dedicated"].deactivate(false);
    -       else
    -               guientries["min_dedicated"].deactivate(true);
     
            queue_group->end();
            guientries["do_queue"].cb_menu_i(); // This is to make the default effective
  • pipeline_jobs.cpp
    *横幅の関係から一部省略してます
    --- pipeline_jobs.cpp.orig      2017-06-15 10:46:01.626241738 +0900
    +++ pipeline_jobs.cpp   2017-06-15 15:01:59.800834232 +0900
    @@ -312,25 +312,17 @@
            int nmpi = joboptions["nr_mpi"].getNumber();
            int nthr = (joboptions.find("nr_threads") != joboptions.end()) ? joboptions["nr_threads"].getNumber() : 1;
            int ncores = nmpi * nthr;
    -       int ndedi = joboptions["min_dedicated"].getNumber();
    -       float fnodes = (float)ncores / (float)ndedi;
    -       int nnodes = CEIL(fnodes);
    -       if (fmod(fnodes, 1) > 0)
    -       {
    -               std:: cout << std::endl;
    -               std::cout << " Warning! You're using " << nmpi << " MPI processes with " 
    -               std::cout << " It is more efficient to make the number of cores 
    -       }
     
                    fh.clear(); // reset eof if happened...
            fh.seekg(0, std::ios::beg);
                    std::string line;
                    std::map<std::string, std::string> replacing;
    +               replacing["XXXnodesXXX"] = floatToString( joboptions["nr_nodes"].getNumber() );
    +               replacing["XXXsumXXX"] = floatToString( joboptions["nr_nodes"].getNumber() * ncores );
    +               replacing["XXXmpiprocXXX"] = floatToString( joboptions["nr_nodes"].getNumber() * nmpi );
                    replacing["XXXmpinodesXXX"] = floatToString(nmpi);
                    replacing["XXXthreadsXXX"] = floatToString(nthr);
                    replacing["XXXcoresXXX"] = floatToString(ncores);
    -               replacing["XXXdedicatedXXX"] = floatToString(ndedi);
    -               replacing["XXXnodesXXX"] = floatToString(nnodes);
                    replacing["XXXnameXXX"] = outputname;
                    replacing["XXXerrfileXXX"] = outputname + "run.err";
                    replacing["XXXoutfileXXX"] = outputname + "run.out";
    @@ -621,7 +613,8 @@
     
     
            if (has_mpi)
    -               joboptions["nr_mpi"] = JobOption("Number of MPI procs:", 1, 1, 64, 1, "Number of MPI nodes(略
    +               joboptions["nr_nodes"] = JobOption("Number of MPI nodes:", 1, 1, 8, 1, "machine nodes");
    +               joboptions["nr_mpi"] = JobOption("Number of MPI procs:", 1, 1, 64, 1, "Number of MPI par nodes");
            if (has_thread)
                    joboptions["nr_threads"] = JobOption("Number of threads:", 1, 1, 16, 1, "Number of shared-(略
    @@ -631,11 +624,9 @@
     the job will be executed locally. Note that only MPI jobs may be sent to a queue.");
     
         // Need the std::string(), as otherwise it will be overloaded and passed as a boolean....
    -    joboptions["queuename"] = JobOption("Queue name: ", std::string("openmpi"), "Name of the queue to (略
    +    joboptions["queuename"] = JobOption("Queue name: ", std::string("batch"), "Name of the queue");
     
    -    joboptions["qsub"] = JobOption("Queue submit command:", std::string("qsub"), "Name of the command (略
    -Note that the person who installed RELION should have made a custom script for your cluster/queue setup.(略
    -(or create your own script following the RELION WIKI) if you have trouble submitting jobs.");
    +    joboptions["qsub"] = JobOption("Queue submit command:", std::string("qsub"), "");
     
            // Two additional options that may be set through environment variables RELION_QSUB_EXTRA1 and (略
            char * extra1_text = getenv ("RELION_QSUB_EXTRA1");
    @@ -668,7 +659,7 @@
                    default_location=mydefault;
            }
     
    -    joboptions["qsubscript"] = JobOption("Standard submission script:", std::string(default_location),(略
    +    joboptions["qsubscript"] = JobOption("Standard submission script:", std::string("/Appl/relion-2.1-(略
     "The template for your standard queue job submission script. \
     Its default location may be changed by setting the environment variable RELION_QSUB_TEMPLATE. \
     In the template script a number of variables will be replaced: \n \
    @@ -687,9 +678,6 @@
     But note that (unlike all other entries in the GUI) the extra values are not remembered from one (略
     
            // Check for environment variable RELION_QSUB_TEMPLATE
    -       char * my_minimum_dedicated = getenv ("RELION_MINIMUM_DEDICATED");
    -       int minimum_nr_dedicated = (my_minimum_dedicated == NULL) ? DEFAULTMININIMUMDEDICATED : (略
    -       joboptions["min_dedicated"] = JobOption("Minimum dedicated cores per node:", minimum_nr_(略
     
            // Need the std::string(), as otherwise it will be overloaded and passed as a boolean....
            joboptions["other_args"] = JobOption("Additional arguments:", std::string(""), "In this (略
    [root@em00 src]#
  • pipeline_jobs.h
    --- pipeline_jobs.h.orig        2017-06-15 10:46:01.626241738 +0900
    +++ pipeline_jobs.h     2017-06-15 11:44:18.004821647 +0900
    @@ -49,13 +49,13 @@
     #define RADIO_NODETYPE 1
     
     // Our own defaults at LMB are the hard-coded ones
    -#define DEFAULTQSUBLOCATION "/public/EM/RELION/relion/bin/relion_qsub.csh"
    -#define DEFAULTCTFFINDLOCATION "/public/EM/ctffind/ctffind.exe"
    -#define DEFAULTMOTIONCOR2LOCATION "/public/EM/MOTIONCOR2/MotionCor2"
    -#define DEFAULTUNBLURLOCATION "/public/EM/UNBLUR/unblur.exe"
    -#define DEFAULTSUMMOVIELOCATION "/public/EM/SUMMOVIE/summovie.exe"
    -#define DEFAULTGCTFLOCATION "/public/EM/Gctf/bin/Gctf"
    -#define DEFAULTRESMAPLOCATION "/public/EM/ResMap/ResMap-1.1.4-linux64"
    +#define DEFAULTQSUBLOCATION "/Appl/relion/bin/qsub.sh"
    +#define DEFAULTCTFFINDLOCATION "/Appl/ctf/ctffind4.exe"
    +#define DEFAULTMOTIONCOR2LOCATION "/Appl/local/bin/MotionCor2"
    +#define DEFAULTUNBLURLOCATION "/Appl/local/bin/unblur"
    +#define DEFAULTSUMMOVIELOCATION "/Appl/local/bin/summovie"
    +#define DEFAULTGCTFLOCATION "/Appl/Gctf/bin/gctf"
    +#define DEFAULTRESMAPLOCATION "/Appl/local/bin/ResMap"
     #define DEFAULTMININIMUMDEDICATED 1
     #define DEFAULTWARNINGLOCALMPI 32
     #define DEFAULTALLOWCHANGEMINDEDICATED true

ソース改修 relion-2.0.1

「Running」タブで使用するノード数(マシン台数)を指定する「Number of Nodes」を追加して、
「Minimum dedicated cores per node」は削除。
*5,6台のクラスターとか、ヘテロなクラスター構成ならこっちが楽かなって思っている
2016y10m10d_101814353.png

  • relion2-beta/src/gui_jobwindow.cpp
    --- relion2-beta/src/gui_jobwindow.cpp.orig     2016-10-09 23:40:02.944387466 +0900
    +++ relion2-beta/src/gui_jobwindow.cpp  2016-10-10 10:12:23.457312665 +0900
    @@ -88,17 +88,6 @@
            has_thread = _has_thread;
     
            // Check for environment variable RELION_QSUB_TEMPLATE
    -       char * my_minimum_dedicated = getenv ("RELION_MINIMUM_DEDICATED");
    -       minimum_nr_dedicated = (my_minimum_dedicated == NULL) ? DEFAULTMININIMUMDEDICATED : (略
    -
    -       char * my_allow_change_dedicated = getenv ("RELION_ALLOW_CHANGE_MINIMUM_DEDICATED");
    -       if (my_allow_change_dedicated == NULL)
    -               do_allow_change_minimum_dedicated = DEFAULTMININIMUMDEDICATED;
    -       else
    -       {
    -               int check_allow =  textToInteger(my_allow_change_dedicated);
    -               do_allow_change_minimum_dedicated = (check_allow == 0) ? false : true;
    -       }
     
            // Set up tabs
         if (nr_tabs >= 1) // there is always the running tab, which is not counted on the input nr_tabs!
    @@ -184,7 +173,10 @@
         resetHeight();
     
            if (has_mpi)
    +       {
    +               nr_nodes.place(current_y, "Number of nodes:", 1, 1, 16, 1, "Number of nodes to use in parallel.");
                    nr_mpi.place(current_y, "Number of MPI procs:", 1, 1, 64, 1, "Number of MPI (略
    +       }
     
            if (has_thread)
                    nr_threads.place(current_y, "Number of threads:", 1, 1, 16, 1, "Number of (略
    @@ -256,8 +248,6 @@
     XXXmpinodesXXX = The number of MPI nodes; \n \
     XXXthreadsXXX = The number of threads; \n \
     XXXcoresXXX = XXXmpinodesXXX * XXXthreadsXXX; \n \
    -XXXdedicatedXXX = The minimum number of dedicated cores on each node; \n \
    -XXXnodesXXX = The number of requested nodes = CEIL(XXXcoresXXX / XXXdedicatedXXX); \n \
     If these options are not enough for your standard jobs, you may define two extra variables: (略
     Their help text is set by the environment variables RELION_QSUB_EXTRA1 and RELION_QSUB_EXTRA2 \
     For example, setenv RELION_QSUB_EXTRA1 \"Max number of hours in queue\" will result in an (略
    @@ -265,12 +255,6 @@
     Likewise, default values for the extra entries can be set through environment variables (略
     But note that (unlike all other entries in the GUI) the extra values are not remembered (略
     
    -       min_dedicated.place(current_y, "Minimum dedicated cores per node:", minimum_nr_dedicated, (略
    -       if (do_allow_change_minimum_dedicated)
    -               min_dedicated.deactivate(false);
    -       else
    -               min_dedicated.deactivate(true);
    -
            queue_group->end();
            do_queue.cb_menu_i(); // This is to make the default effective
     
    @@ -344,7 +328,10 @@
     void RelionJobWindow::closeWriteFile(std::ofstream& fh, std::string fn)
     {
            if (has_mpi)
    +       {
    +               nr_nodes.writeValue(fh);
                    nr_mpi.writeValue(fh);
    +       }
            if (has_thread)
                    nr_threads.writeValue(fh);
            do_queue.writeValue(fh);
    @@ -366,7 +353,10 @@
     void RelionJobWindow::closeReadFile(std::ifstream& fh)
     {
            if (has_mpi)
    +       {
    +               nr_nodes.readValue(fh);
                    nr_mpi.readValue(fh);
    +       }
            if (has_thread)
                    nr_threads.readValue(fh);
            do_queue.readValue(fh);
    @@ -394,21 +384,13 @@
            int nmpi = nr_mpi.getValue();
            int nthr = (has_thread) ? nr_threads.getValue() : 1;
            int ncores = nr_mpi.getValue() * nthr;
    -       int ndedi = min_dedicated.getValue();
    -       float fnodes = (float)ncores / (float)ndedi;
    -       int nnodes = CEIL(fnodes);
    -       if (fmod(fnodes, 1) > 0)
    -       {
    -               std:: cout << std::endl;
    -               std::cout << " Warning! You're using " << nmpi << " MPI processes with " (略
    -               std::cout << " It is more efficient to make the number of cores (i.e. mpi*threads) (略
    -       }
     
    +       replaceStringAll(textbuf, "XXXnodesXXX", floatToString(nr_nodes.getValue()) );
    +       replaceStringAll(textbuf, "XXXsumXXX", floatToString(     nr_nodes.getValue() * ncores ));
    +       replaceStringAll(textbuf, "XXXmpiprocXXX", floatToString( nr_nodes.getValue() * nmpi ));
            replaceStringAll(textbuf, "XXXmpinodesXXX", floatToString(nmpi) );
            replaceStringAll(textbuf, "XXXthreadsXXX", floatToString(nthr) );
            replaceStringAll(textbuf, "XXXcoresXXX", floatToString(ncores) );
    -       replaceStringAll(textbuf, "XXXdedicatedXXX", floatToString(ndedi) );
    -       replaceStringAll(textbuf, "XXXnodesXXX", floatToString(nnodes) );
            replaceStringAll(textbuf, "XXXnameXXX", outputname);
            replaceStringAll(textbuf, "XXXerrfileXXX", outputname + "run.err");
            replaceStringAll(textbuf, "XXXoutfileXXX", outputname + "run.out");
  • relion2-beta/src/gui_jobwindow.h
    --- relion2-beta/src/gui_jobwindow.h.orig       2016-10-09 23:40:02.949387557 +0900
    +++ relion2-beta/src/gui_jobwindow.h    2016-10-09 23:47:52.376866776 +0900
    @@ -104,6 +104,7 @@
     
            // Running
            Fl_Group *queue_group;
    +       SliderEntry nr_nodes;
            SliderEntry nr_mpi;
            SliderEntry nr_threads;
         BooleanEntry do_queue;

改修 relion-1.4

--- gui_jobwindow.cpp.orig      2015-09-02 02:17:43.000000000 +0900
+++ gui_jobwindow.cpp   2016-02-10 20:44:20.094870634 +0900
@@ -120,7 +120,10 @@
     resetHeight();
 
        if (has_mpi)
+       {
+               nr_nodes.place(current_y, "Number of nodes:", 1, 1, 16, 1, "Number of nodes to use in parallel.");
                nr_mpi.place(current_y, "Number of MPI procs:", 1, 1, 64, 1, "Number of (略
+       }
 
        if (has_thread)
        {
@@ -270,7 +273,10 @@
 void RelionJobWindow::closeWriteFile(std::ofstream& fh)
 {
        if (has_mpi)
+       {
+               nr_nodes.writeValue(fh);
                nr_mpi.writeValue(fh);
+       }
        if (has_thread)
        {
                nr_threads.writeValue(fh);
@@ -292,7 +298,10 @@
 void RelionJobWindow::closeReadFile(std::ifstream& fh)
 {
        if (has_mpi)
+       {
+               nr_nodes.readValue(fh);
                nr_mpi.readValue(fh);
+       }
        if (has_thread)
        {
                nr_threads.readValue(fh);
@@ -333,11 +342,13 @@
                std::cout << " It is more efficient to make the number of cores (i.e. mpi*threads)(略
        }
 
+       replaceStringAll(textbuf, "XXXnodesXXX", floatToString(nr_nodes.getValue()) );
+       replaceStringAll(textbuf, "XXXsumXXX", floatToString(     nr_nodes.getValue() * ncores ));
+       replaceStringAll(textbuf, "XXXmpiprocXXX", floatToString( nr_nodes.getValue() * nmpi ));
        replaceStringAll(textbuf, "XXXmpinodesXXX", floatToString(nmpi) );
        replaceStringAll(textbuf, "XXXthreadsXXX", floatToString(nthr) );
        replaceStringAll(textbuf, "XXXcoresXXX", floatToString(ncores) );
        replaceStringAll(textbuf, "XXXdedicatedXXX", floatToString(ndedi) );
-       replaceStringAll(textbuf, "XXXnodesXXX", floatToString(nnodes) );
        replaceStringAll(textbuf, "XXXnameXXX", outputname);
        replaceStringAll(textbuf, "XXXerrfileXXX", outputname + ".err");
        replaceStringAll(textbuf, "XXXoutfileXXX", outputname + ".out");
--- gui_jobwindow.h.orig        2015-08-27 19:20:56.000000000 +0900
+++ gui_jobwindow.h     2016-02-10 20:26:28.515365998 +0900
@@ -76,6 +76,7 @@
 
        // Running
        Fl_Group *queue_group;
+       SliderEntry nr_nodes;
        SliderEntry nr_mpi;
        SliderEntry nr_threads;
        SliderEntry ram_per_thread;

改修 relion-1.3

TORQUEのキューの作り方を工夫すれば解決可能なのかも知れないけど、relionにて提供されているTORQUEへの変数(リソース用)には

  • XXXmpinodesXXX
    MPIプロセス数
  • XXXthreadsXXX
    各MPIプロセスで供されるthread数
  • XXXcoresXXX
    (MPIプロセス数) x (各MPIプロセスで供されるthread数)で、1ノードで使用されるcore数

らがある。ここに使用する筐体nodeを指定する変数(XXXnodesXXX)とトータルのMPIプロセス数が定まる変数(XXXmpirunXXX)を用意したいと思った。
修正箇所はrelionのソース内の下記部分である。
relion-1.3/src/gui_jobwindow.cpp

  • 109行目で
            if (has_mpi)
    +       {
    +               nr_nodes.place(current_y, "Number of nodes:", 1, 1, 16, 1, "Number of nodes to use in parallel.");
                    nr_mpi.place(current_y, "Number of MPI procs:", 1, 1, 64, 1, (略
    +       }
            if (has_thread)
  • 250行目で
            if (has_mpi)
    +       {
    +               nr_nodes.writeValue(fh);
                    nr_mpi.writeValue(fh);
    +       }
    
  • 272行目で
            if (has_mpi)
    +       {
    +               nr_nodes.readValue(fh);
                    nr_mpi.readValue(fh);
    +       }
            if (has_thread)
  • 303行目で
            int nthr = (has_thread) ? nr_threads.getValue() : 1;
     
    +       replaceStringAll(textbuf, "XXXnodesXXX", floatToString(nr_nodes.getValue()) );
    +       replaceStringAll(textbuf, "XXXmpirunXXX", floatToString(nr_mpi.getValue() * nr_nodes.getValue() ) );
            replaceStringAll(textbuf, "XXXmpinodesXXX", floatToString(nr_mpi.getValue()) );

relion-1.3/src/gui_jobwindow.h

  • 70行目で
            Fl_Group *queue_group;
    +       SliderEntry nr_nodes;
            SliderEntry nr_mpi;

としてコンパイルを行う。すると、追加したnode数を指定する欄が表示される
2014y12m01d_010924395.png

これに連動して、使用するqsub.shを下記のようにする。

#!/bin/bash
#PBS -q XXXqueueXXX
#PBS -l nodes=XXXnodesXXX:ppn=XXXcoresXXX
#
# TORQUE submission template for RELION; the placeholder tokens are substituted
# by the RELION GUI before the script is handed to qsub.
#
# Run from the directory the job was submitted from; give up if it is unreachable
# rather than running the job in the wrong place.
cd "$PBS_O_WORKDIR" || exit 1
#
# Start the requested total number of MPI processes on the hosts listed in the node file.
mpirun -bind-to none --mca plm_rsh_agent rsh -machinefile "$PBS_NODEFILE" --map-by node -np XXXmpirunXXX XXXcommandXXX

「nodes」で筐体node数を、「ppn」で1node当り使用するコア数を決めている。そして、mpirunの「-np」でMPIプロセスの総数を定めている。
*MPI/pthreadsなハイブリッドな様式なので-bind-to noneを追記しないとパフォーマンスに重大な影響がでる。


トップ   編集 添付 複製 名前変更     ヘルプ   最終更新のRSS
Last-modified: 2017-06-19 (月) 08:52:01 (39d)