Relionはジョブ管理システム(SGE、Torque、PBSProなど)と連携でき、
ジョブ投入画面の「Submit to queue?」をYesにすれば既設のジョブ管理システムへジョブが投入されます。
2018y08m03d_121758190.png

指定した「queue」の先で指定したMPI分のリソースを確保して計算するのだが、
指定先の「queue」で1台当たりのコア数を「Minimum dedicated cores per node」に記載する必要がある。
一様にcore数が揃ったクラスターマシンが数組なら便利です。
ですが、購入時期によってcore数が異なるクラスターマシン群が複数存在すると面倒かな。

そう思って「queue指定 -> 使用するノードの数 -> 使用するMPIの数 -> MPI当たりのthreads数」で
ジョブを定義できるような変更をかつて行ってました。
2018y09m13d_053604415.png

Relion-3.0でも同様に変更してみた
この変更で RELION_QSUB_TEMPLATE(qsub.sh)も変更する必要がある。参照Relion/qsub.sh
*Relionでは qsub.csh を提供しているが、ここでは bash で書いてます

修正コード

gui_jobwindow.cpp

--- relion-3.0_beta/src/gui_jobwindow.cpp.orig	2018-08-03 00:42:30.661198915 +0900
+++ relion-3.0_beta/src/gui_jobwindow.cpp	2018-08-03 20:45:29.884243395 +0900
@@ -218,6 +218,7 @@
 
 	if (myjob.joboptions.find("nr_mpi") != myjob.joboptions.end())
 	{
+		if (myjob.joboptions.find("nr_nodes") != myjob.joboptions.end()) place("nr_nodes", TOGGLE_LEAVE_ACTIVE);
 		place("nr_mpi", TOGGLE_LEAVE_ACTIVE);
 		has_parallel = true;
 	}
@@ -259,11 +260,6 @@
 
 	place("qsubscript");
 
-	place("min_dedicated");
-	if (do_allow_change_minimum_dedicated)
-		guientries["min_dedicated"].deactivate(false);
-	else
-		guientries["min_dedicated"].deactivate(true);
 
 	queue_group->end();
 	guientries["do_queue"].cb_menu_i(); // This is to make the default effective

pipeline_jobs.cpp

--- relion-3.0_beta/src/pipeline_jobs.cpp.orig  2018-08-07 18:29:26.193434987 +0900
+++ relion-3.0_beta/src/pipeline_jobs.cpp       2018-08-07 19:12:01.347371365 +0900
@@ -443,25 +443,18 @@
                int nmpi = (joboptions.find("nr_mpi") != joboptions.end()) ? joboptions["nr_mpi"(略
                int nthr = (joboptions.find("nr_threads") != joboptions.end()) ? joboptions["nr_(略
                int ncores = nmpi * nthr;
-               int ndedi = joboptions["min_dedicated"].getNumber();
-               float fnodes = (float)ncores / (float)ndedi;
-               int nnodes = CEIL(fnodes);
-               if (fmod(fnodes, 1) > 0)
-               {
-                       std:: cout << std::endl;
-                       std::cout << " Warning! You're using " << nmpi << " MPI processes with(略
-                       std::cout << " It is more efficient to make the number of cores (i.e. (略
-               }
 
                fh.clear(); // reset eof if happened...
                fh.seekg(0, std::ios::beg);
                std::string line;
                std::map<std::string, std::string> replacing;
+               replacing["XXXversionXXX"] = RELION_VERSION;
+               replacing["XXXsumXXX"] = floatToString( joboptions["nr_nodes"].getNumber() * ncores );
+               replacing["XXXmpiprocXXX"] = floatToString( joboptions["nr_nodes"].getNumber() * nmpi );
                replacing["XXXmpinodesXXX"] = floatToString(nmpi);
                replacing["XXXthreadsXXX"] = floatToString(nthr);
                replacing["XXXcoresXXX"] = floatToString(ncores);
-               replacing["XXXdedicatedXXX"] = floatToString(ndedi);
-               replacing["XXXnodesXXX"] = floatToString(nnodes);
+               replacing["XXXnodesXXX"] = floatToString( joboptions["nr_nodes"].getNumber() );
                replacing["XXXnameXXX"] = outputname;
                replacing["XXXerrfileXXX"] = outputname + "run.err";
                replacing["XXXoutfileXXX"] = outputname + "run.out";
@@ -757,7 +750,8 @@
        const char qsub_nrmpi_val = (qsub_nrmpi_text ? atoi(qsub_nrmpi_text) : DEFAULTNRMPI);
        if (has_mpi)
        {
-               joboptions["nr_mpi"] = JobOption("Number of MPI procs:", qsub_nrmpi_val , 1, (略
+               joboptions["nr_nodes"] = JobOption("Number of nodes:", 1, 1, 8 , 1, "Number (略
+               joboptions["nr_mpi"] = JobOption("Number of MPI procs per nodes:", qsub_nrm(略
        }
 
        const char *thread_max_input = getenv("RELION_THREAD_MAX");
@@ -837,11 +831,12 @@
 In the template script a number of variables will be replaced: \n \
 XXXcommandXXX = relion command + arguments; \n \
 XXXqueueXXX = The queue name; \n \
-XXXmpinodesXXX = The number of MPI nodes; \n \
-XXXthreadsXXX = The number of threads; \n \
+XXXmpinodesXXX = The number of MPI per node; \n \
+XXXmpiprocXXX = The total number of MPI; \n \
+XXXthreadsXXX = The number of threads per MPI; \n \
 XXXcoresXXX = XXXmpinodesXXX * XXXthreadsXXX; \n \
-XXXdedicatedXXX = The minimum number of dedicated cores on each node; \n \
-XXXnodesXXX = The number of requested nodes = CEIL(XXXcoresXXX / XXXdedicatedXXX); \n \
+XXXsumXXX = The total number of core; \n \
+XXXnodesXXX = The number of requested nodes \n \
 If these options are not enough for your standard jobs, you may define a user-specifie(略
 The number of extra variables is controlled through the environment variable RELION_QS(略
 Their help text is set by the environment variables RELION_QSUB_EXTRA1, RELION_QSUB_EX(略
@@ -851,9 +846,6 @@
 But note that (unlike all other entries in the GUI) the extra values are not rememb(略
 
        // Check for environment variable RELION_QSUB_TEMPLATE
-       char * my_minimum_dedicated = getenv ("RELION_MINIMUM_DEDICATED");
-       int minimum_nr_dedicated = (my_minimum_dedicated == NULL) ? DEFAULTMININIMUMD(略
-       joboptions["min_dedicated"] = JobOption("Minimum dedicated cores per node:", (略
 
        // Need the std::string(), as otherwise it will be overloaded and passed as a boolean....
        joboptions["other_args"] = JobOption("Additional arguments:", std::string(""), (略

pipeline_jobs.h

--- ./relion-3.0_beta/src/pipeline_jobs.h.orig  2018-08-03 00:42:30.809196808 +0900
+++ ./relion-3.0_beta/src/pipeline_jobs.h       2018-08-07 02:41:58.233612539 +0900
@@ -51,13 +51,13 @@
 #define RADIO_GAIN_FLIP 3
 
 // Our own defaults at LMB are the hard-coded ones
-#define DEFAULTQSUBLOCATION "/public/EM/RELION/relion/bin/relion_qsub.csh"
-#define DEFAULTCTFFINDLOCATION "/public/EM/ctffind/ctffind.exe"
-#define DEFAULTMOTIONCOR2LOCATION "/public/EM/MOTIONCOR2/MotionCor2"
-#define DEFAULTUNBLURLOCATION "/public/EM/UNBLUR/unblur.exe"
-#define DEFAULTSUMMOVIELOCATION "/public/EM/SUMMOVIE/summovie.exe"
-#define DEFAULTGCTFLOCATION "/public/EM/Gctf/bin/Gctf"
-#define DEFAULTRESMAPLOCATION "/public/EM/ResMap/ResMap-1.1.4-linux64"
+#define DEFAULTQSUBLOCATION "/Appl/relion/bin/qsub.sh"
+#define DEFAULTCTFFINDLOCATION "/Appl/ctf/ctffind4.exe"
+#define DEFAULTMOTIONCOR2LOCATION "/Appl/local/bin/MotionCor2"
+#define DEFAULTUNBLURLOCATION "/Appl/local/bin/unblur"
+#define DEFAULTSUMMOVIELOCATION "/Appl/local/bin/summovie"
+#define DEFAULTGCTFLOCATION "/Appl/Gctf/bin/gctf"
+#define DEFAULTRESMAPLOCATION "/Appl/local/bin/ResMap"
 #define DEFAULTQSUBCOMMAND "qsub"
 #define DEFAULTQUEUENAME "openmpi"
 #define DEFAULTMININIMUMDEDICATED 1

添付ファイル: gui_jobwindow.cpp.180803.patch
添付ファイル: pipeline_jobs.cpp.180807.patch
添付ファイル: pipeline_jobs.h.180807.patch

追加

qsub.shにジョブ情報が書き込まれ、qsubに流れるわけだが、qsub.shにRelionのバージョン情報を渡すために
「XXXversionXXX」を追加してみた。

それと GUI 画面のタイトルを変更するには

--- relion-3.0_beta/src/apps/maingui.cpp.prog   2018-08-02 15:17:56.410396004 +0900
+++ relion-3.0_beta/src/apps/maingui.cpp        2018-08-05 00:42:17.070339083 +0900
@@ -65,7 +65,7 @@
 #ifdef PACKAGE_VERSION
         strcat(titletext,PACKAGE_VERSION);
 #endif
-        strcat(titletext,": ");
+        strcat(titletext,": 180805, cuda-9.2 :");
 
        strcat (titletext, short_dir);

git pull

上記の変更を加えた後、
開発元のソースが更新され、それを「git pull」すると下記エラーが発生する場合がある。

[root@c relion-3.0_beta]# git pull
remote: Counting objects: 19, done.
remote: Compressing objects: 100% (19/19), done.
remote: Total 19 (delta 16), reused 0 (delta 0)
Unpacking objects: 100% (19/19), done.
From https://bitbucket.org/scheres/relion-3.0_beta
   e5aada9..340fd22  master     -> origin/master
Updating e5aada9..340fd22
error: Your local changes to the following files would be overwritten by merge:
        src/pipeline_jobs.cpp
Please, commit your changes or stash them before you can merge.
Aborting
 
[root@c relion-3.0_beta]#

こちら(local)の修正箇所が影響して更新できないのである....勝手に変更してごめんなさい..

こちらの修正箇所を保ちつつ、大本の更新を適用させるには、
一旦、こちらの修正箇所を退避させ、その上で「git pull」を実行します。そしてその後にこちらの修正箇所を戻します。

(こちらの変更箇所を一時退避する)
[root@c relion-3.0_beta]# git stash save -u
Saved working directory and index state WIP on master: e5aada9 GUI: Fix RELION_QSUB_EXTRA1/2 (Issue #391 on GitHub)
HEAD is now at e5aada9 GUI: Fix RELION_QSUB_EXTRA1/2 (Issue #391 on GitHub)
 
(一応退避のリストを見る)
[root@c relion-3.0_beta]# git stash list
stash@{0}: WIP on master: e5aada9 GUI: Fix RELION_QSUB_EXTRA1/2 (Issue #391 on GitHub)
 
 
(修正箇所を退避したまま、オリジナルの状態で git pull する)
[root@c relion-3.0_beta]# git pull
Updating e5aada9..340fd22
Fast-forward
 src/jaz/motion/motion_helper.cpp          | 3 +--
 src/jaz/motion/motion_param_estimator.cpp | 2 ++
 src/mask.cpp                              | 7 +++++++
 src/multidim_array.h                      | 5 +++++
 src/pipeline_jobs.cpp                     | 2 +-
 src/preprocessing.cpp                     | 5 ++++-
 6 files changed, 20 insertions(+), 4 deletions(-)
 
(退避内容を戻す)
[root@c relion-3.0_beta]# git stash pop

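このあと、再度make; make installを行います。
なお、開発元の更新とこちらの修正が同じ箇所に及んでいる場合(上の例では src/pipeline_jobs.cpp が双方で変更されています)、「git stash pop」で競合(conflict)が発生することがあります。その際は該当ファイルを手で修正してからビルドしてください。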


トップ   編集 添付 複製 名前変更     ヘルプ   最終更新のRSS
Last-modified: 2018-09-13 (木) 05:36:20 (37d)