本家様http://raptorx.uchicago.edu/ https://github.com/j3xugit/RaptorX-3DModeling

参照先https://qiita.com/Ag_smith/items/9e6a29faa719be64be90

下準備

yum install epel-release
yum install cmake python3 openmpi-devel dssp screen wget parallel
mkdir -p /apps/src
yum localinstall cuda-repo-rhel7-11-2-local-11.2.0_460.27.04-1.x86_64.rpm
yum install cuda-11-2
yum localinstall cuda-repo-rhel7-10-2-local-10.2.89-440.33.01-1.0-1.x86_64.rpm
yum install cuda-libraries-dev-10-2
yum remove "cuda-repo*"
tar zxvf cudnn-10.2-linux-x64-v7.6.5.32.tgz
cp -rp cuda/include/cudnn.h /usr/local/cuda-10.2/include/
cp -rp cuda/lib64/* /usr/local/cuda-10.2/lib64/
cp -rp cuda/NVIDIA_SLA_cuDNN_Support.txt /usr/local/cuda-10.2/

「site-packages/theano/gpuarray/dnn.py」から「cuDNN to a version >= v5 and <= v7」と警告されるが、dnn.pyを調べると「v >= 7200:」の場合にこの警告が出るようになっている. つまりcuDNNは 7.2未満が必要?
カードの問題なのか不明だが、10.2ではGPUで計算出来ていない.

pdbx

[root@centos7 ~]# cd /apps/
[root@centos7 apps]# git clone https://github.com/soedinglab/pdbx
[root@centos7 apps]# cd pdbx/
[root@centos7 pdbx]# mkdir build && cd $_
[root@centos7 build]# cmake ..
[root@centos7 build]# make && make install

pythonライブラリが「/usr/local/lib/python3.6/site-packages/pdbx」にインストールされる

legacy blast

legacy blastの最新版 2.2.26

[root@centos7 ~]# cd /apps/src
[root@centos7 src]# curl -O ftp://ftp.ncbi.nlm.nih.gov/blast/executables/legacy.NOTSUPPORTED/2.2.26/blast-2.2.26-x64-linux.tar.gz
[root@centos7 src]# cd ..
[root@centos7 apps]# tar xzf src/blast-2.2.26-x64-linux.tar.gz

psipred

[root@centos7 ~]# cd /apps/
[root@centos7 apps]# git clone https://github.com/psipred/psipred
[root@centos7 apps]# cd psipred
[root@centos7 psipred]# git checkout -b 4.0 refs/tags/v4.0
Switched to a new branch '4.0'
[root@centos7 psipred]# cd src
[root@centos7 src]# make && make install
 
[root@centos7 src]# cd /apps/psipred/
[root@centos7 psipred]#
[root@centos7 psipred]# ls -CF
bin/  BLAST+/  data/  example/  LICENSE  README  runpsipred*  runpsipred_single*  src/  VERSION
[root@centos7 psipred]#
[root@centos7 psipred]# ls -l bin/
total 68
-rwxr-xr-x. 1 root root 13736 Jan 29 01:45 chkparse
-rwxr-xr-x. 1 root root 18240 Jan 29 01:45 psipass2
-rwxr-xr-x. 1 root root 13744 Jan 29 01:45 psipred
-rwxr-xr-x. 1 root root 13464 Jan 29 01:45 seq2mtx
[root@centos7 psipred]#

HHSuite

[root@centos7 ~]# cd /apps/src
[root@centos7 src]# git clone  https://github.com/soedinglab/hh-suite
[root@centos7 src]# cd hh-suite/
[root@centos7 hh-suite]# git checkout -b 3.3.0 refs/tags/v3.3.0
Switched to a new branch '3.3.0'
[root@centos7 hh-suite]# 
[root@centos7 hh-suite]# mkdir build && cd $_
 
[root@centos7 build]# module load mpi/openmpi-x86_64
 
[root@centos7 build]# cmake -DCMAKE_INSTALL_PREFIX=/apps/hh-suite/3.3 ..
[root@centos7 build]# make && make install
 
[root@centos7 build]# ls -l /apps/hh-suite/3.3/
total 8
drwxr-xr-x. 2 root root 4096 Jan 29 01:33 bin
drwxr-xr-x. 2 root root  167 Jan 29 01:33 data
drwxr-xr-x. 2 root root 4096 Jan 29 01:33 scripts
[root@centos7 build]#

設定

[root@centos7 ~]# vi /apps/hh-suite/3.3/scripts/HHPaths.pm
--- /apps/hh-suite/3.3/scripts/HHPaths.pm.orig  2021-02-02 10:26:41.000000000 +0900
+++ /apps/hh-suite/3.3/scripts/HHPaths.pm       2021-02-02 10:33:49.738884533 +0900
@@ -38,18 +38,18 @@
 #our $execdir = ".../psipred/bin";         # path to PSIPRED V2 binaries
 #our $datadir = ".../psipred/data";        # path to PSIPRED V2 data files
 #our $ncbidir = ".../blast/bin";           # path to NCBI binaries (for PSIPRED in addss.pl)
-our $execdir = "/cluster/toolkit/production/bioprogs/psipred/bin";  # path to PSIPRED V2 binaries
-our $datadir = "/cluster/toolkit/production/bioprogs/psipred/data"; # path to PSIPRED V2 data files
-our $ncbidir = "/cluster/toolkit/production/bioprogs/blast/bin";    # path to NCBI binaries (for PSIPRED in addss.pl)
+our $execdir = "/apps/psipred/bin";  # path to PSIPRED V2 binaries
+our $datadir = "/apps/psipred/data"; # path to PSIPRED V2 data files
+our $ncbidir = "/apps/blast-2.2.26/bin";    # path to NCBI binaries (for PSIPRED in addss.pl)
 
 ##############################################################################################
 # PLEASE COMPLETE THE PATHS ... TO YOUR LOCAL PDB FILES, DSSP FILES ETC.
 #our $pdbdir  =  ".../pdb/all";            # where are the pdb files? (pdb/divided directory will also work)
 #our $dsspdir =  ".../dssp/data";          # where are the dssp files? Used in addss.pl.
 #our $dssp    =  ".../dssp/bin/dsspcmbi";  # where is the dssp binary? Used in addss.pl.
-our $pdbdir  =  "/cluster/databases/pdb/all";            # where are the pdb files? (pdb/divided directory will also work)
-our $dsspdir =  "/cluster/databases/dssp/data";          # where are the dssp files? Used in addss.pl
-our $dssp    =  "/cluster/databases/dssp/bin/dsspcmbi";  # where is the dssp binary? Used in addss.pl
+our $pdbdir  =  "/Public/databases/pdb/divided";            # where are the pdb files? (pdb/divided directory will also work)
+our $dsspdir =  "/Public/databases/dssp/data";          # where are the dssp files? Used in addss.pl
+our $dssp    =  "/usr/bin/mkdssp";  # where is the dssp binary? Used in addss.pl
 ##############################################################################################
 
 # The lines below probably do not need to be changed

PDBファイル/DSSPデータベース/HH-suiteデータファイルの取得

保存場所の確保

[root@centos7 ~]# mkdir /Public
[root@centos7 ~]# mount -t nfs qnap:/Public /Public
[root@centos7 ~]# mkdir -p /Public/databases/pdb/divided /Public/databases/dssp/data /Public/databases/HH-suite
  • PDBファイルの取得
    [root@centos7 ~]# cd /Public/databases/pdb/divided
    [root@centos7 divided]# rsync -avz --delete ftp.pdbj.org::ftp_data/structures/divided/pdb .
    [root@centos7 divided]# du -hs .
    34G     .
    [root@centos7 divided]# find . -name '*.ent.gz' -exec /usr/local/bin/gzip -dk {} \;    # gzip-1.10を/usr/local/binにインストール
    解凍前が34GBで解凍すると176GB. 解凍前のファイルはそのままにしているので、正味 142GBでしょうか
  • DSSPデータベースの取得
    「rsync://rsync.cmbi.umcn.nl/dssp-from-mmcif/」を参照元にしたら多くのファイルで「rsync: send_files failed to open "/6yt9.dssp" (in dssp-from-mmcif): Permission denied (13)」とエラーが表示された. なので別の取得先に変更
    [root@centos7 ~]# rsync -avz --delete rsync://rsync.cmbi.umcn.nl/dssp/ /Public/databases/dssp/data/
    [root@centos7 ~]# du -hs /Public/databases/dssp/
    29G     /Public/databases/dssp/
    [root@centos7 ~]#
    [root@centos7 ~]# ls -l /Public/databases/dssp/data | wc -l
    169765
    [root@centos7 ~]#
    [root@centos7 ~]# find /Public/databases/dssp/data -name '*.dssp' | wc -l
    169764
    [root@centos7 ~]#
    だいたい29GBになります. こちらは平文. 総数は 169,764枚
  • HH-suite向けのデータファイルを取得
    http://wwwuser.gwdg.de/~compbiol/uniclust/
    [root@centos7 ~]# cd /Public/databases/HH-suite
    [root@centos7 HH-suite]# wget http://wwwuser.gwdg.de/~compbiol/uniclust/2020_06/UniRef30_2020_06_hhsuite.tar.gz
    [root@centos7 HH-suite]# tar xzf UniRef30_2020_06_hhsuite.tar.gz
    [root@centos7 HH-suite]# ls -lh
    total 228G
    -rw-------. 1 528745 47148 140G Oct  5 21:17 UniRef30_2020_06_a3m.ffdata
    -rw-------. 1 528745 47148 671M Oct  5 21:17 UniRef30_2020_06_a3m.ffindex
    -rw-------. 1 528745 47148 6.0G Oct  5 21:13 UniRef30_2020_06_cs219.ffdata
    -rw-------. 1 528745 47148 605M Oct  5 21:14 UniRef30_2020_06_cs219.ffindex
    -rw-------. 1 528745 47148  35G Oct  5 21:18 UniRef30_2020_06_hhm.ffdata
    -rw-------. 1 528745 47148  20M Oct  5 21:18 UniRef30_2020_06_hhm.ffindex
    -rw-r--r--. 1 root   root   47G Feb  1 05:20 UniRef30_2020_06_hhsuite.tar.gz
    -rw-------. 1 528745 47148  379 Oct  7 02:26 UniRef30_2020_06.md5sums
    [root@centos7 HH-suite]# chmod 644 ./*
    [root@centos7 HH-suite]# du -hs .
    228G    .
    [root@centos7 HH-suite]#

176GB+29GB+228GB = 433GB

pyenv/anaconda環境

[root@centos7 ~]# git clone https://github.com/yyuu/pyenv.git /apps/pyenv
[root@centos7 ~]# export PYENV_ROOT=/apps/pyenv
[root@centos7 ~]# export PATH=$PYENV_ROOT/bin:$PATH
[root@centos7 ~]# eval "$(pyenv init - --no-rehash)"
[root@centos7 ~]# pyenv install anaconda3-5.3.1
[root@centos7 ~]# pyenv global anaconda3-5.3.1
 
[root@centos7 ~]# export PATH=$PYENV_ROOT/versions/anaconda3-5.3.1/bin/:$PATH
[root@centos7 ~]# conda update conda
 
[root@centos7 ~]# conda create -n RaptorX python=2
 
[root@centos7 ~]# source activate RaptorX
 
(RaptorX) [root@centos7 ~]# which python2.7
/apps/pyenv/versions/anaconda3-5.3.1/envs/RaptorX/bin/python2.7
 
(RaptorX) [root@centos7 ~]# conda install numpy nose
(RaptorX) [root@centos7 ~]# conda install -c anaconda msgpack-python 
(RaptorX) [root@centos7 ~]# pip install biopython==1.76
(RaptorX) [root@centos7 ~]# conda install pillow
(RaptorX) [root@centos7 ~]# conda install numpy scipy mkl 
(RaptorX) [root@centos7 ~]# conda install theano pygpu
 
(RaptorX) [root@centos7 ~]# mkdir -p /apps/src && cd $_
(RaptorX) [root@centos7 src]# git clone https://github.com/crowsonkb/shared_ndarray.git
(RaptorX) [root@centos7 src]# cd shared_ndarray/
(RaptorX) [root@centos7 shared_ndarray]# python2.7 setup.py install
 
(RaptorX) [root@centos7 ~]# cd /apps/src
(RaptorX) [root@centos7 src]# tar jxvf PyRosetta4.Release.python27.linux.release-274.tar.bz2
(RaptorX) [root@centos7 src]# mv PyRosetta4.Release.python27.linux.release-274 PyRosetta4_python27_274
(RaptorX) [root@centos7 src]# cd PyRosetta4_python27_274/setup/
(RaptorX) [root@centos7 setup]# python setup.py build
(RaptorX) [root@centos7 setup]# python setup.py install
 
(RaptorX) [root@centos7 ~]# cd /apps
(RaptorX) [root@centos7 apps]# git clone https://github.com/j3xugit/RaptorX-3DModeling.git
(RaptorX) [root@centos7 apps]# cd RaptorX-3DModeling
(RaptorX) [root@centos7 RaptorX-3DModeling]# vi raptorx-external.sh
--- /apps/RaptorX-3DModeling/raptorx-external.sh.orig   2021-02-02 09:29:21.529937627 +0900
+++ /apps/RaptorX-3DModeling/raptorx-external.sh        2021-02-02 14:20:13.065802041 +0900
@@ -1,9 +1,9 @@
 ## the install folder of the hhsuite package, needed for MSA generation.
-export HHDIR=/mnt/data/RaptorXCommon/HHblits/hhsuite-3.2.0-SSE2-Linux/
+export HHDIR=/apps/hh-suite/3.3
 export PATH=$HHDIR/bin:$HHDIR/scripts:$PATH
 
 ## the location of the HHM database to be searched by HHblits for MSA generation. Needed unless you already have an MSA and do not want to generate any new MSAs.
-export HHDB=/mnt/data/RaptorXCommon/HHblits/DB/uniref30_2020_02/UniRef30_2020_02
+export HHDB=/Public/databases/HH-suite/UniRef30_2020_06
 
 ## the location of the metagenome data file to be searched by hmmsearch. Not needed unless you want to build MSAs using metagenome data.
 export MetaDB=/mnt/data/conmod_databases/metaclust_series/metaclust_50.fasta
(RaptorX) [root@centos7 ~]# export ModelingHome=/apps/RaptorX-3DModeling

「http://raptorx.uchicago.edu/download/」にて登録を行い「RXDeepModels4DistOri-FM.tar.gz」と「RXDeepModels4Property.tar.gz」を得る

(RaptorX) [root@centos7 ~]# cd $ModelingHome/DL4DistancePrediction4
(RaptorX) [root@centos7 DL4DistancePrediction4]# tar xf /apps/src/RXDeepModels4DistOri-FM.tar.gz
 
(RaptorX) [root@centos7 DL4DistancePrediction4]# cd $ModelingHome/DL4PropertyPrediction
(RaptorX) [root@centos7 DL4PropertyPrediction]# tar xf /apps/src/RXDeepModels4Property.tar.gz

HH-Suiteのテスト

[illya@centos7 ~]$ mkdir test && cd $_
[illya@centos7 test]$ cp /apps/src/hh-suite/data/query.seq .
[illya@centos7 test]$ cat query.seq
>sp|Q5VUD6|FA69B_HUMAN Protein FAM69B OS=Homo sapiens GN=FAM69B PE=2 SV=3
MRRLRRLAHLVLFCPFSKRLQGRLPGLRVRCIFLAWLGVFAGSWLVYVHYSSYSERCRGHVCQVVICDQYRKGIISGSVCQDLCELHMVEWRTCLSVAPGQQVYSGLW
RDKDVTIKCGIEETLDSKARSDAAPRRELVLFDKPTRGTSIKEFREMTLSFLKANLGDLPSLPALVGQVLLMADFNKDNRVSLAEAKSVWALLQRNEFLLLLSLQEKE
HASRLLGYCGDLYLTEGVPHGAWHAAALPPLLRPLLPPALQGALQQWLGPAWPWRAKIAIGLLEFVEELFHGSYGTFYMCETTLANVGYTATYDFKMADLQQVAPEAT
VRRFLQGRRCEHSTDCTYGRDCRAPCDRLMRQCKGDLIQPNLAKVCALLRGYLLPGAPADLREELGTQLRTCTTLSGLASQVEAHHSLVLSHLKTLLWKKISNTKYS
 
[illya@centos7 test]$ /apps/hh-suite/3.3/bin/hhblits -cpu 4 -i query.seq -d /Public/databases/HH-suite/UniRef30_2020_06 -oa3m query.a3m -n -1
- 12:20:21.914 INFO: Search results will be written to query.hhr
 
- 12:20:21.914 INFO: Search results will be written to query.hhr
 
- 12:20:38.622 INFO: Searching 25985124 column state sequences.
 
- 12:20:38.675 INFO: query.seq is in A2M, A3M or FASTA format
 
- 12:20:38.675 INFO: Iteration 1
 
- 12:20:38.867 INFO: Prefiltering database
 
- 12:22:00.098 INFO: HMMs passed 1st prefilter (gapless profile-profile alignment)  : 616605
 
- 12:22:02.105 INFO: HMMs passed 2nd prefilter (gapped profile-profile alignment)   : 419
 
- 12:22:02.105 INFO: HMMs passed 2nd prefilter and not found in previous iterations : 419
 
- 12:22:02.105 INFO: Scoring 419 HMMs using HMM-HMM Viterbi alignment
 
- 12:22:02.299 INFO: Alternative alignment: 0
 
- 12:22:23.947 INFO: 419 alignments done
 
- 12:22:23.948 INFO: Alternative alignment: 1
 
- 12:22:24.044 INFO: 415 alignments done
 
- 12:22:24.045 INFO: Alternative alignment: 2
 
- 12:22:24.075 INFO: 56 alignments done
 
- 12:22:24.075 INFO: Alternative alignment: 3
 
- 12:22:24.087 INFO: 4 alignments done
 
- 12:22:25.010 INFO: Premerge done
 
- 12:22:25.011 INFO: Realigning 318 HMM-HMM alignments using Maximum Accuracy algorithm
 
- 12:22:26.674 INFO: 94 sequences belonging to 94 database HMMs found with an E-value < 0.001
 
- 12:22:26.674 INFO: Number of effective sequences of resulting query HMM: Neff = 6.52866
 
Query         sp|Q5VUD6|FA69B_HUMAN Protein FAM69B OS=Homo sapiens GN=FAM69B PE=2 SV=3
Match_columns 431
No_of_seqs    520 out of 780
Neff          6.52866
Searched_HMMs 419
Date          Tue Feb  2 12:22:26 2021
Command       /apps/hh-suite/3.3/bin/hhblits -cpu 4 -i query.seq -d /Public/databases/HH-suite/UniRef30_2020_06 -oa3m query.a3m -n -1
 
 No Hit                             Prob E-value P-value  Score    SS Cols Query HMM  Template HMM
  1 UniRef100_A0A024R8I5 Family wi 100.0  2E-172  5E-178 1290.6   0.0  431    1-431   240-670 (762)
  2 UniRef100_UPI000C879985 protei 100.0  4E-160  9E-166 1176.4   0.0  412   20-431   235-767 (767)
 :
[illya@centos7 test]$

RaptorX-3DModeling

テスト
参照元と同じく2O6Pで

[illya@centos7 ~]$ cd test
[illya@centos7 test]$ vi 2o6pA.fasta
>2O6P_1|Chains A,B|Iron-regulated surface determinant protein C|Staphylococcus aureus subsp. aureus (158879)
NAADSGTLNYEVYKYNTNDTSIANDYFNKPAKYIKKNGKLYVQITVNHSHWITGMSIEGHKENIISKNTAKDERTSEFEVSKLNGKIDGKIDVYIDEKVNGKPFKYDHHYNI
TYKFNGPTDVAGAN
 
[illya@centos7 test]$ 
[illya@centos7 test]$ export PYENV_ROOT=/apps/pyenv
[illya@centos7 test]$ export PATH=$PYENV_ROOT/bin:$PATH
[illya@centos7 test]$ eval "$(pyenv init - --no-rehash)"
[illya@centos7 test]$ export PATH=$PYENV_ROOT/versions/anaconda3-5.3.1/bin/:$PATH
 
[illya@centos7 test]$ export CUDA_ROOT=/usr/local/cuda-10.2
 
[illya@centos7 test]$ source activate RaptorX
(RaptorX) [illya@centos7 test]$
(RaptorX) [illya@centos7 test]$ export ModelingHome=/apps/RaptorX-3DModeling
(RaptorX) [illya@centos7 test]$ . $ModelingHome/raptorx-path.sh
(RaptorX) [illya@centos7 test]$ . $ModelingHome/raptorx-external.sh
 
(RaptorX) [illya@centos7 test]$ /apps/RaptorX-3DModeling/Server/RaptorXFolder.sh
(略)/RaptorXFolder.sh [ -o outDir | -g gpu | -m MSAmethod | -n numDecoys | -r runningMode | -R remoteAccountInfo | -t machineType | -l maxLen2BeFolded | -c ] inputFile
        This script predicts angle/contact/distance/orientation of a protein and optionally folds it
                ModelingHome=/apps/RaptorX-3DModeling
                Please make sure that ModelingHome is correctly set to the install folder of the RaptorX-3DModeling package
                Before running this script, some external software packages shall be installed. See README.md for instructions.
 
        inputFile: a protein primary sequence file in FASTA format (ending with .fasta or .seq) or a multiple sequence alignment file in a3m format (ending with .a3m)
 :
 :
(RaptorX) [illya@centos7 test]$ /apps/RaptorX-3DModeling/Server/RaptorXFolder.sh -g -1 -l 1050 -m 9 -n 20 -r 1 -t 1 2o6pA.fasta

トップ   編集 添付 複製 名前変更     ヘルプ   最終更新のRSS
Last-modified: 2021-02-04 (木) 01:06:12 (72d)