本家様 https://github.com/PaddlePaddle/PaddleHelix の中のhttps://github.com/PaddlePaddle/PaddleHelix/tree/dev/apps/protein_folding/helixfold3
参照先https://qiita.com/Ag_smith/items/a24ca180cc971e926d89
「Biomolecular Structure Prediction with HelixFold3: Replicating the Capabilities of AlphaFold3」
すごいなぁ。AlphaFold3の論文から非公開のAlphaFold3を実装した感じ。
※このメモは未完成です(作業中)。
# Install pyenv under /apps and use it to provide a shared Anaconda.
export PYENV_ROOT=/apps/pyenv
# pyenv's canonical repository is pyenv/pyenv (yyuu/pyenv is its former location).
git clone https://github.com/pyenv/pyenv.git "$PYENV_ROOT"
export PATH="$PYENV_ROOT/bin:$PATH"
pyenv install anaconda3-2024.06-1
pyenv global anaconda3-2024.06-1
# Load conda's shell integration into the current shell (needed before `conda activate`).
source "$PYENV_ROOT/versions/anaconda3-2024.06-1/etc/profile.d/conda.sh"
# Fetch PaddleHelix into /apps; only clone if the cd actually succeeded.
cd /apps && git clone https://github.com/PaddlePaddle/PaddleHelix
# Sanity check: the HelixFold3 application directory should now exist.
ls /apps/PaddleHelix/apps/protein_folding/helixfold3
[root@rockylinux9 ~]# conda create -n helixfold -c conda-forge python=3.9
[root@rockylinux9 ~]# conda activate helixfold
(helixfold) [root@rockylinux9 ~]# conda install -y -c bioconda aria2 hmmer==3.3.2 kalign2==2.04 hhsuite==3.3.0
(helixfold) [root@rockylinux9 ~]# conda install -y -c conda-forge openbabel
(helixfold) [root@rockylinux9 ~]# python3 -m pip install paddlepaddle-gpu==2.6.1.post120 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html
(helixfold) [root@rockylinux9 ~]# cd /apps/PaddleHelix/apps/protein_folding/helixfold3/
(helixfold) [root@rockylinux9 helixfold3]#
(helixfold) [root@rockylinux9 helixfold3]# ls -CF
data/ helixfold/ images/ infer_scripts/ README.md run_infer.sh utils/
demo_output/ helixfold3_report.pdf inference.py LICENSE requirements.txt scripts/
(helixfold) [root@rockylinux9 helixfold3]#
(helixfold) [root@rockylinux9 helixfold3]# which python3
/apps/pyenv/versions/anaconda3-2024.06-1/envs/helixfold/bin/python3
(helixfold) [root@rockylinux9 helixfold3]# python3 -m pip install -r requirements.txt
(helixfold) [root@rockylinux9 helixfold3]# conda search maxit -c bioconda
Loading channels: done
# Name Version Build Channel
maxit 11.200 hdbdd923_0 bioconda
(helixfold) [root@rockylinux9 helixfold3]# conda install maxit -c bioconda
(helixfold) [root@rockylinux9 helixfold3]# which maxit
/apps/pyenv/versions/anaconda3-2024.06-1/envs/helixfold/bin/maxit
(helixfold) [root@rockylinux9 helixfold3]# conda list
:
aria2 1.23.0 0 bioconda
:
hhsuite 3.3.0 py39pl5321h0dd7abe_13 bioconda
hmmer 3.3.2 hdbdd923_4 bioconda
:
jax 0.2.14 pypi_0 pypi
jaxlib 0.4.30 pypi_0 pypi
kalign2 2.04 h031d066_7 bioconda
:
ncurses 6.5 he02047a_1 conda-forge
numpy 1.24.4 pypi_0 pypi
openbabel 3.1.1 py39h2d01fe1_9 conda-forge
openssl 1.1.1w h7f8727e_0
:
paddlepaddle-gpu 2.6.1.post120 pypi_0 pypi
:
python 3.9.15 h47a2c10_0_cpython conda-forge
:
必要なもの: CUDA 12.0、cuDNN 8.4.0、NCCL 2.14.3 (Paddle は paddlepaddle-gpu でいいのかな?)
(helixfold) [root@rockylinux9 helixfold3]# conda install cuda=12.0 cudnn=8.4.0 nccl=2.14.3 -c conda-forge -c nvidia
(helixfold) [root@rockylinux9 helixfold3]# conda list
:
aria2 1.23.0 0 bioconda
:
cuda 12.0.0 ha804496_1 conda-forge
:
cuda-toolkit 12.0.0 ha804496_1 conda-forge
cuda-tools 12.0.0 ha770c72_0 conda-forge
cuda-visual-tools 12.0.0 ha770c72_0 conda-forge
cudatoolkit 11.8.0 h4ba93d1_13 conda-forge
:
hhsuite 3.3.0 py39pl5321h0dd7abe_13 bioconda
hmmer 3.3.2 hdbdd923_4 bioconda
:
jax 0.2.14 pypi_0 pypi
jaxlib 0.4.30 pypi_0 pypi
kalign2 2.04 h031d066_7 bioconda
:
nccl 2.14.3.1 h0800d71_0 conda-forge
ncurses 6.5 he02047a_1 conda-forge
nsight-compute 2022.4.0.15 0 nvidia
numpy 1.24.4 pypi_0 pypi
openbabel 3.1.1 py39h2d01fe1_9 conda-forge
openssl 1.1.1w hd590300_0 conda-forge
:
paddlepaddle-gpu 2.6.1.post120 pypi_0 pypi
:
python 3.9.15 h47a2c10_0_cpython conda-forge
:
(helixfold) [root@rockylinux9 helixfold3]# conda deactivate
[root@rockylinux9 helixfold3]#
データベースを準備
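用意するのは AlphaFold2 とほぼ同じだけど、CCD (ccd_preprocessed_etkdg.pkl.gz, download_ccd_pkl.sh) と
Rfam (Rfam-14.9_rep_seq.fasta, download_rna.sh) が新しいなぁ.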
[root@rockylinux9 ~]# ls -l /apps/PaddleHelix/apps/protein_folding/helixfold3/scripts/
total 48
-rw-r--r--. 1 root root 1742 Sep 15 12:05 download_all_data.sh
-rw-r--r--. 1 root root 866 Sep 15 12:05 download_bfd.sh
-rw-r--r--. 1 root root 596 Sep 15 12:05 download_ccd_pkl.sh
-rw-r--r--. 1 root root 700 Sep 15 12:05 download_helixfold3_checkpoints.sh
-rw-r--r--. 1 root root 780 Sep 15 12:05 download_mgnify.sh
-rw-r--r--. 1 root root 1373 Sep 15 12:05 download_pdb_mmcif.sh
-rw-r--r--. 1 root root 586 Sep 15 12:05 download_pdb_seqres.sh
-rw-r--r--. 1 root root 592 Sep 15 12:05 download_rna.sh
-rw-r--r--. 1 root root 692 Sep 15 12:05 download_small_bfd.sh
-rw-r--r--. 1 root root 846 Sep 15 12:05 download_uniclust30.sh
-rw-r--r--. 1 root root 1358 Sep 15 12:05 download_uniprot.sh
-rw-r--r--. 1 root root 662 Sep 15 12:05 download_uniref90.sh
[root@rockylinux9 ~]#
「/apps/modulefiles/HelixFold3」
#%Module1.0
# Modulefile that exposes the "helixfold" conda environment.
set helixfold_env /apps/pyenv/versions/anaconda3-2024.06-1/envs/helixfold
# Put the environment's bin/ and lib/ directories at the front of the search paths.
prepend-path PATH            ${helixfold_env}/bin
prepend-path LD_LIBRARY_PATH ${helixfold_env}/lib
#!/bin/bash
# Run HelixFold3 inference on the demo protein-ligand input, using the tools
# installed in the "helixfold" conda environment.
#
# --input_json and --output_dir are relative paths, so they are resolved
# against the current working directory: run this script from
# /apps/PaddleHelix/apps/protein_folding/helixfold3.
set -euo pipefail

ROOT=/apps/pyenv/versions/anaconda3-2024.06-1/envs/helixfold
DATA_DIR=/Public/alphafold
# Directory containing the Open Babel executables.
# NOTE(review): presumably read by the HelixFold3 code - confirm.
export OBABEL_BIN="$ROOT/bin"

args=(
  # External tool binaries, all taken from the conda environment.
  --maxit_binary "$ROOT/bin/maxit"
  --jackhmmer_binary_path "$ROOT/bin/jackhmmer"
  --hhblits_binary_path "$ROOT/bin/hhblits"
  --hhsearch_binary_path "$ROOT/bin/hhsearch"
  --kalign_binary_path "$ROOT/bin/kalign"
  --hmmsearch_binary_path "$ROOT/bin/hmmsearch"
  --hmmbuild_binary_path "$ROOT/bin/hmmbuild"
  --nhmmer_binary_path "$ROOT/bin/nhmmer"

  # Sequence / template databases.
  --preset='reduced_dbs'
  # --bfd_database_path is given exactly once and points at the full BFD;
  # the reduced BFD FASTA is passed only via --small_bfd_database_path.
  # NOTE(review): with preset reduced_dbs the full BFD is presumably not
  # read at all - confirm against inference.py.
  --bfd_database_path "$DATA_DIR/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt"
  --small_bfd_database_path "$DATA_DIR/small_bfd/bfd-first_non_consensus_sequences.fasta"
  --uniclust30_database_path "$DATA_DIR/uniclust30/uniclust30_2018_08/uniclust30_2018_08"
  --uniprot_database_path "$DATA_DIR/uniprot/uniprot.fasta"
  --pdb_seqres_database_path "$DATA_DIR/pdb_seqres/pdb_seqres.txt"
  --uniref90_database_path "$DATA_DIR/uniref90/uniref90.fasta"
  --mgnify_database_path "$DATA_DIR/mgnify/mgy_clusters_2018_12.fa"
  --template_mmcif_dir "$DATA_DIR/pdb_mmcif/mmcif_files"
  --obsolete_pdbs_path "$DATA_DIR/pdb_mmcif/obsolete.dat"
  --ccd_preprocessed_path "$DATA_DIR/ccd_preprocessed_etkdg.pkl.gz"
  --rfam_database_path "$DATA_DIR/Rfam-14.9_rep_seq.fasta"
  --max_template_date=2020-05-14

  # Job definition: input, output location, model weights and run settings.
  --input_json data/demo_protein_ligand.json
  --output_dir ./output
  --model_name allatom_demo
  --init_model "$DATA_DIR/params/HelixFold3-240814.pdparams"
  --infer_times 3
  --precision "fp32"
)

# Restrict the run to GPU 0.
CUDA_VISIBLE_DEVICES=0 "$ROOT/bin/python" \
  /apps/PaddleHelix/apps/protein_folding/helixfold3/inference.py "${args[@]}"