ここでは pyenv を敷いてその上に anaconda や必要に応じて miniforge を入れ、さらにその上に python 実行環境を用意しています。
[root@rockylinux9 ~]# cat /etc/redhat-release
Rocky Linux release 9.4 (Blue Onyx)
[root@rockylinux9 ~]# git clone https://github.com/yyuu/pyenv.git /apps/pyenv
[root@rockylinux9 ~]# export PYENV_ROOT=/apps/pyenv
[root@rockylinux9 ~]# export PATH=$PYENV_ROOT/bin:$PATH
[root@rockylinux9 ~]# pyenv install --list <-- これで anaconda, miniconda のバージョンが一覧表示されます。
[root@rockylinux9 ~]# pyenv install anaconda3-2024.06-1
[root@rockylinux9 ~]# pyenv global anaconda3-2024.06-1
[root@rockylinux9 ~]# source /apps/pyenv/versions/anaconda3-2024.06-1/etc/profile.d/conda.sh
[root@rockylinux9 ~]# conda env list
# conda environments:
base /apps/pyenv/versions/anaconda3-2024.06-1
[root@rockylinux9 ~]#
と、下地を作ってから各フレームワーク向けの python 実行環境を用意してみます。
[root@rockylinux9 ~]# cat /proc/driver/nvidia/version
NVRM version: NVIDIA UNIX x86_64 Kernel Module 535.179 Fri Apr 26 21:43:18 UTC 2024
GCC version: gcc version 11.4.1 20231218 (Red Hat 11.4.1-3) (GCC)
[root@rockylinux9 ~]#
[root@rockylinux9 ~]# source /apps/pyenv/versions/anaconda3-2024.06-1/etc/profile.d/conda.sh
[root@rockylinux9 ~]# conda create -n tf tensorflow
[root@rockylinux9 ~]# conda activate tf
(tf) [root@rockylinux9 ~]# conda list
keras 2.12.0 py311h06a4308_0
keras-preprocessing 1.1.2 pyhd3eb1b0_0
python 3.11.5 h7a1cb2a_0
tensorflow 2.12.0 mkl_py311h34a0fa1_0
(tf) [root@rockylinux9 ~]# conda search tensorflow
tensorflow 2.12.0 gpu_py311h65739b5_0 pkgs/main
tensorflow 2.12.0 mkl_py311h34a0fa1_0 pkgs/main
(tf) [root@rockylinux9 ~]# conda install tensorflow=2.12=gpu*
[illya@rockylinux9 ~]$ source /apps/pyenv/versions/anaconda3-2024.06-1/etc/profile.d/conda.sh
[illya@rockylinux9 ~]$ conda activate tf
(tf) [illya@rockylinux9 ~]$ python
Python 3.11.9 (main, Apr 19 2024, 16:48:06) [GCC 11.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> from tensorflow.python.client import device_lib
2024-08-08 00:56:12.151689: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
>>> device_lib.list_local_devices()
2024-08-08 00:56:21.938778: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /device:GPU:0 with 4094 MB memory: -> device: 0, name: NVIDIA RTX A2000, pci bus id: 0000:13:00.0, compute capability: 8.6
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
incarnation: 5687668420694823401
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 4293787648
locality {
bus_id: 1
links {
incarnation: 10836478890832035215
physical_device_desc: "device: 0, name: NVIDIA RTX A2000, pci bus id: 0000:13:00.0, compute capability: 8.6"
xla_global_id: 416903419
>>> quit();
(tf) [illya@rockylinux9 ~]$
[root@rockylinux9 ~]# source /apps/pyenv/versions/anaconda3-2024.06-1/etc/profile.d/conda.sh
[root@rockylinux9 ~]# conda create -n pt pytorch
[root@rockylinux9 ~]# conda activate pt
(pt) [root@rockylinux9 ~]# conda list
cudatoolkit 11.8.0 h6a678d5_0
cudnn cuda11_0
python 3.11.9 h955ad1f_0
pytorch 2.3.0 gpu_cuda118py311hd2d20a8_100
(pt) [root@rockylinux9 ~]#
[illya@rockylinux9 ~]$ source /apps/pyenv/versions/anaconda3-2024.06-1/etc/profile.d/conda.sh
[illya@rockylinux9 ~]$ conda activate pt
(pt) [illya@rockylinux9 ~]$
(pt) [illya@rockylinux9 ~]$ python
Python 3.11.9 (main, Apr 19 2024, 16:48:06) [GCC 11.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import torch
>>> print(torch.cuda.is_available())
>>> print(torch.cuda.get_device_name())
>>> quit();
(pt) [illya@rockylinux9 ~]$
https://pypi.org/project/nnabla/ を読むと python 3.5 以上が必要なようで、3.10 でもよさげ。それ以上はまだ未対応の様子。
[root@rockylinux9 ~]# source /apps/pyenv/versions/anaconda3-2024.06-1/etc/profile.d/conda.sh
[root@rockylinux9 ~]# conda create -n NNabla python=3.10 cuda-python=12 libcublas libcusolver libcurand libcufft cudnn=8 nccl -c nvidia -c conda-forge
[root@rockylinux9 ~]# conda activate NNabla
(NNabla) [root@rockylinux9 ~]# conda list
cuda-cudart 12.4.127 0 nvidia
cuda-nvrtc 12.4.127 0 nvidia
cuda-python 12.6.0 py310h7ffcb74_0 nvidia
cuda-version 11.1 hdbd7af8_3 conda-forge
cudatoolkit 11.1.74 h6bb024c_0 nvidia
cudnn 8.0.4 cuda11.1_0 nvidia
nccl hee583db_1 conda-forge
pip 24.2 pyhd8ed1ab_0 conda-forge
python 3.10.14 hd12c33a_0_cpython conda-forge
(NNabla) [root@rockylinux9 ~]# pip install nnabla
(NNabla) [root@rockylinux9 ~]# python -c "import nnabla"
2024-08-09 02:22:58,917 [nnabla][INFO]: Initializing CPU extension...
(NNabla) [root@rockylinux9 ~]#
(NNabla) [root@rockylinux9 ~]# pip install nnabla-ext-cuda120
(NNabla) [root@rockylinux9 ~]# python
Python 3.10.14 | packaged by conda-forge | (main, Mar 20 2024, 12:45:18) [GCC 12.3.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import nnabla_ext.cudnn
2024-08-09 03:02:12,603 [nnabla][INFO]: Initializing CPU extension...
2024-08-09 03:02:12,759 [nnabla][INFO]: Initializing CUDA extension...
openmpi library is not found, distribution training feature is disabled.
2024-08-09 03:02:12,768 [nnabla][INFO]: Initializing cuDNN extension...
>>> device_id = '0'
>>> nnabla_ext.cudnn.check_gpu(device_id)
>>> quit();
(NNabla) [root@rockylinux9 ~]#
(NNabla) [root@rockylinux9 ~]# module load mpi/openmpi-x86_64
(NNabla) [root@rockylinux9 ~]# python
>>> import nnabla_ext.cudnn
2024-08-09 03:11:55,728 [nnabla][INFO]: Initializing CPU extension...
2024-08-09 03:11:55,884 [nnabla][INFO]: Initializing CUDA extension...
2024-08-09 03:11:55,894 [nnabla][INFO]: Initializing cuDNN extension...
>>> device_id = '0'
>>> nnabla_ext.cudnn.check_gpu(device_id)
>>> quit();
(NNabla) [root@rockylinux9 ~]#
MPI のライブラリを読み込ませると NVIDIA Collective Communication Library (NCCL) も有効になる様子。これで複数の GPU を使った解析もできるそうな。
[root@rockylinux9 ~]# source /apps/pyenv/versions/anaconda3-2024.06-1/etc/profile.d/conda.sh
[root@rockylinux9 ~]# conda create -n theano theano numpy=1.16 nose cuda-nvrtc=11.3 cudnn=8.2 -c nvidia
[root@rockylinux9 ~]# conda activate theano
(theano) [root@rockylinux9 ~]#
(theano) [root@rockylinux9 ~]# conda list
cuda-nvrtc 11.3.58 he300756_0 nvidia
cudatoolkit 11.3.1 ha36c431_9 nvidia
cudnn 8.2.1 cuda11.3_0
nose 1.3.7 pyhd3eb1b0_1008
numpy 1.16.6 py39h7895c89_4
pygpu 0.7.6 py39hce1f21e_1
python 3.9.19 h955ad1f_1
theano 1.0.5 py39h295c915_1
(theano) [root@rockylinux9 ~]#
(theano) [root@rockylinux9 ~]#
(theano) [root@rockylinux9 ~]# THEANO_FLAGS=device=cpu python gpu_tutorial1.py
[Elemwise{exp,no_inplace}(<TensorType(float64, vector)>)]
Looping 1000 times took 1.055831 seconds
Result is [1.23178032 1.61879341 1.52278065 ... 2.20771815 2.29967753 1.62323285]
Used the cpu
(theano) [root@rockylinux9 ~]# THEANO_FLAGS=device=cuda0 python gpu_tutorial1.py
Can not use cuDNN on context None: cannot compile with cuDNN. We got this error:
b'/tmp/try_flags_jovwr1bi.c:4:10: fatal error: cudnn.h: No such file or directory\n #include <cudnn.h>\n ^~~~~~~~~\ncompilation terminated.\n'
Mapped name None to device cuda0: NVIDIA RTX A2000 (0000:13:00.0)
[GpuElemwise{exp,no_inplace}(<GpuArrayType<None>(float64, vector)>), HostFromGpu(gpuarray)(GpuElemwise{exp,no_inplace}.0)]
Looping 1000 times took 0.696092 seconds
Result is [1.23178032 1.61879341 1.52278065 ... 2.20771815 2.29967753 1.62323285]
Used the gpu
(theano) [root@rockylinux9 ~]#
Microsoft Cognitive Toolkit (CNTK) は開発終了みたい。
https://pypi.org/project/cntk/ を見ると python 3.6 が上限みたい。
[root@rockylinux9 ~]# source /apps/pyenv/versions/anaconda3-2024.06-1/etc/profile.d/conda.sh
[root@rockylinux9 ~]# conda create -n CNTK python=3.6
[root@rockylinux9 ~]# conda activate CNTK
(CNTK) [root@rockylinux9 ~]# pip install cntk
Keras 2 は TensorFlow に同梱される。Keras 3 は TensorFlow, PyTorch, JAX をバックエンドとして選べる。
[root@rockylinux9 ~]# bash /Public/cuda/cuda_11.2.2_460.32.03_linux.run --override <-- cuda-11.2 のライブラリを入れます。このバージョンが必要です。
[root@rockylinux9 ~]# ldconfig
[root@rockylinux9 ~]# source /apps/pyenv/versions/anaconda3-2024.06-1/etc/profile.d/conda.sh
[root@rockylinux9 ~]# conda create -n mxnet python=3.8 "numpy<1.20" cudnn=8 nccl=2 libcusolver=11 -c nvidia
[root@rockylinux9 ~]# conda activate mxnet
(mxnet) [root@rockylinux9 ~]# pip install mxnet-cu112
(mxnet) [root@rockylinux9 ~]# conda install pillow=8.0
(mxnet) [root@rockylinux9 ~]# conda list
cuda-nvrtc 12.6.20 0 nvidia
cuda-version 12.6 3 nvidia
cudnn cuda12_0
libcublas 0 nvidia
libcusolver 0 nvidia
libcusparse 0 nvidia
mxnet-cu112 1.9.1 pypi_0 pypi
nccl ha515578_0
python 3.8.19 h955ad1f_0
(mxnet) [root@rockylinux9 ~]#
テスト（参照先: https://touch-sp.hatenablog.com/entry/2020/04/24/145406 様）
[illya@rockylinux9 ~]$ source /apps/pyenv/versions/anaconda3-2024.06-1/etc/profile.d/conda.sh
[illya@rockylinux9 ~]$ conda activate mxnet
(mxnet) [illya@rockylinux9 ~]$ python
import mxnet as mx
from mxnet import image, gluon
ctx = mx.gpu() if mx.context.num_gpus() >0 else mx.cpu()
img = image.imread('super_res_input.jpg').astype('float32')/255
img = mx.nd.transpose(img, (2,0,1))
net = gluon.SymbolBlock.imports("aran_c0_s1_x4-symbol.json",['data'], "aran_c0_s1_x4-0000.params")
output = net(img.expand_dims(0).as_in_context(ctx))
output = mx.nd.squeeze(output)
output = (mx.nd.transpose(output, (1,2,0))*255).astype('uint8')
from PIL import Image
img = Image.fromarray(output.asnumpy())
(mxnet) [illya@rockylinux9 ~]$