海光c86-4G AI推理K100
milvus连接工具
# Attu web UI for Milvus (host port 8000 -> container port 3000).
# Fix: the original command ended with a stray bare argument "123456", which
# docker treats as an override of the image CMD — the container would try to
# exec "123456" and exit immediately instead of starting Attu.
# NOTE(review): with default bridge networking, MILVUS_URL=localhost:19530
# resolves to the *container's* loopback, not the host's Milvus; if Attu cannot
# connect, use the host's IP here or add --network=host — TODO confirm.
docker run -d --name zttu-milvus --restart=always -p 8000:3000 -e MILVUS_URL=localhost:19530 zilliz/attu:v2.5
海光相关
开发者社区
https://developer.sourcefind.cn/
论坛
https://forum.sourcefind.cn/
https://forum.sourcefind.cn/thread/855
去光源下载docker镜像
https://sourcefind.cn/#/image/dcu/vllm?activeName=overview
去model scope下载模型
https://www.modelscope.cn/organization/Qwen
启动容器
# Qwen2.5-VL-7B-Instruct multimodal service, tensor-parallel over DCUs 0-1,
# OpenAI-compatible API on :8001. Flags grouped: container identity / runtime
# isolation / DCU device passthrough (kfd, mkfd, dri are Hygon-specific) /
# volume mounts, then the image and the serve command.
docker run -itd \
  --name=vllm-vl \
  --restart=unless-stopped \
  --network=host \
  --ipc=host \
  --shm-size=32G \
  --cap-add=SYS_PTRACE \
  --security-opt seccomp=unconfined \
  --group-add video \
  --device=/dev/kfd \
  --device=/dev/mkfd \
  --device=/dev/dri \
  -v /opt/hyhal:/opt/hyhal \
  -v /root/models:/workspace/models \
  image.sourcefind.cn:5000/dcu/admin/base/vllm:0.8.5-ubuntu22.04-dtk25.04.1-py3.10 \
  /bin/bash -c "HIP_VISIBLE_DEVICES=0,1 vllm serve \
models/Qwen/Qwen2.5-VL-7B-Instruct \
--trust-remote-code \
--max-model-len 32768 \
--max-num-batched-tokens 32768 \
--enforce-eager \
--tensor-parallel-size 2 \
--gpu-memory-utilization 0.9 \
--host 0.0.0.0 \
--port 8001"
# Qwen3-8B text-generation service, tensor-parallel over DCUs 2-3,
# OpenAI-compatible API on :8002. Same DCU passthrough setup as the
# vllm-vl container (kfd/mkfd/dri devices, /opt/hyhal mount are Hygon-specific).
# NOTE(review): the model directory is named "Qwen3-8B-MLX-bf16" — MLX is an
# Apple-silicon weight format that vLLM cannot load; verify the directory
# actually contains HF/safetensors weights, or point at the standard
# Qwen3-8B checkpoint instead — TODO confirm.
docker run -itd \
--restart=unless-stopped \
--network=host \
--ipc=host \
--name=vllm \
--shm-size=32G \
--device=/dev/kfd \
--device=/dev/mkfd \
--device=/dev/dri \
-v /opt/hyhal:/opt/hyhal \
--group-add video \
--cap-add=SYS_PTRACE \
--security-opt seccomp=unconfined \
-v /root/models:/workspace/models \
image.sourcefind.cn:5000/dcu/admin/base/vllm:0.8.5-ubuntu22.04-dtk25.04.1-py3.10 \
/bin/bash -c "HIP_VISIBLE_DEVICES=2,3 vllm serve \
models/Qwen/Qwen3-8B-MLX-bf16 \
--trust-remote-code \
--max-model-len 40960 \
--max-num-batched-tokens 40960 \
--enforce-eager \
--tensor-parallel-size 2 \
--gpu-memory-utilization 0.9 \
--host 0.0.0.0 \
--port 8002"
# Qwen3-Embedding-0.6B embedding service, OpenAI-compatible API on :8003.
# Fix: Qwen3-Embedding reuses a CausalLM architecture, so vLLM must be told
# explicitly to run it in embedding mode via --task embed (per the official
# Qwen3-Embedding model card); without the flag, vLLM 0.8.x serves the model
# as a text-generation endpoint and /v1/embeddings is unavailable.
docker run -itd \
--restart=unless-stopped \
--network=host \
--ipc=host \
--name=vllm-embedding \
--shm-size=1G \
--device=/dev/kfd \
--device=/dev/mkfd \
--device=/dev/dri \
-v /opt/hyhal:/opt/hyhal \
--group-add video \
--cap-add=SYS_PTRACE \
--security-opt seccomp=unconfined \
-v /root/models:/workspace/models \
image.sourcefind.cn:5000/dcu/admin/base/vllm:0.8.5-ubuntu22.04-dtk25.04.1-py3.10 \
/bin/bash -c "vllm serve \
models/Qwen/Qwen3-Embedding-0.6B \
--task embed \
--trust-remote-code \
--max-model-len 16384 \
--max-num-batched-tokens 16384 \
--enforce-eager \
--gpu-memory-utilization 0.5 \
--host 0.0.0.0 \
--port 8003"
# Load the DTK (DCU toolkit) environment, then query accelerator status.
# Fix: the original had both commands jammed on one line, so "rocm-smi" was
# passed as a positional argument to the sourced env.sh instead of executed.
source /opt/dtk-24.04.3/env.sh
rocm-smi