海光c86-4G AI推理K100
milvus连接工具
# Attu web UI for Milvus (host port 8000 -> container port 3000).
# Fix: the original command ended with a stray bare argument "123456", which
# docker treats as an override of the image CMD — the container would try to
# exec "123456" and exit immediately instead of starting Attu.
# NOTE(review): with default bridge networking, MILVUS_URL=localhost:19530
# resolves to the *container's* loopback, not the host's Milvus; if Attu cannot
# connect, use the host's IP here or add --network=host — TODO confirm.
docker run -d --name zttu-milvus --restart=always -p 8000:3000 -e MILVUS_URL=localhost:19530 zilliz/attu:v2.5
海光相关
开发者社区
https://developer.sourcefind.cn/
论坛
https://forum.sourcefind.cn/
https://forum.sourcefind.cn/thread/855
去光源下载docker镜像
https://sourcefind.cn/#/image/dcu/vllm?activeName=overview
去model scope下载模型
https://www.modelscope.cn/organization/Qwen
启动容器
# Qwen2.5-VL-7B-Instruct multimodal service, tensor-parallel over DCUs 0-1,
# OpenAI-compatible API on :8001. Flags grouped: container identity / runtime
# isolation / DCU device passthrough (kfd, mkfd, dri are Hygon-specific) /
# volume mounts, then the image and the serve command.
docker run -itd \
  --name=vllm-vl \
  --restart=unless-stopped \
  --network=host \
  --ipc=host \
  --shm-size=32G \
  --cap-add=SYS_PTRACE \
  --security-opt seccomp=unconfined \
  --group-add video \
  --device=/dev/kfd \
  --device=/dev/mkfd \
  --device=/dev/dri \
  -v /opt/hyhal:/opt/hyhal \
  -v /root/models:/workspace/models \
  image.sourcefind.cn:5000/dcu/admin/base/vllm:0.8.5-ubuntu22.04-dtk25.04.1-py3.10 \
  /bin/bash -c "HIP_VISIBLE_DEVICES=0,1 vllm serve \
models/Qwen/Qwen2.5-VL-7B-Instruct \
--trust-remote-code \
--max-model-len 32768 \
--max-num-batched-tokens 32768 \
--enforce-eager \
--tensor-parallel-size 2 \
--gpu-memory-utilization 0.9 \
--host 0.0.0.0 \
--port 8001"
# Qwen3-8B text-generation service, tensor-parallel over DCUs 2-3,
# OpenAI-compatible API on :8002. Same DCU passthrough setup as the
# vllm-vl container (kfd/mkfd/dri devices, /opt/hyhal mount are Hygon-specific).
# NOTE(review): the model directory is named "Qwen3-8B-MLX-bf16" — MLX is an
# Apple-silicon weight format that vLLM cannot load; verify the directory
# actually contains HF/safetensors weights, or point at the standard
# Qwen3-8B checkpoint instead — TODO confirm.
docker run -itd \
--restart=unless-stopped \
--network=host \
--ipc=host \
--name=vllm \
--shm-size=32G \
--device=/dev/kfd \
--device=/dev/mkfd \
--device=/dev/dri \
-v /opt/hyhal:/opt/hyhal \
--group-add video \
--cap-add=SYS_PTRACE \
--security-opt seccomp=unconfined \
-v /root/models:/workspace/models \
image.sourcefind.cn:5000/dcu/admin/base/vllm:0.8.5-ubuntu22.04-dtk25.04.1-py3.10 \
/bin/bash -c "HIP_VISIBLE_DEVICES=2,3 vllm serve \
models/Qwen/Qwen3-8B-MLX-bf16 \
--trust-remote-code \
--max-model-len 40960 \
--max-num-batched-tokens 40960 \
--enforce-eager \
--tensor-parallel-size 2 \
--gpu-memory-utilization 0.9 \
--host 0.0.0.0 \
--port 8002"
# Qwen3-Embedding-0.6B embedding service, OpenAI-compatible API on :8003.
# Fix: Qwen3-Embedding reuses a CausalLM architecture, so vLLM must be told
# explicitly to run it in embedding mode via --task embed (per the official
# Qwen3-Embedding model card); without the flag, vLLM 0.8.x serves the model
# as a text-generation endpoint and /v1/embeddings is unavailable.
docker run -itd \
--restart=unless-stopped \
--network=host \
--ipc=host \
--name=vllm-embedding \
--shm-size=1G \
--device=/dev/kfd \
--device=/dev/mkfd \
--device=/dev/dri \
-v /opt/hyhal:/opt/hyhal \
--group-add video \
--cap-add=SYS_PTRACE \
--security-opt seccomp=unconfined \
-v /root/models:/workspace/models \
image.sourcefind.cn:5000/dcu/admin/base/vllm:0.8.5-ubuntu22.04-dtk25.04.1-py3.10 \
/bin/bash -c "vllm serve \
models/Qwen/Qwen3-Embedding-0.6B \
--task embed \
--trust-remote-code \
--max-model-len 16384 \
--max-num-batched-tokens 16384 \
--enforce-eager \
--gpu-memory-utilization 0.5 \
--host 0.0.0.0 \
--port 8003"
# Load the DTK (DCU toolkit) environment, then query accelerator status.
# Fix: the original had both commands jammed on one line, so "rocm-smi" was
# passed as a positional argument to the sourced env.sh instead of executed.
source /opt/dtk-24.04.3/env.sh
rocm-smi