Skip to content

daily_ete_test_3090

daily_ete_test_3090 #5

name: daily_ete_test_3090
on:
workflow_dispatch:
inputs:
repo_org:
required: false
description: 'Tested repository organization name. Default is InternLM'
type: string
default: 'InternLM/lmdeploy'
repo_ref:
required: false
description: 'Set branch or tag or commit id. Default is "main"'
type: string
default: 'main'
backend:
required: true
description: 'Set backend testcase filter: turbomind or pytorch or turbomind, pytorch. Default is "["turbomind", "pytorch"]"'
type: string
default: "['turbomind', 'pytorch']"
model:
required: true
description: 'Set testcase module filter: llm, vllm. Default contains all models'
type: string
default: "['llm','mllm']"
function:
required: true
description: 'Set testcase function filter: chat, restful, pipeline. Default contains all functions'
type: string
default: '["pipeline", "restful", "chat"]'
offline_mode:
required: true
description: 'Whether start a offline mode, if true, you should prepare code and whl package by yourself'
type: boolean
default: false
regression_func:
required: true
description: 'regression functions'
type: string
default: "['quant', 'tools']"
schedule:
- cron: '00 16 * * 0-4'
env:
HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
OUTPUT_FOLDER: cuda12.1_dist_${{ github.run_id }}
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
REPORT_DIR: /nvme/qa_test_models/test-reports/${{ github.run_id }}
COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
FAIL_CONFIG: ${{ github.event_name == 'schedule' && github.run_attempt != 1 && '--lf --lfnf none' || '--lf'}}
TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy
OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
OFFLINE_REQUIREMENTS: /nvme/qa_test_models/offline_pkg/requirements.txt
jobs:
linux-build:
if: ${{!cancelled() && (github.event_name == 'schedule' || !inputs.offline_mode)}}
strategy:
matrix:
pyver: [py310]
runs-on: ubuntu-latest
env:
PYTHON_VERSION: ${{ matrix.pyver }}
PLAT_NAME: manylinux2014_x86_64
DOCKER_TAG: cuda12.1
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
ref: ${{github.event.inputs.repo_ref || 'main'}}
- name: Build
run: |
echo ${PYTHON_VERSION}
echo ${PLAT_NAME}
echo ${DOCKER_TAG}
echo ${OUTPUT_FOLDER}
echo ${GITHUB_RUN_ID}
# remove -it
sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
- name: Upload Artifacts
uses: actions/upload-artifact@v4
with:
if-no-files-found: error
path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
retention-days: 1
name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}
download_pkgs:
needs: linux-build
if: ${{!cancelled()}}
runs-on: [self-hosted, 3090-r1]
timeout-minutes: 50
container:
image: openmmlab/lmdeploy:latest
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/qa_test_models:/nvme/qa_test_models
- /data1:/data1
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
steps:
- name: Clone repository
uses: actions/checkout@v2
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
with:
repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
ref: ${{github.event.inputs.repo_ref || 'main'}}
- name: Copy repository
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && cp -r . ${{env.TEST_CODE_PATH}} && mv ${{env.TEST_CODE_PATH}}/autotest/config-3090.yaml ${{env.TEST_CODE_PATH}}/autotest/config.yaml
- name: Copy repository - offline
if: ${{inputs.offline_mode}}
run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir ${{env.TEST_CODE_PATH}} && cp -r ${{env.OFFLINE_CODE_PATH}}/. ${{env.TEST_CODE_PATH}} && mv ${{env.TEST_CODE_PATH}}/autotest/config-3090.yaml ${{env.TEST_CODE_PATH}}/autotest/config.yaml
- name: Download Artifacts
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
uses: actions/download-artifact@v4
with:
name: my-artifact-${{ github.run_id }}-py310
- name: Copy Artifacts
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
- name: Copy Artifacts - offline
if: ${{inputs.offline_mode}}
run: rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f && cp ${{env.OFFLINE_CODE_PATH}}/lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
test_quantization:
needs: download_pkgs
if: ${{!cancelled() && contains(needs.download_pkgs.result, 'success') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'quant') )}}
runs-on: [self-hosted, 3090-r1]
timeout-minutes: 150
env:
PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
container:
image: openmmlab/lmdeploy:latest
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
- /nvme/qa_test_models:/nvme/qa_test_models
- /data1:/data1
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
steps:
- name: Copy repository and Artifacts
run: |
cp -r ${{env.TEST_CODE_PATH}}/. .
- name: Install lmdeploy - dependency
run: |
python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
- name: Install lmdeploy
run: |
python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip install -r requirements/test.txt
- name: Check env
run: |
pip uninstall -y nvidia-nccl-cu11
python3 -m pip list
lmdeploy check_env
rm -rf allure-results
# remove tmp log in testcase
rm -rf /nvme/qa_test_models/autotest_model/log/*
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
- name: Test lmdeploy - quantization w4a16
continue-on-error: true
if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'turbomind')
run: |
pytest autotest/tools/quantization/test_quantization_awq.py -m 'not pr_test and test_3090' --alluredir=${{env.REPORT_DIR}} --clean-alluredir ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Test lmdeploy - quantization w8a8
continue-on-error: true
if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'pytorch')
run: |
pytest autotest/tools/quantization/test_quantization_w8a8.py --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Clear workfile
if: always()
run: |
chmod -R 777 $REPORT_DIR
export workdir=$(pwd)
cd ..
rm -rf $workdir
mkdir $workdir
chmod -R 777 $workdir
test_tools:
if: ${{!cancelled() && !contains(needs.test_quantization.result, 'fail') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'tools'))}}
runs-on: [self-hosted, 3090-r1]
needs: test_quantization
timeout-minutes: 300
strategy:
fail-fast: false
matrix:
backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
model: ${{ fromJSON(inputs.model || '["llm", "mllm"]')}}
function: ${{ fromJSON(inputs.function || '["pipeline","restful","chat"]')}}
exclude:
- backend: turbomind
model: mllm
function: chat
- backend: pytorch
model: mllm
function: chat
env:
PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
container:
image: openmmlab/lmdeploy:latest
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
- /nvme/qa_test_models:/nvme/qa_test_models
- /data1:/data1
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
steps:
- name: Copy repository and Artifacts
run: |
cp -r ${{env.TEST_CODE_PATH}}/. .
- name: Install lmdeploy - dependency
run: |
python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
- name: Install lmdeploy
run: |
python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip install -r requirements/test.txt
- name: Check env
run: |
pip uninstall -y nvidia-nccl-cu11
python3 -m pip list
lmdeploy check_env
rm -rf allure-results
# remove tmp log in testcase
rm -rf /nvme/qa_test_models/autotest_model/log/*
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
- name: Test lmdeploy - chat
continue-on-error: true
if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind') && matrix.model == 'llm' && matrix.function == 'chat'
run: |
pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_1 and not pr_test and test_3090' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
- name: Test lmdeploy - pipeline
continue-on-error: true
if: matrix.function == 'pipeline'
run: |
pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_1 and not pr_test and test_3090' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
- name: Test lmdeploy - restful
continue-on-error: true
if: matrix.function == 'restful'
run: |
pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_1 and not pr_test and test_3090' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
- name: Clear workfile
if: always()
run: |
chmod -R 777 $REPORT_DIR
export workdir=$(pwd)
cd ..
rm -rf $workdir
mkdir $workdir
chmod -R 777 $workdir
test_restful:
if: ${{!cancelled() && !contains(needs.test_quantization.result, 'fail') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'restful'))}}
runs-on: [self-hosted, 3090-r1]
needs: test_quantization
strategy:
fail-fast: false
matrix:
backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
timeout-minutes: 60
container:
image: openmmlab/lmdeploy:latest
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
- /nvme/qa_test_models:/nvme/qa_test_models
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
steps:
- name: Copy repository and Artifacts
run: |
cp -r ${{env.TEST_CODE_PATH}}/. .
- name: Install lmdeploy - dependency
run: |
python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
- name: Install lmdeploy
run: |
python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip install -r requirements/test.txt
- name: Check env
run: |
pip uninstall -y nvidia-nccl-cu11
python3 -m pip list
lmdeploy check_env
rm -rf allure-results
# remove tmp log in testcase
rm -rf /nvme/qa_test_models/autotest_model/log/*
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
- name: Start restful api turbomind
if: matrix.backend == 'turbomind'
run: |
lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm3-8b-instruct > restful.log 2>&1 &
echo "restful_pid=$!" >> "$GITHUB_ENV"
sleep 120s
- name: Start restful api pytorch
if: matrix.backend == 'pytorch'
run: |
lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm3-8b-instruct --backend pytorch > restful.log 2>&1 &
echo "restful_pid=$!" >> "$GITHUB_ENV"
sleep 180s
- name: Test lmdeploy - restful api
timeout-minutes: 75
run: |
pytest autotest/interface/restful/test_restful_chat_func.py -n 20 -m 'not not_${{matrix.backend}}' --alluredir=${{env.REPORT_DIR}}/interface-${{matrix.backend}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Kill api server
if: always()
run: |
kill -15 "$restful_pid"
- name: Start restful api turbomind - base
if: matrix.backend == 'turbomind'
run: |
lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm3-8b-instruct > restful.log 2>&1 &
echo "restful_pid=$!" >> "$GITHUB_ENV"
sleep 120s
- name: Start restful api pytorch - base
if: matrix.backend == 'pytorch'
run: |
lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm3-8b-instruct --backend pytorch > restful.log 2>&1 &
echo "restful_pid=$!" >> "$GITHUB_ENV"
sleep 180s
- name: Test lmdeploy - restful api - base
timeout-minutes: 40
run: |
pytest autotest/interface/restful/test_restful_completions_v1.py -n 20 --alluredir=${{env.REPORT_DIR}}/interface-${{matrix.backend}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Kill api server
if: always()
run: |
kill -15 "$restful_pid"
- name: Clear workfile
if: always()
run: |
chmod -R 777 $REPORT_DIR
export workdir=$(pwd)
cd ..
rm -rf $workdir
mkdir $workdir
chmod -R 777 $workdir
get_coverage_report:
if: ${{!cancelled() && success()}}
runs-on: [self-hosted, 3090-r1]
needs: [test_tools, test_restful]
timeout-minutes: 5
container:
image: openmmlab/lmdeploy:latest
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
- /nvme/qa_test_models:/nvme/qa_test_models
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
steps:
- name: Copy repository and Artifacts
run: cp -r ${{env.TEST_CODE_PATH}}/. .
- name: Install lmdeploy
run: |
python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip install -r requirements/test.txt
- name: Get coverage report
run: |
pip install coverage
coverage combine ${{env.REPORT_DIR}}
coverage xml -o ${{env.REPORT_DIR}}/coverage.xml
coverage report -m
mv .coverage ${{env.REPORT_DIR}}/.coverage
- name: Clear workfile
if: always()
run: |
chmod -R 777 $REPORT_DIR
export workdir=$(pwd)
cd ..
rm -rf $workdir
mkdir $workdir
chmod -R 777 $workdir