# daily_ete_test_3090 #5
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: daily_ete_test_3090

# Triggers: manual dispatch (with test filters) and a scheduled run.
# All list-valued inputs are JSON arrays because the jobs pass them to
# fromJSON(); defaults are written as strict JSON (double-quoted items) so
# parsing does not depend on a lenient JSON reader.
on:
  workflow_dispatch:
    inputs:
      repo_org:
        required: false
        description: 'Tested repository in "owner/name" form. Default is "InternLM/lmdeploy"'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      backend:
        required: true
        description: 'Set backend testcase filter as a JSON list: turbomind, pytorch. Default is ["turbomind", "pytorch"]'
        type: string
        default: '["turbomind", "pytorch"]'
      model:
        required: true
        description: 'Set testcase module filter as a JSON list: llm, mllm. Default contains all models'
        type: string
        default: '["llm", "mllm"]'
      function:
        required: true
        description: 'Set testcase function filter as a JSON list: chat, restful, pipeline. Default contains all functions'
        type: string
        default: '["pipeline", "restful", "chat"]'
      offline_mode:
        required: true
        description: 'Whether to start in offline mode. If true, you should prepare the code and whl package by yourself'
        type: boolean
        default: false
      regression_func:
        required: true
        # The jobs below check this list for 'quant', 'tools' and 'restful'.
        description: 'Set regression function filter as a JSON list: quant, tools, restful. Default is ["quant", "tools"]'
        type: string
        default: '["quant", "tools"]'
  schedule:
    # 16:00 UTC, Sunday through Thursday.
    - cron: '00 16 * * 0-4'
# Workflow-level environment, visible to every job and step.
env:
  # Host-side paths. NOTE(review): not referenced anywhere in the visible
  # part of this workflow (volumes hard-code the same paths) — confirm use.
  HOST_PIP_CACHE_DIR: '/nvme/github-actions/pip-cache'
  HOST_LOCALTIME: '/usr/share/zoneinfo/Asia/Shanghai'

  # Wheel build output folder, made unique per run.
  OUTPUT_FOLDER: 'cuda12.1_dist_${{ github.run_id }}'
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: 'true'

  # Per-run report directory (allure results, pytest cache, coverage data).
  REPORT_DIR: '/nvme/qa_test_models/test-reports/${{ github.run_id }}'
  # Extra pytest arguments enabling coverage of the installed package.
  COV_PARAM: '--cov /opt/py3/lib/python3.10/site-packages/lmdeploy'
  # pytest "last failed" flags; scheduled re-runs (attempt != 1) add '--lfnf none'.
  FAIL_CONFIG: ${{ github.event_name == 'schedule' && github.run_attempt != 1 && '--lf --lfnf none' || '--lf'}}

  # Shared staging area for the code under test, and the offline inputs.
  TEST_CODE_PATH: '/nvme/qa_test_models/test_pkg/lmdeploy'
  OFFLINE_CODE_PATH: '/nvme/qa_test_models/offline_pkg/lmdeploy'
  OFFLINE_REQUIREMENTS: '/nvme/qa_test_models/offline_pkg/requirements.txt'
jobs:
  # Builds the lmdeploy wheel on a hosted runner and uploads it as an artifact.
  # Skipped for manual runs in offline mode, where the wheel is supplied by hand.
  linux-build:
    if: ${{!cancelled() && (github.event_name == 'schedule' || !inputs.offline_mode)}}
    runs-on: ubuntu-latest
    strategy:
      matrix:
        pyver:
          - py310
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda12.1
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          # Fall back to the upstream repo/branch when no dispatch input exists
          # (i.e. on scheduled runs).
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}

      - name: Build
        run: |
          echo ${PYTHON_VERSION}
          echo ${PLAT_NAME}
          echo ${DOCKER_TAG}
          echo ${OUTPUT_FOLDER}
          echo ${GITHUB_RUN_ID}
          # remove -it
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}

      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          if-no-files-found: error
          retention-days: 1
# ---------------------------------------------------------------------------
# download_pkgs: stage the test code and the wheel under TEST_CODE_PATH on the
# shared volume, either from this run's checkout/artifact or from the offline
# package directory.
# ---------------------------------------------------------------------------
  download_pkgs:
    needs: linux-build
    if: ${{!cancelled()}}
    runs-on: [self-hosted, 3090-r1]
    timeout-minutes: 50
    container:
      image: openmmlab/lmdeploy:latest
      options: '--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never'
      volumes:
        - '/nvme/qa_test_models:/nvme/qa_test_models'
        - '/data1:/data1'
        - '/usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro'
    steps:
      # --- online path: scheduled runs, or manual runs without offline_mode ---
      - name: Clone repository
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        uses: actions/checkout@v2
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}

      # Recreate the staging directory, copy the checkout into it, and make the
      # 3090-specific autotest config the active config.yaml.
      - name: Copy repository
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: >-
          rm -rf ${{env.TEST_CODE_PATH}} &&
          mkdir ${{env.TEST_CODE_PATH}} &&
          cp -r . ${{env.TEST_CODE_PATH}} &&
          mv ${{env.TEST_CODE_PATH}}/autotest/config-3090.yaml ${{env.TEST_CODE_PATH}}/autotest/config.yaml

      # --- offline path: same staging, sourced from OFFLINE_CODE_PATH ---
      - name: Copy repository - offline
        if: ${{inputs.offline_mode}}
        run: >-
          rm -rf ${{env.TEST_CODE_PATH}} &&
          mkdir ${{env.TEST_CODE_PATH}} &&
          cp -r ${{env.OFFLINE_CODE_PATH}}/. ${{env.TEST_CODE_PATH}} &&
          mv ${{env.TEST_CODE_PATH}}/autotest/config-3090.yaml ${{env.TEST_CODE_PATH}}/autotest/config.yaml

      - name: Download Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-py310

      # Replace any previously staged wheel with the freshly built one.
      - name: Copy Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: >-
          rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f &&
          cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}

      - name: Copy Artifacts - offline
        if: ${{inputs.offline_mode}}
        run: >-
          rm ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl -f &&
          cp ${{env.OFFLINE_CODE_PATH}}/lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
# ---------------------------------------------------------------------------
# test_quantization: run the AWQ (w4a16) and w8a8 quantization test suites.
# Runs on schedule, or on dispatch when regression_func contains 'quant'.
# ---------------------------------------------------------------------------
  test_quantization:
    needs: download_pkgs
    if: ${{!cancelled() && contains(needs.download_pkgs.result, 'success') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'quant') )}}
    runs-on: [self-hosted, 3090-r1]
    timeout-minutes: 150
    env:
      PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
      MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
      MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
    container:
      image: openmmlab/lmdeploy:latest
      options: '--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never'
      volumes:
        - '/nvme/github-actions/pip-cache:/root/.cache/pip'
        - '/nvme/qa_test_models:/nvme/qa_test_models'
        - '/data1:/data1'
        - '/usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro'
    steps:
      # Pull the staged code and wheel into the job workspace.
      - name: Copy repository and Artifacts
        run: cp -r ${{env.TEST_CODE_PATH}}/. .

      - name: Install lmdeploy - dependency
        run: python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}

      # The wheel is installed without dependencies; those come from the
      # requirements file installed in the previous step.
      - name: Install lmdeploy
        run: |
          python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt

      - name: Check env
        run: |
          pip uninstall -y nvidia-nccl-cu11
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp log in testcase
          rm -rf /nvme/qa_test_models/autotest_model/log/*
          # keep the pytest cache inside the per-run report directory
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest

      # Test failures are tolerated ('|| true' plus continue-on-error); the
      # coverage data file is moved into REPORT_DIR under a timestamped name.
      - name: Test lmdeploy - quantization w4a16
        if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'turbomind')
        continue-on-error: true
        run: |
          pytest autotest/tools/quantization/test_quantization_awq.py \
            -m 'not pr_test and test_3090' \
            --alluredir=${{env.REPORT_DIR}} --clean-alluredir \
            ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')

      - name: Test lmdeploy - quantization w8a8
        if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'pytorch')
        continue-on-error: true
        run: |
          pytest autotest/tools/quantization/test_quantization_w8a8.py \
            --alluredir=${{env.REPORT_DIR}} \
            ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')

      # Always open up report permissions and reset the workspace to an
      # empty, world-writable directory.
      - name: Clear workfile
        if: always()
        run: |
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir
# ---------------------------------------------------------------------------
# test_tools: chat / pipeline / restful tool tests, fanned out over a
# backend x model x function matrix. Runs on schedule, or on dispatch when
# regression_func contains 'tools', unless test_quantization failed.
# ---------------------------------------------------------------------------
  test_tools:
    needs: test_quantization
    if: ${{!cancelled() && !contains(needs.test_quantization.result, 'fail') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'tools'))}}
    runs-on: [self-hosted, 3090-r1]
    timeout-minutes: 300
    strategy:
      fail-fast: false
      matrix:
        # Dispatch inputs win; the literal lists apply when inputs are absent.
        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
        model: ${{ fromJSON(inputs.model || '["llm", "mllm"]')}}
        function: ${{ fromJSON(inputs.function || '["pipeline","restful","chat"]')}}
        # The chat function is not run for mllm on either backend.
        exclude:
          - {backend: turbomind, model: mllm, function: chat}
          - {backend: pytorch, model: mllm, function: chat}
    env:
      PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
      MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
      MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
    container:
      image: openmmlab/lmdeploy:latest
      options: '--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never'
      volumes:
        - '/nvme/github-actions/pip-cache:/root/.cache/pip'
        - '/nvme/qa_test_models:/nvme/qa_test_models'
        - '/data1:/data1'
        - '/usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro'
    steps:
      # Pull the staged code and wheel into the job workspace.
      - name: Copy repository and Artifacts
        run: cp -r ${{env.TEST_CODE_PATH}}/. .

      - name: Install lmdeploy - dependency
        run: python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}

      - name: Install lmdeploy
        run: |
          python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt

      - name: Check env
        run: |
          pip uninstall -y nvidia-nccl-cu11
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp log in testcase
          rm -rf /nvme/qa_test_models/autotest_model/log/*
          # keep the pytest cache inside the per-run report directory
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest

      # Exactly one of the three test steps below matches a given matrix cell.
      # Each one ignores test failures and a missing coverage file.
      - name: Test lmdeploy - chat
        if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind') && matrix.model == 'llm' && matrix.function == 'chat'
        continue-on-error: true
        run: |
          pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py \
            -m 'gpu_num_1 and not pr_test and test_3090' \
            --alluredir=${{env.REPORT_DIR}} \
            ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true

      - name: Test lmdeploy - pipeline
        if: matrix.function == 'pipeline'
        continue-on-error: true
        run: |
          pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py \
            -m 'gpu_num_1 and not pr_test and test_3090' \
            --alluredir=${{env.REPORT_DIR}} \
            ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true

      - name: Test lmdeploy - restful
        if: matrix.function == 'restful'
        continue-on-error: true
        run: |
          pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py \
            -m 'gpu_num_1 and not pr_test and test_3090' \
            --alluredir=${{env.REPORT_DIR}} \
            ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true

      # Always open up report permissions and reset the workspace to an
      # empty, world-writable directory.
      - name: Clear workfile
        if: always()
        run: |
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir
# ---------------------------------------------------------------------------
# test_restful: start an api_server per backend and run the RESTful interface
# suites against it (chat functions, then v1 completions). Runs on schedule,
# or on dispatch when regression_func contains 'restful', unless
# test_quantization failed.
# ---------------------------------------------------------------------------
  test_restful:
    needs: test_quantization
    if: ${{!cancelled() && !contains(needs.test_quantization.result, 'fail') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'restful'))}}
    runs-on: [self-hosted, 3090-r1]
    strategy:
      fail-fast: false
      matrix:
        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
    # NOTE(review): this job-level limit is lower than the 75-minute limit on
    # the first test step below, so the job limit is the effective cap —
    # confirm which value is intended.
    timeout-minutes: 60
    container:
      image: openmmlab/lmdeploy:latest
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      # Pull the staged code and wheel into the job workspace.
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      - name: Install lmdeploy
        run: |
          python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      - name: Check env
        run: |
          pip uninstall -y nvidia-nccl-cu11
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp log in testcase
          rm -rf /nvme/qa_test_models/autotest_model/log/*
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest

      # ---- round 1: chat function tests ----
      # The server runs in the background; its PID is exported through
      # GITHUB_ENV so the kill step can find it, then we wait for startup.
      - name: Start restful api turbomind
        if: matrix.backend == 'turbomind'
        run: |
          lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm3-8b-instruct > restful.log 2>&1 &
          echo "restful_pid=$!" >> "$GITHUB_ENV"
          sleep 120s
      - name: Start restful api pytorch
        if: matrix.backend == 'pytorch'
        run: |
          lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm3-8b-instruct --backend pytorch > restful.log 2>&1 &
          echo "restful_pid=$!" >> "$GITHUB_ENV"
          sleep 180s
      - name: Test lmdeploy - restful api
        timeout-minutes: 75
        run: |
          pytest autotest/interface/restful/test_restful_chat_func.py -n 20 -m 'not not_${{matrix.backend}}' --alluredir=${{env.REPORT_DIR}}/interface-${{matrix.backend}} ${{env.COV_PARAM}} || true
          # Test failures are ignored above, so a missing coverage file must
          # not fail the job either (same handling as the test_tools job).
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
      - name: Kill api server
        if: always()
        run: |
          # This step always runs, so the PID may be unset (start step skipped
          # or failed) or the server may already have exited; only signal a
          # process that is still alive.
          if [ -n "${restful_pid:-}" ] && kill -0 "$restful_pid" 2>/dev/null; then
            kill -15 "$restful_pid"
          fi

      # ---- round 2: v1 completions tests against a freshly started server ----
      - name: Start restful api turbomind - base
        if: matrix.backend == 'turbomind'
        run: |
          lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm3-8b-instruct > restful.log 2>&1 &
          echo "restful_pid=$!" >> "$GITHUB_ENV"
          sleep 120s
      - name: Start restful api pytorch - base
        if: matrix.backend == 'pytorch'
        run: |
          lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm3-8b-instruct --backend pytorch > restful.log 2>&1 &
          echo "restful_pid=$!" >> "$GITHUB_ENV"
          sleep 180s
      - name: Test lmdeploy - restful api - base
        timeout-minutes: 40
        run: |
          pytest autotest/interface/restful/test_restful_completions_v1.py -n 20 --alluredir=${{env.REPORT_DIR}}/interface-${{matrix.backend}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
      - name: Kill api server - base
        if: always()
        run: |
          # Same guarded shutdown as after round 1.
          if [ -n "${restful_pid:-}" ] && kill -0 "$restful_pid" 2>/dev/null; then
            kill -15 "$restful_pid"
          fi

      # Always open up report permissions and reset the workspace to an
      # empty, world-writable directory.
      - name: Clear workfile
        if: always()
        run: |
          chmod -R 777 "$REPORT_DIR"
          export workdir="$(pwd)"
          cd ..
          rm -rf "$workdir"
          mkdir "$workdir"
          chmod -R 777 "$workdir"
# ---------------------------------------------------------------------------
# get_coverage_report: merge the per-step coverage data files collected in
# REPORT_DIR into one report (XML plus console summary).
# ---------------------------------------------------------------------------
  get_coverage_report:
    needs:
      - test_tools
      - test_restful
    # NOTE(review): success() presumably evaluates to false when a needed job
    # was skipped (e.g. test_restful when regression_func omits 'restful'),
    # which would skip this job as well — confirm this is intended.
    if: ${{!cancelled() && success()}}
    runs-on: [self-hosted, 3090-r1]
    timeout-minutes: 5
    container:
      image: openmmlab/lmdeploy:latest
      options: '--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never'
      volumes:
        - '/nvme/github-actions/pip-cache:/root/.cache/pip'
        - '/nvme/qa_test_models:/nvme/qa_test_models'
        - '/usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro'
    steps:
      # Pull the staged code and wheel into the job workspace.
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{env.TEST_CODE_PATH}}/. .

      - name: Install lmdeploy
        run: |
          python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt

      # Combine the data files from REPORT_DIR, write coverage.xml there,
      # print the report, and store the merged .coverage file alongside it.
      - name: Get coverage report
        run: |
          pip install coverage
          coverage combine ${{env.REPORT_DIR}}
          coverage xml -o ${{env.REPORT_DIR}}/coverage.xml
          coverage report -m
          mv .coverage ${{env.REPORT_DIR}}/.coverage

      # Always open up report permissions and reset the workspace to an
      # empty, world-writable directory.
      - name: Clear workfile
        if: always()
        run: |
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir