modelscope · Jintao-Huang · Feb 10, 2025 · Feb 9, 2025 · Feb 9, 2025 · Feb 9, 2025
diff --git a/README.md b/README.md
@@ -247,7 +247,7 @@ Here is a minimal example of training to deployment using ms-swift. For more det
 
 Supported Training Methods:
 
-| Method                             | Full-Parameter                                               | LoRA                                                         | QLoRA                                                        | Deepspeed                                                    | Multi-modal                                                  |
+| Method                             | Full-Parameter                                               | LoRA                                                         | QLoRA                                                        | Deepspeed                                                    | Multi-Modal                                                  |
 | ---------------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
 | Pre-training                       | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/pretrain/train.sh) | ✅                                                            | ✅                                                            | ✅                                                            | ✅                                                            |
 | Instruction Supervised Fine-tuning | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/full/train.sh) | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/lora_sft.sh) | [✅](https://github.com/modelscope/ms-swift/tree/main/examples/train/qlora) | [✅](https://github.com/modelscope/ms-swift/tree/main/examples/train/multi-gpu/deepspeed) | [✅](https://github.com/modelscope/ms-swift/tree/main/examples/train/multimodal) |

diff --git a/docs/source/Customization/自定义数据集.md b/docs/source/Customization/自定义数据集.md
@@ -128,8 +128,8 @@ query-response格式：
 {"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "<image>帮我打开谷歌浏览器"}, {"role": "assistant", "content": "Action: click(start_box='<bbox>')"}], "images": ["/xxx/x.jpg"], "objects": {"ref": [], "bbox": [[615, 226]]}}
 ```
 该格式将自动转换数据集格式为对应模型的grounding任务格式，且选择对应模型的bbox归一化方式。该格式比通用格式多了objects字段，该字段包含的字段有：
- - ref：用于替换`<ref-object>`。
- - bbox：用于替换`<bbox>`。
+ - ref: 用于替换`<ref-object>`。
+ - bbox: 用于替换`<bbox>`。若bbox中每个box长度为2，则代表x和y坐标，若box长度为4，则代表2个点的x和y坐标。
  - bbox_type: 可选项为'real'，'norm1'。默认为'real'，即bbox为真实bbox值。若是'norm1'，则bbox已经归一化为0~1。
  - image_id: 该参数只有当bbox_type为'real'时生效。代表bbox对应的图片是第几张，用于缩放bbox。索引从0开始，默认全为第0张。
 

diff --git a/docs/source_en/Customization/Custom-dataset.md b/docs/source_en/Customization/Custom-dataset.md
@@ -138,7 +138,7 @@ When using this type of data, please note:
 The format will automatically convert the dataset format to the corresponding model's grounding task format and select the appropriate model's bbox normalization method. Compared to the general format, this format includes an additional "objects" field, which contains the following subfields:
 
 - ref: Used to replace `<ref-object>`.
-- bbox: Used to replace `<bbox>`.
+- bbox: Used to replace `<bbox>`. If each box in bbox has length 2, it represents the x and y coordinates of a single point; if it has length 4, it represents the x and y coordinates of two points.
 - bbox_type: Optional values are 'real' and 'norm1'. The default is 'real', meaning the bbox represents the actual bounding box value. If set to 'norm1', the bbox is normalized to the range 0~1.
 - image_id: This parameter is only effective when bbox_type is 'real'. It indicates the index of the image corresponding to the bbox, used for scaling the bbox. The index starts from 0, and the default is 0 for all.
 

diff --git a/swift/llm/argument/eval_args.py b/swift/llm/argument/eval_args.py
@@ -52,22 +52,29 @@ def __post_init__(self):
         logger.info(f'eval_output_dir: {self.eval_output_dir}')
 
     @staticmethod
-    def list_eval_dataset():
+    def list_eval_dataset(eval_backend=None):
         from evalscope.constants import EvalBackend
         from evalscope.benchmarks.benchmark import BENCHMARK_MAPPINGS
         from evalscope.backend.opencompass import OpenCompassBackendManager
-        from evalscope.backend.vlm_eval_kit import VLMEvalKitBackendManager
-        return {
+        res = {
             EvalBackend.NATIVE: list(BENCHMARK_MAPPINGS.keys()),
             EvalBackend.OPEN_COMPASS: OpenCompassBackendManager.list_datasets(),
-            EvalBackend.VLM_EVAL_KIT: VLMEvalKitBackendManager.list_supported_datasets()
         }
+        try:
+            from evalscope.backend.vlm_eval_kit import VLMEvalKitBackendManager
+            vlm_datasets = VLMEvalKitBackendManager.list_supported_datasets()
+            res[EvalBackend.VLM_EVAL_KIT] = vlm_datasets
+        except ImportError:
+            # VLMEvalKit is optional (its import may fail, e.g. a cv2 import error); re-raise only if that backend was requested.
+            if eval_backend == 'VLMEvalKit':
+                raise
+        return res
 
     def _init_eval_dataset(self):
         if isinstance(self.eval_dataset, str):
             self.eval_dataset = [self.eval_dataset]
 
-        all_eval_dataset = self.list_eval_dataset()
+        all_eval_dataset = self.list_eval_dataset(self.eval_backend)
         dataset_mapping = {dataset.lower(): dataset for dataset in all_eval_dataset[self.eval_backend]}
         valid_dataset = []
         for dataset in self.eval_dataset:

diff --git a/swift/llm/eval/eval.py b/swift/llm/eval/eval.py
@@ -27,7 +27,7 @@ def run(self):
         deploy_context = nullcontext() if args.eval_url else run_deploy(args, return_url=True)
         with deploy_context as base_url:
             base_url = args.eval_url or base_url
-            url = os.path.join(base_url, 'chat/completions')
+            url = f"{base_url.rstrip('/')}/chat/completions"
 
             task_cfg = self.get_task_cfg(args.eval_dataset, args.eval_backend, url)
             result = self.get_task_result(task_cfg)

diff --git a/swift/llm/infer/infer_engine/infer_client.py b/swift/llm/infer/infer_engine/infer_client.py
@@ -70,7 +70,7 @@ def _get_request_kwargs(self) -> Dict[str, Any]:
         return request_kwargs
 
     async def get_model_list_async(self) -> ModelList:
-        url = os.path.join(self.base_url, 'models')
+        url = f"{self.base_url.rstrip('/')}/models"
         async with aiohttp.ClientSession() as session:
             async with session.get(url, **self._get_request_kwargs()) as resp:
                 resp_obj = await resp.json()
@@ -133,7 +133,7 @@ async def infer_async(
                 model = self.models[0]
             else:
                 raise ValueError(f'Please explicitly specify the model. Available models: {self.models}.')
-        url = os.path.join(self.base_url, 'chat/completions')
+        url = f"{self.base_url.rstrip('/')}/chat/completions"
 
         request_data = self._prepare_request_data(model, infer_request, request_config)
         if request_config.stream: