From 2334fc42aa5a69eeabec7e6c21be5b882966aa4c Mon Sep 17 00:00:00 2001 From: Akshay Tripathi <142379735+AkshayTripathiShorthillsAI@users.noreply.github.com> Date: Thu, 7 May 2026 23:51:19 -0700 Subject: [PATCH 1/3] [Fix] Qwen3-VL: set do_sample explicitly to avoid sampling param conflicts The generate_kwargs dict passed top_p, top_k, and temperature to model.generate() without setting do_sample. HuggingFace defaults do_sample to False, which silently ignores sampling parameters and falls back to greedy decoding, causing evaluation results to not reflect the configured sampling behavior. This commit derives do_sample from temperature (do_sample=True when temperature > 0) and only includes top_p, top_k, and temperature in generate_kwargs when sampling is enabled, avoiding conflicting-flag warnings in the greedy path. Users can override via kwargs. Closes #1275 --- vlmeval/vlm/qwen3_vl/model.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/vlmeval/vlm/qwen3_vl/model.py b/vlmeval/vlm/qwen3_vl/model.py index 7f224371f..0c26fd8d8 100644 --- a/vlmeval/vlm/qwen3_vl/model.py +++ b/vlmeval/vlm/qwen3_vl/model.py @@ -74,13 +74,19 @@ def __init__( self.temperature = temperature if self.total_pixels and self.total_pixels > 24576 * 32 * 32: print('The total number of video tokens might too large, resulting in an overly long input sequence.') + do_sample = kwargs.pop('do_sample', temperature is not None and temperature > 0) + self.generate_kwargs = dict( max_new_tokens=self.max_new_tokens, - top_p=top_p, - top_k=top_k, - temperature=temperature, repetition_penalty=repetition_penalty, + do_sample=do_sample, ) + if do_sample: + self.generate_kwargs.update( + top_p=top_p, + top_k=top_k, + temperature=temperature, + ) self.system_prompt = system_prompt self.verbose = verbose self.post_process = post_process From d8e95021228a9f140c12096483837fed8cf71a2f Mon Sep 17 00:00:00 2001 From: Akshay Tripathi <142379735+SHAI-Akshay-Tripathi@users.noreply.github.com> Date: Sat, 16 May 2026 19:35:00 +0530 Subject: [PATCH 2/3] Remove unused Polygon3 from requirements.txt Removed polygon3 dependency from requirements. --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index ef9c78bbc..27c8a2014 100644 --- a/requirements.txt +++ b/requirements.txt @@ -38,7 +38,6 @@ openpyxl pandas pdf2image>=1.17.0 pillow -polygon3>=3.0.9.1 portalocker protobuf pylatexenc==2.10 From 672428333bc5f474ff0b65c7a998f0ed0f0a888f Mon Sep 17 00:00:00 2001 From: Akshay Tripathi <142379735+SHAI-Akshay-Tripathi@users.noreply.github.com> Date: Sat, 16 May 2026 19:36:09 +0530 Subject: [PATCH 3/3] Remove unused Polygon3 from Ocrbench_v2 requirements Removed 'Polygon3' from the requirements. --- vlmeval/dataset/utils/Ocrbench_v2/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/vlmeval/dataset/utils/Ocrbench_v2/requirements.txt b/vlmeval/dataset/utils/Ocrbench_v2/requirements.txt index ecdff4394..801d02b3c 100644 --- a/vlmeval/dataset/utils/Ocrbench_v2/requirements.txt +++ b/vlmeval/dataset/utils/Ocrbench_v2/requirements.txt @@ -7,6 +7,5 @@ Levenshtein lxml nltk numpy -Polygon3 tqdm zss