diff --git a/compare_alpha_baselines.py b/compare_alpha_baselines.py
new file mode 100644
index 000000000..232da9f56
--- /dev/null
+++ b/compare_alpha_baselines.py
@@ -0,0 +1,210 @@
+"""
+Compare ALPHA20 vs ALPHA158 baseline backtest performance.
+
+This script runs backtests for both factor sets and compares key metrics:
+- 年化收益 (Annualized Return)
+- 最大回撤 (Maximum Drawdown)
+- 信息比率 (Information Ratio)
+- IC均值 (Mean IC)
+- ICIR (IC Information Ratio)
+
+Usage:
+    python compare_alpha_baselines.py
+
+Note: Requires Docker environment with qlib image prepared.
+"""
+
+from __future__ import annotations
+
+import sys
+from typing import Any
+
+import docker
+from rdagent.log import rdagent_logger as logger
+from rdagent.scenarios.qlib.developer.factor_runner import QlibFactorRunner
+from rdagent.scenarios.qlib.experiment.factor_experiment import QlibFactorExperiment
+from rdagent.utils.qlib import ALPHA20, ALPHA158
+
+BACKTEST_CONFIG = {  # NOTE(review): only read by main() for display; this script never passes it to the runner
+    "train_start": "2024-01-01",
+    "train_end": "2024-12-31",
+    "valid_start": "2025-01-01",
+    "valid_end": "2025-06-30",
+    "test_start": "2025-07-01",
+    "test_end": "2026-03-30",
+    "market": "csi500",
+}
+
+
+def create_experiment(
+    factor_dict: dict[str, str],
+    name: str,
+) -> QlibFactorExperiment:
+    """
+    Build a new experiment whose ``base_features`` is a shallow copy of ``factor_dict``.
+
+    Args:
+        factor_dict: Mapping of factor names to expressions
+        name: Label that appears only in the log message
+
+    Returns:
+        The freshly constructed QlibFactorExperiment
+    """
+    factor_count = len(factor_dict)
+    logger.info(f"Creating experiment: {name} with {factor_count} factors")
+
+    experiment = QlibFactorExperiment()
+    experiment.base_features = factor_dict.copy()
+    return experiment
+
+
+def run_backtest(exp: QlibFactorExperiment) -> dict[str, Any]:
+    """
+    Execute one backtest through QlibFactorRunner and summarize the outcome.
+
+    Args:
+        exp: Experiment handed to ``QlibFactorRunner.develop``
+
+    Returns:
+        Extracted metrics on success, otherwise a dict with a single "error" entry
+    """
+    runner = QlibFactorRunner()
+
+    try:
+        finished = runner.develop(exp)
+        if finished.result is not None:
+            return extract_metrics(finished.result)
+        # No result object came back: report the captured stdout as the failure reason.
+        logger.error(f"Backtest failed: {finished.stdout}")
+        return {"error": finished.stdout}
+    except (RuntimeError, ValueError, KeyError) as e:
+        logger.error(f"Exception during backtest: {e}")
+        return {"error": str(e)}
+
+
+def extract_metrics(result: Any) -> dict[str, Any]:
+    """
+    Extract key metrics from a backtest result indexed by metric name.
+
+    Args:
+        result: Object with an ``index`` of metric names (e.g. a pandas Series); anything else yields {}
+
+    Returns:
+        Dictionary keyed by display label; rows whose name contains "rank" never feed IC均值 / ICIR
+    """
+    metrics: dict[str, Any] = {}
+    substring_rules = {"annualized_return": "年化收益", "max_drawdown": "最大回撤", "information_ratio": "信息比率"}
+
+    for key in getattr(result, "index", ()):
+        name = str(key).lower()  # index labels are not guaranteed to be strings
+        for needle, label in substring_rules.items():
+            if needle in name:
+                metrics[label] = result[key]  # as before, the last matching row wins
+        if "rank" in name:
+            continue  # "Rank IC" / "Rank ICIR" must not overwrite the plain IC metrics
+        if name in ("ic", "ic.mean") or "ic_mean" in name:  # Qlib reports mean IC under the bare name "IC"
+            metrics["IC均值"] = result[key]
+        if name == "ic.ir" or "icir" in name:
+            metrics["ICIR"] = result[key]
+
+    return metrics
+
+
+def format_percentage(value: Any) -> str:
+    """Render ``value`` times 100 with two decimals and a "%" sign; None gives "N/A", non-numeric gives str(value)."""
+    try:
+        scaled = 100 * float(value) if value is not None else None
+    except (TypeError, ValueError):
+        return str(value)
+    # None is reported as missing rather than being coerced
+    return "N/A" if scaled is None else f"{scaled:.2f}%"
+
+
+def format_number(value: Any) -> str:
+    """Render ``value`` as a float with two decimals; None gives "N/A", non-numeric gives str(value)."""
+    try:
+        number = None if value is None else float(value)
+    except (TypeError, ValueError):
+        return str(value)
+    # A missing metric is shown as a placeholder, not as "None"
+    return "N/A" if number is None else f"{number:.2f}"
+
+
+def print_comparison_table(
+    alpha20_results: dict[str, Any],
+    alpha158_results: dict[str, Any],
+) -> None:
+    """
+    Write a side-by-side metric table for the two baselines to stdout.
+
+    Args:
+        alpha20_results: Metric label -> value mapping for ALPHA20
+        alpha158_results: Metric label -> value mapping for ALPHA158
+
+    A label missing from a mapping is passed to the formatter as None.
+    """
+    # One separator string reused for the three full-width rules.
+    rule = "=" * 60
+
+    print("\n" + rule)
+    print("ALPHA20 vs ALPHA158 Baseline Comparison")
+    print(rule)
+
+    print(f"| {'Metric':<15} | {'ALPHA20':<12} | {'ALPHA158':<12} |")
+    print(f"|{'-' * 17}|{'-' * 14}|{'-' * 14}|")
+
+    # These two labels go through format_percentage; every other row uses format_number.
+    percent_labels = ("年化收益", "最大回撤")
+    row_labels = ("年化收益", "最大回撤", "信息比率", "IC均值", "ICIR")
+
+    for metric in row_labels:
+        render = format_percentage if metric in percent_labels else format_number
+        a20_str = render(alpha20_results.get(metric))
+        a158_str = render(alpha158_results.get(metric))
+        print(f"| {metric:<15} | {a20_str:<12} | {a158_str:<12} |")
+
+    print(rule)
+    print(f"\nFactor Count: ALPHA20 = {len(ALPHA20)}, ALPHA158 = {len(ALPHA158)}")
+
+
+def main() -> None:
+    """Print the configuration, verify Docker is reachable, then backtest and compare both baselines."""
+    print("=" * 60)
+    print("ALPHA Baseline Comparison Script")
+    print("=" * 60)
+    print("\nConfiguration:")
+    print(f"  Train period: {BACKTEST_CONFIG['train_start']} ~ {BACKTEST_CONFIG['train_end']}")
+    print(f"  Valid period: {BACKTEST_CONFIG['valid_start']} ~ {BACKTEST_CONFIG['valid_end']}")
+    print(f"  Test period:  {BACKTEST_CONFIG['test_start']} ~ {BACKTEST_CONFIG['test_end']}")
+    print(f"  Market:       {BACKTEST_CONFIG['market']}")
+    print(f"\nALPHA20 factors: {len(ALPHA20)}")
+    print(f"ALPHA158 factors: {len(ALPHA158)}")
+
+    try:
+        client = docker.from_env()
+        try:
+            client.ping()
+        finally:
+            client.close()  # the client is only a reachability probe; do not keep its session open
+        print("\n✓ Docker connection successful")
+    except (docker.errors.DockerException, OSError) as e:
+        print(f"\n⚠ Docker not available: {e}")
+        print("  Backtests require Docker with qlib image")
+        print("  Run: docker build -t local_qlib:latest -f Dockerfile_qlib .")
+        sys.exit(1)
+
+    print("\n" + "-" * 60)
+    print("Creating experiments...")
+    alpha20_exp = create_experiment(ALPHA20, "ALPHA20")
+    alpha158_exp = create_experiment(ALPHA158, "ALPHA158")
+    print("\n" + "-" * 60)
+    print("Running ALPHA20 backtest...")
+    alpha20_results = run_backtest(alpha20_exp)
+    print("\n" + "-" * 60)
+    print("Running ALPHA158 backtest...")
+    alpha158_results = run_backtest(alpha158_exp)
+    print_comparison_table(alpha20_results, alpha158_results)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/rdagent/app/cli.py b/rdagent/app/cli.py
index 079371728..6b34c4720 100644
--- a/rdagent/app/cli.py
+++ b/rdagent/app/cli.py
@@ -82,12 +82,13 @@ def ds_user_interact(port=19900):
 @app.command(name="fin_factor")
 def fin_factor_cli(
     path: Optional[str] = None,
+    base_features_path: Optional[str] = None,
     step_n: Optional[int] = None,
     loop_n: Optional[int] = None,
     all_duration: Optional[str] = None,
     checkout: CheckoutOption = True,
 ):
-    fin_factor(path=path, step_n=step_n, loop_n=loop_n, all_duration=all_duration, checkout=checkout)
+    fin_factor(path=path, base_features_path=base_features_path, step_n=step_n, loop_n=loop_n, all_duration=all_duration, checkout=checkout)
 
 
 @app.command(name="fin_model")
diff --git a/rdagent/components/workflow/rd_loop.py b/rdagent/components/workflow/rd_loop.py
index f93c51f95..d66b612e7 100644
--- a/rdagent/components/workflow/rd_loop.py
+++ b/rdagent/components/workflow/rd_loop.py
@@ -124,7 +124,7 @@ def _interact_init_params(self) -> None:
             logger.info("Received user instruction response.")
             self.plan.update(res_dict)
 
-            if "feature_codes" not in self.plan:
+            if "feature_codes" in self.plan:
                 self.plan[
                     "user_instruction"
                 ] += f"\n\n{str(list(self.plan['feature_codes'].keys()))} has been configured as the base factor; do not generate duplicate factors."
diff --git a/rdagent/scenarios/qlib/developer/utils.py b/rdagent/scenarios/qlib/developer/utils.py
index cd4abef3b..36f84b59f 100644
--- a/rdagent/scenarios/qlib/developer/utils.py
+++ b/rdagent/scenarios/qlib/developer/utils.py
@@ -1,7 +1,6 @@
-from typing import List
+import re
 
 import pandas as pd
-
 from rdagent.components.coder.CoSTEER.evaluators import CoSTEERMultiFeedback
 from rdagent.components.coder.factor_coder.factor import FactorFBWorkspace, FactorTask
 from rdagent.core.conf import RD_AGENT_SETTINGS
@@ -11,6 +10,33 @@
 from rdagent.scenarios.qlib.experiment.factor_experiment import QlibFactorExperiment
 
 
+def _fix_groupby_rolling_pattern(code: str) -> str:
+    """
+    Rewrite per-instrument groupby().rolling() chains to transform(), which avoids index duplication.
+
+    Converts: .groupby(level='instrument').rolling(window=N).mean()   (also the positional rolling(N))
+    To:       .groupby(level='instrument').transform(lambda x: x.rolling(window=N).mean())
+    Chains directly followed by .reset_index(...) or .droplevel(...) are returned unchanged.
+    """
+    pattern = (
+        r"\.groupby\s*\(\s*level\s*=\s*['\"]instrument['\"]\s*\)"
+        r"\s*\.\s*rolling\s*\(\s*(?:window\s*=\s*)?(\d+)\s*\)"
+        r"\s*\.\s*(mean|sum|std|min|max)\s*\(\s*\)"
+        # Code that already strips the extra level would lose a real index level after the rewrite.
+        r"(?!\s*\.\s*(?:reset_index|droplevel)\s*\()"
+    )
+
+    def replace_func(match: re.Match[str]) -> str:
+        window, operation = match.group(1), match.group(2)
+        return f".groupby(level='instrument').transform(lambda x: x.rolling(window={window}).{operation}())"
+
+    fixed_code = re.sub(pattern, replace_func, code)
+    if fixed_code != code:
+        logger.info("Auto-fixed groupby().rolling() pattern to use transform()")
+
+    return fixed_code
+
+
 def _build_base_feature_workspaces(exp: QlibFactorExperiment) -> list[FactorFBWorkspace]:
     workspaces: list[FactorFBWorkspace] = []
     for file_name, code in exp.base_feature_codes.items():
@@ -19,9 +45,10 @@ def _build_base_feature_workspaces(exp: QlibFactorExperiment) -> list[FactorFBWo
                 factor_name=file_name,
                 factor_description=f"Base feature from {file_name}",
                 factor_formulation="",
-            )
+            ),
         )
-        workspace.inject_files(**{"factor.py": code})
+        fixed_code = _fix_groupby_rolling_pattern(code)
+        workspace.inject_files(**{"factor.py": fixed_code})
         workspaces.append(workspace)
     return workspaces
 
@@ -54,13 +81,13 @@ def _resolve_index_level_values(df: pd.DataFrame, level_name: str) -> pd.Index |
     if all(first_values.equals(values) for values in candidate_values[1:]):
         logger.warning(
             f"Factor dataframe has duplicated '{level_name}' index levels at positions {matching_levels}; "
-            "their values are identical, so the first one is used."
+            "their values are identical, so the first one is used.",
         )
         return first_values
 
     logger.warning(
         f"Skip factor dataframe because index has ambiguous duplicated '{level_name}' levels at positions "
-        f"{matching_levels}. index names={list(df.index.names)}"
+        f"{matching_levels}. index names={list(df.index.names)}",
     )
     return None
 
@@ -128,7 +155,7 @@ def _process_message_and_df(
     return error_message
 
 
-def process_factor_data(exp_or_list: List[QlibFactorExperiment] | QlibFactorExperiment) -> pd.DataFrame:
+def process_factor_data(exp_or_list: list[QlibFactorExperiment] | QlibFactorExperiment) -> pd.DataFrame:
     """
     Process and combine factor data from experiment implementations.
 
@@ -165,13 +192,13 @@ def process_factor_data(exp_or_list: List[QlibFactorExperiment] | QlibFactorExpe
         except Exception as concat_error:
             concat_index_info = " | ".join([f"df#{i}: {_format_index_info(df)}" for i, df in enumerate(factor_dfs)])
             logger.warning(
-                f"Failed to concat factor data due to index misalignment. concat_error={concat_error}; collected_index_info={concat_index_info}"
+                f"Failed to concat factor data due to index misalignment. concat_error={concat_error}; collected_index_info={concat_index_info}",
             )
             raise FactorEmptyError(
                 "Failed to concat factor data due to index misalignment or incompatible index structure. "
-                f"concat_error={concat_error}; collected_index_info={concat_index_info}; details={error_message}"
+                f"concat_error={concat_error}; collected_index_info={concat_index_info}; details={error_message}",
             ) from concat_error
     else:
         raise FactorEmptyError(
-            f"No valid factor data found to merge (in process_factor_data) because of {error_message}."
+            f"No valid factor data found to merge (in process_factor_data) because of {error_message}.",
         )
diff --git a/rdagent/scenarios/qlib/experiment/prompts.yaml b/rdagent/scenarios/qlib/experiment/prompts.yaml
index ae173a77b..8bc34b762 100644
--- a/rdagent/scenarios/qlib/experiment/prompts.yaml
+++ b/rdagent/scenarios/qlib/experiment/prompts.yaml
@@ -31,6 +31,40 @@ qlib_factor_interface: |-
   Your python code should follow the interface to better interact with the user's system.
   Your python code should contain the following part: the import part, the function part, and the main part. You should write a main function name: "calculate_{function_name}" and call this function in "if __name__ == __main__" part. Don't write any try-except block in your python code. The user will catch the exception message and provide the feedback to you.
   User will write your python code into a python file and execute the file directly with "python {your_file_name}.py". You should calculate the factor values and save the result into a HDF5(H5) file named "result.h5" in the same directory as your python file. The result file is a HDF5(H5) file containing a pandas dataframe. The index of the dataframe is the "datetime" and "instrument", and the single column name is the factor name,and the value is the factor value. The result file should be saved in the same directory as your python file.
+  
+  **CRITICAL: Pandas MultiIndex groupby().rolling() Pattern**
+  
+  When working with MultiIndexed Series (index: ['datetime', 'instrument']), you MUST use the correct pattern for rolling operations:
+  
+  ❌ WRONG - This causes "ValueError: The name instrument occurs multiple times":
+  ```python
+  # DO NOT use this pattern - it creates a 3-level index with duplicate 'instrument'
+  ma_20 = volume.groupby(level='instrument').rolling(window=20).mean()
+  result = volume / ma_20  # FAILS!
+  ```
+  
+  ✅ CORRECT - Use transform() to preserve the 2-level index structure:
+  ```python
+  # CORRECT: Use transform() with lambda to preserve index structure
+  ma_20 = volume.groupby(level='instrument').transform(lambda x: x.rolling(window=20).mean())
+  result = volume / ma_20  # Works correctly!
+  ```
+  
+  The key difference:
+  - `groupby().rolling()` returns a Series with extra index level: ['instrument', 'datetime', 'instrument']
+  - `groupby().transform(lambda x: x.rolling().mean())` preserves original index: ['datetime', 'instrument']
+  
+  For other rolling operations (sum, std, min, max, etc.), always use transform():
+  ```python
+  # Rolling standard deviation
+  rolling_std = series.groupby(level='instrument').transform(lambda x: x.rolling(window=20).std())
+  
+  # Rolling sum
+  rolling_sum = series.groupby(level='instrument').transform(lambda x: x.rolling(window=10).sum())
+  
+  # Rolling min/max
+  rolling_min = series.groupby(level='instrument').transform(lambda x: x.rolling(window=5).min())
+  ```
 
 qlib_factor_strategy: |-
   Ensure that for every step of data processing, the data format (including indexes) is clearly explained through comments.
diff --git a/rdagent/scenarios/qlib/prompts.yaml b/rdagent/scenarios/qlib/prompts.yaml
index b1a147af3..2165a9988 100644
--- a/rdagent/scenarios/qlib/prompts.yaml
+++ b/rdagent/scenarios/qlib/prompts.yaml
@@ -93,10 +93,11 @@ model_hypothesis_specification: |-
   8. Use standard libraries for baseline models, but also explore custom architecture designs to investigate novel structures. After sufficient trials with traditional models, aim for innovation comparable to top-tier AI conferences (NeurIPS, ICLR, ICML, SIGKDD, etc.) in time series modeling.
 
 factor_hypothesis_specification: |-
-  1. **1-5 Factors per Generation:**
-    - Ensure each generation produces 1-5 factors.
-    - Balance simplicity and complexity to build a robust factor library.
-    - Make full use of the financial data provided to you instead of focusing solely on a specific field.
+  1. **Quality Over Quantity (1-3 Factors per Generation):**
+    - Generate 1-3 HIGH-QUALITY factors, not 5 random ones.
+    - Historical validation: 29 quality factors beat 158 quantity factors.
+    - Each factor must have clear economic intuition and be distinct from baseline.
+    - Justify why each factor is expected to improve performance.
   2. **Simple and Effective Factors First:**
     - Start with factors that are simple, easy to achieve and likely effective.
     - Concisely explain why these factors are expected to work.
@@ -111,6 +112,36 @@ factor_hypothesis_specification: |-
     - Highlight that factors surpassing SOTA are included in the library to avoid re-implementation.
     - No matter how many factors you plan to generate, only reply with one set of hypothesis and reason. The hypothesis can include the proposal of multiple factors at the same time.
 
+baseline_context: |-
+  **Current Baseline (29 Factors):**
+  
+  **ALPHA20 Covers (20 built-in factors):**
+  - Correlation: CORR5, CORR10, CORR20, CORR60 (price-volume correlation)
+  - Correlation: CORD5, CORD10, CORD60 (return-volume correlation)
+  - Residuals: RESI5, RESI10 (regression residuals)
+  - Volatility: STD5, WVMA5, WVMA60, VSTD5 (price/volume volatility)
+  - Momentum: ROC60 (price momentum)
+  - Trend Strength: RSQR5, RSQR10, RSQR20, RSQR60 (R-squared)
+  - Price Patterns: KLEN, KLOW (candlestick patterns)
+  
+  **Custom 9 Factors (from Selected Baseline):**
+  - Net_Volume_Flow_20d: 20-day signed volume flow ratio (volume direction)
+  - Return_Kurtosis_20D: 20-day return distribution kurtosis (tail risk)
+  - Return_Skewness_20D: 20-day return distribution skewness (asymmetry)
+  - Return_ZScore_20D: 20-day return normalization (standardized returns)
+  - Return_Sign_Autocorrelation_20D: 20-day sign persistence (momentum quality)
+  - Risk_Adjusted_Momentum_20D: 20-day momentum/volatility ratio (risk-adjusted)
+  - Volatility_20D: 20-day volatility (price variability)
+  - Volume_Momentum_20D: 20-day volume momentum (volume trends)
+  - Volume_ZScore_20D: 20-day volume normalization (standardized volume)
+  
+  **Direction Guidance:**
+  - ✅ UNDEREXPLORED: liquidity risk, order flow imbalances, sentiment proxies, tail risk measures
+  - ⚠️ PARTIALLY COVERED: risk-adjusted returns, volume-price correlation, distribution moments
+  - ❌ SATURATED: simple moving averages, basic momentum, raw volatility
+  
+  **IMPORTANT: Do not duplicate the baseline factors or the SATURATED directions listed above. Focus on NEW dimensions that complement the baseline.**
+
 factor_experiment_output_format: |-
   The output should follow JSON format. The schema is as follows:
   {
diff --git a/rdagent/scenarios/qlib/proposal/factor_proposal.py b/rdagent/scenarios/qlib/proposal/factor_proposal.py
index 5ce9b70de..e19e79e71 100644
--- a/rdagent/scenarios/qlib/proposal/factor_proposal.py
+++ b/rdagent/scenarios/qlib/proposal/factor_proposal.py
@@ -1,5 +1,4 @@
 import json
-from typing import List, Tuple
 
 from rdagent.components.coder.factor_coder.factor import FactorExperiment, FactorTask
 from rdagent.components.proposal import FactorHypothesis2Experiment, FactorHypothesisGen
@@ -12,11 +11,61 @@
 QlibFactorHypothesis = Hypothesis
 
 
+def _generate_dynamic_rag(trace: Trace) -> str:
+    """Choose the advice sentence returned to the caller from the number and text of past rounds."""
+    # NOTE(review): keywords are matched as plain substrings, so e.g. "var" also hits "variance".
+    direction_keywords = {
+        "momentum": ["momentum", "roc", "return", "price change"],
+        "volatility": ["volatility", "std", "variance", "risk"],
+        "volume": ["volume", "vwap", "turnover", "flow"],
+        "correlation": ["correlation", "corr", "cord"],
+        "distribution": ["kurtosis", "skewness", "zscore", "distribution"],
+        "ml": ["machine learning", "neural", "transform", "predict"],
+        "liquidity": ["liquidity", "bid", "ask", "spread"],
+        "sentiment": ["sentiment", "news", "analyst", "emotion"],
+        "tail_risk": ["tail", "extreme", "var", "drawdown"],
+        "order_flow": ["order flow", "order imbalance", "trade imbalance"],
+    }
+
+    if len(trace.hist) == 0:
+        return (
+            "Start with high-quality factors from underexplored directions: "
+            "liquidity risk, order flow imbalances, sentiment proxies, tail risk measures. "
+            "Avoid duplicating existing baseline factors."
+        )
+    if len(trace.hist) < 5:  # early rounds get fixed advice, so decide before scanning the history
+        return (
+            "Focus on simple, interpretable factors. "
+            "Start with underexplored directions: liquidity risk, order flow imbalances."
+        )
+
+    explored = set()
+    for exp, _ in trace.hist:
+        hypothesis_text = getattr(getattr(exp, "hypothesis", None), "hypothesis", None)
+        if not isinstance(hypothesis_text, str):
+            continue  # an experiment without a textual hypothesis contributes nothing
+        lowered = hypothesis_text.lower()
+        explored.update(d for d, kws in direction_keywords.items() if any(kw in lowered for kw in kws))
+
+    underexplored = ["liquidity", "sentiment", "tail_risk", "order_flow"]
+    recommendations = [d for d in underexplored if d not in explored]
+    if recommendations:
+        return (
+            f"Focus on underexplored directions: {', '.join(recommendations[:3])}. "
+            "These have high potential for alpha generation."
+        )
+
+    return (
+        "Consider machine learning-based factors or enhancing existing directions "
+        "with better normalization techniques."
+    )
+
+
 class QlibFactorHypothesisGen(FactorHypothesisGen):
-    def __init__(self, scen: Scenario) -> Tuple[dict, bool]:
+    def __init__(self, scen: Scenario) -> tuple[dict, bool]:
         super().__init__(scen)
 
-    def prepare_context(self, trace: Trace) -> Tuple[dict, bool]:
+    def prepare_context(self, trace: Trace) -> tuple[dict, bool]:
         hypothesis_and_feedback = (
             T("scenarios.qlib.prompts:hypothesis_and_feedback").r(
                 trace=trace,
@@ -26,7 +75,7 @@ def prepare_context(self, trace: Trace) -> Tuple[dict, bool]:
         )
         last_hypothesis_and_feedback = (
             T("scenarios.qlib.prompts:last_hypothesis_and_feedback").r(
-                experiment=trace.hist[-1][0], feedback=trace.hist[-1][1]
+                experiment=trace.hist[-1][0], feedback=trace.hist[-1][1],
             )
             if len(trace.hist) > 0
             else "No previous hypothesis and feedback available since it's the first round."
@@ -35,11 +84,8 @@ def prepare_context(self, trace: Trace) -> Tuple[dict, bool]:
         context_dict = {
             "hypothesis_and_feedback": hypothesis_and_feedback,
             "last_hypothesis_and_feedback": last_hypothesis_and_feedback,
-            "RAG": (
-                "Try the easiest and fastest factors to experiment with from various perspectives first."
-                if len(trace.hist) < 15
-                else "Now, you need to try factors that can achieve high IC (e.g., machine learning-based factors)."
-            ),
+            "baseline_context": T("scenarios.qlib.prompts:baseline_context").r(),
+            "RAG": _generate_dynamic_rag(trace),
             "hypothesis_output_format": T("scenarios.qlib.prompts:factor_hypothesis_output_format").r(),
             "hypothesis_specification": T("scenarios.qlib.prompts:factor_hypothesis_specification").r(),
         }
@@ -59,7 +105,7 @@ def convert_response(self, response: str) -> Hypothesis:
 
 
 class QlibFactorHypothesis2Experiment(FactorHypothesis2Experiment):
-    def prepare_context(self, hypothesis: Hypothesis, trace: Trace) -> Tuple[dict | bool]:
+    def prepare_context(self, hypothesis: Hypothesis, trace: Trace) -> tuple[dict | bool]:
         if isinstance(trace.scen, QlibQuantScenario):
             scenario = trace.scen.get_scenario_all_desc(action="factor")
         else:
@@ -105,7 +151,7 @@ def convert_response(self, response: str, hypothesis: Hypothesis, trace: Trace)
                     factor_description=description,
                     factor_formulation=formulation,
                     variables=variables,
-                )
+                ),
             )
 
         exp = QlibFactorExperiment(tasks, hypothesis=hypothesis)