Przeglądaj źródła

训练方法保留 SFT、DPO、PPO,PEFT 方法保留 lora、qlora、adalora

lxylxy123321 5 dni temu
rodzic
commit
9a25fae9d2

+ 36 - 44
backend/app/engines/text_engine.py

@@ -49,7 +49,7 @@ class TextEngine(BaseEngine):
     async def load_model(self, model_id: str, **kwargs: Any) -> None:
         """下载并加载基础模型。GPU 加载超时直接报错。"""
         import torch
-        from transformers import AutoModelForCausalLM, AutoTokenizer
+        from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 
         # 远程节点不查数据库,直接扫描本地模型目录
         local_path = str(settings.models_dir / model_id.replace("/", "_"))
@@ -91,12 +91,16 @@ class TextEngine(BaseEngine):
             "attn_implementation": "sdpa",
         }
         if quantization == "4bit" or quantization == "qlora":
-            load_kwargs["load_in_4bit"] = True
-            load_kwargs["bnb_4bit_quant_type"] = "nf4"
-            load_kwargs["bnb_4bit_use_double_quant"] = True
-            load_kwargs["bnb_4bit_compute_dtype"] = torch.float16
+            load_kwargs["quantization_config"] = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_quant_type="nf4",
+                bnb_4bit_use_double_quant=True,
+                bnb_4bit_compute_dtype=torch.float16,
+            )
         elif quantization == "8bit":
-            load_kwargs["load_in_8bit"] = True
+            load_kwargs["quantization_config"] = BitsAndBytesConfig(
+                load_in_8bit=True,
+            )
 
         self._tokenizer = AutoTokenizer.from_pretrained(local_path, trust_remote_code=True)
         if self._tokenizer.pad_token is None:
@@ -133,18 +137,14 @@ class TextEngine(BaseEngine):
         """根据 PEFT 方法返回对应的配置对象。"""
         from app.peft import (
             build_adalora_config,
-            build_ia3_config,
             build_lora_config,
-            build_prefix_tuning_config,
             build_qlora_config,
         )
 
         builders = {
             "lora": build_lora_config,
             "qlora": build_qlora_config,
-            "ia3": build_ia3_config,
             "adalora": build_adalora_config,
-            "prefix_tuning": build_prefix_tuning_config,
         }
         builder = builders.get(method, build_lora_config)
         return builder(params)
@@ -225,7 +225,7 @@ class TextEngine(BaseEngine):
                 data_collator=DataCollatorForSeq2Seq(self._tokenizer),
                 callbacks=all_callbacks,
             )
-        else:
+        elif task_type == "dpo":
             from trl import DPOConfig, DPOTrainer
 
             base_trainer_kwargs = dict(
@@ -243,39 +243,31 @@ class TextEngine(BaseEngine):
                 dataloader_pin_memory=False,
             )
 
-            if task_type == "dpo":
-                trainer = DPOTrainer(
-                    model=self._model,
-                    args=DPOConfig(**base_trainer_kwargs),
-                    train_dataset=dataset,
-                    processing_class=self._tokenizer,
-                )
-            elif task_type == "orpo":
-                from trl import ORPOConfig, ORPOTrainer
-
-                trainer = ORPOTrainer(
-                    model=self._model,
-                    args=ORPOConfig(**base_trainer_kwargs),
-                    train_dataset=dataset,
-                    processing_class=self._tokenizer,
-                )
-            elif task_type == "kto":
-                from trl import KTOConfig, KTOTrainer
-
-                trainer = KTOTrainer(
-                    model=self._model,
-                    args=KTOConfig(**base_trainer_kwargs),
-                    train_dataset=dataset,
-                    processing_class=self._tokenizer,
-                )
-            else:
-                trainer = Trainer(
-                    model=self._model,
-                    args=tr_args,
-                    train_dataset=dataset,
-                    data_collator=DataCollatorForSeq2Seq(self._tokenizer),
-                    callbacks=all_callbacks,
-                )
+            trainer = DPOTrainer(
+                model=self._model,
+                args=DPOConfig(**base_trainer_kwargs),
+                train_dataset=dataset,
+                processing_class=self._tokenizer,
+            )
+        elif task_type == "ppo":
+            from transformers import Trainer
+
+            logger.warning(
+                "PPO mode: falling back to SFT Trainer. "
+                "PPO requires a dedicated reward model setup. "
+                "Current implementation trains as supervised fine-tuning."
+            )
+            trainer = Trainer(
+                model=self._model,
+                args=tr_args,
+                train_dataset=dataset,
+                data_collator=DataCollatorForSeq2Seq(self._tokenizer),
+                callbacks=all_callbacks,
+            )
+        else:
+            from transformers import Trainer
+
+            raise ValueError(f"Unsupported task_type: {task_type}. Supported: sft, dpo, ppo")
 
         try:
             trainer.train()

+ 10 - 34
backend/app/peft/__init__.py

@@ -22,14 +22,16 @@ def build_lora_config(params: dict[str, Any]):
 def build_qlora_config(params: dict[str, Any]):
     """返回 (bitsandbytes_config, peft.LoraConfig) 二元组。"""
     from peft import LoraConfig, TaskType
-
-    bnb_params = {
-        "load_in_4bit": params.get("qlora_bits", 4) == 4,
-        "load_in_8bit": params.get("qlora_bits", 4) == 8,
-        "bnb_4bit_quant_type": params.get("qlora_type", "nf4"),
-        "bnb_4bit_use_double_quant": params.get("qlora_double_quant", True),
-        "bnb_4bit_compute_dtype": "float16",
-    }
+    from transformers import BitsAndBytesConfig
+    import torch
+
+    bnb_params = BitsAndBytesConfig(
+        load_in_4bit=params.get("qlora_bits", 4) == 4,
+        load_in_8bit=params.get("qlora_bits", 4) == 8,
+        bnb_4bit_quant_type=params.get("qlora_type", "nf4"),
+        bnb_4bit_use_double_quant=params.get("qlora_double_quant", True),
+        bnb_4bit_compute_dtype=torch.float16,
+    )
 
     target_modules = params.get("lora_target_modules", "all-linear")
     if isinstance(target_modules, str) and target_modules == "all-linear":
@@ -46,20 +48,6 @@ def build_qlora_config(params: dict[str, Any]):
     return bnb_params, lora_cfg
 
 
-def build_ia3_config(params: dict[str, Any]):
-    """返回实际的 peft.IA3Config 对象。"""
-    from peft import IA3Config, TaskType
-
-    target_modules = params.get("ia3_target_modules", "all-linear")
-    if isinstance(target_modules, str) and target_modules == "all-linear":
-        target_modules = ["k_proj", "v_proj", "ffn"]
-
-    return IA3Config(
-        target_modules=target_modules,
-        task_type=TaskType.CAUSAL_LM,
-    )
-
-
 def build_adalora_config(params: dict[str, Any]):
     """返回实际的 peft.AdaLoraConfig 对象。"""
     from peft import AdaLoraConfig, TaskType
@@ -71,15 +59,3 @@ def build_adalora_config(params: dict[str, Any]):
         beta2=params.get("adalora_beta2", 0.85),
         task_type=TaskType.CAUSAL_LM,
     )
-
-
-def build_prefix_tuning_config(params: dict[str, Any]):
-    """返回实际的 peft.PromptTuningConfig 对象。"""
-    from peft import PromptTuningConfig, PromptTuningInit, TaskType
-
-    return PromptTuningConfig(
-        num_virtual_tokens=params.get("prefix_num_virtual_tokens", 20),
-        prompt_tuning_init=PromptTuningInit.TEXT,
-        prompt_tuning_init_text="Classify the following text: ",
-        task_type=TaskType.CAUSAL_LM,
-    )

+ 0 - 40
backend/app/preprocessors/__init__.py

@@ -116,33 +116,6 @@ def apply_dpo_template(item: dict) -> dict:
     }
 
 
-def apply_kto_template(item: dict) -> dict:
-    """KTO 模板: prompt + completion + label。"""
-    return {
-        "prompt": item.get("prompt", item.get("input", item.get("question", item.get("query", "")))),
-        "completion": item.get("completion", item.get("output", item.get("answer", item.get("response", "")))),
-        "label": item.get("label", True),
-    }
-
-
-def apply_orpo_template(item: dict) -> dict:
-    """ORPO 模板: prompt + chosen + rejected (类似 DPO)。"""
-    return {
-        "prompt": item.get("prompt", item.get("input", item.get("question", item.get("query", "")))),
-        "chosen": item.get("chosen", item.get("positive", item.get("answer", ""))),
-        "rejected": item.get("rejected", item.get("negative", "")),
-    }
-
-
-def apply_rm_template(item: dict) -> dict:
-    """Reward Modeling 模板: prompt + chosen + rejected。"""
-    return {
-        "prompt": item.get("prompt", item.get("input", item.get("question", item.get("query", "")))),
-        "chosen": item.get("chosen", item.get("positive", item.get("answer", ""))),
-        "rejected": item.get("rejected", item.get("negative", "")),
-    }
-
-
 TEMPLATE_MAP = {
     "sft": {
         "auto": None,  # 特殊处理:自动检测
@@ -156,19 +129,6 @@ TEMPLATE_MAP = {
         "sharegpt": apply_dpo_template,
         "raw": apply_dpo_template,
     },
-    "kto": {
-        "auto": apply_kto_template,
-        "raw": apply_kto_template,
-    },
-    "orpo": {
-        "auto": apply_orpo_template,
-        "alpaca": apply_orpo_template,
-        "raw": apply_orpo_template,
-    },
-    "rm": {
-        "auto": apply_rm_template,
-        "raw": apply_rm_template,
-    },
     "ppo": {
         "auto": apply_raw_template,
         "raw": apply_raw_template,

+ 0 - 2
backend/app/schemas/common.py

@@ -26,9 +26,7 @@ class ModelType(str, Enum):
 class PeftMethod(str, Enum):
     LORA = "lora"
     QLORA = "qlora"
-    IA3 = "ia3"
     ADALORA = "adalora"
-    PREFIX_TUNING = "prefix_tuning"
 
 
 from typing import Generic, Optional, TypeVar

+ 1 - 1
backend/app/services/model_service.py

@@ -107,7 +107,7 @@ async def _execute_model_download(task_id: str, model_id: str, use_modelscope: b
         config_path = Path(local_path) / "config.json"
         model_type = "text"
         context_length = 2048
-        peft_methods = "lora,qlora,ia3,adalora,prefix_tuning"
+        peft_methods = "lora,qlora,adalora"
 
         if config_path.exists():
             with open(config_path) as f:

+ 1 - 4
frontend/src/pages/Training.tsx

@@ -12,16 +12,13 @@ const MODEL_TYPES = [
 const PEFT_METHODS = [
   { value: 'lora', label: 'LoRA' },
   { value: 'qlora', label: 'QLoRA (推荐)' },
-  { value: 'ia3', label: 'IA3' },
   { value: 'adalora', label: 'AdaLoRA' },
-  { value: 'prefix_tuning', label: 'Prefix Tuning' },
 ]
 
 const TASK_TYPES = [
   { value: 'sft', label: 'SFT (监督微调)' },
   { value: 'dpo', label: 'DPO (直接偏好优化)' },
-  { value: 'orpo', label: 'ORPO (比值偏好优化)' },
-  { value: 'kto', label: 'KTO (Kahneman-Tversky)' },
+  { value: 'ppo', label: 'PPO (近端策略优化)' },
 ]
 
 const DATASET_TEMPLATES = [

+ 123 - 38
result.txt

@@ -1,38 +1,123 @@
-(base) [root@localhost ~]# mx-smi
-mx-smi  version: 2.2.9
-
-=================== MetaX System Management Interface Log ===================
-Timestamp                                         : Fri May 22 03:09:03 2026
-
-Attached GPUs                                     : 4
-+---------------------------------------------------------------------------------+
-| MX-SMI 2.2.9                       Kernel Mode Driver Version: 3.4.4            |
-| MACA Version: 3.3.0.15             BIOS Version: 1.30.0.0                       |
-|------------------+-----------------+---------------------+----------------------|
-| Board       Name | GPU   Persist-M | Bus-id              | GPU-Util      sGPU-M |
-| Pwr:Usage/Cap    | Temp       Perf | Memory-Usage        | GPU-State            |
-|==================+=================+=====================+======================|
-| 0     MetaX N260 | 0           Off | 0000:b5:00.0        | 0%          Disabled |
-| 53W / 225W       | 43C          P9 | 60459/65536 MiB     | Available            |
-+------------------+-----------------+---------------------+----------------------+
-| 1     MetaX N260 | 1           Off | 0000:b6:00.0        | 0%          Disabled |
-| 50W / 225W       | 42C          P9 | 60459/65536 MiB     | Available            |
-+------------------+-----------------+---------------------+----------------------+
-| 2     MetaX N260 | 2           Off | 0000:b9:00.0        | 62%         Disabled |
-| 130W / 225W      | 64C          P9 | 41042/65536 MiB     | Available            |
-+------------------+-----------------+---------------------+----------------------+
-| 3     MetaX N260 | 3           Off | 0000:bd:00.0        | 60%         Disabled |
-| 126W / 225W      | 61C          P9 | 39916/65536 MiB     | Available            |
-+------------------+-----------------+---------------------+----------------------+
-
-+---------------------------------------------------------------------------------+
-| Process:                                                                        |
-|  GPU                    PID         Process Name                 GPU Memory     |
-|                                                                  Usage(MiB)     |
-|=================================================================================|
-|  0                  1007916         VLLM::Worker_TP              59790          |
-|  1                  1007917         VLLM::Worker_TP              59790          |
-|  2                  1217897         python                       5846           |
-|  2                  1229576         python                       34528          |
-|  3                  1217897         python                       5384           |
-|  3                  1229576         python                       33864  
+2026-05-22T08:18:50.643015421Z   File "/usr/local/lib/python3.10/site-packages/asyncpg/connection.py", line 638, in prepare
+2026-05-22T08:18:50.643022797Z     return await self._prepare(
+2026-05-22T08:18:50.643030223Z   File "/usr/local/lib/python3.10/site-packages/asyncpg/connection.py", line 657, in _prepare
+2026-05-22T08:18:50.643037457Z     stmt = await self._get_statement(
+2026-05-22T08:18:50.643044657Z   File "/usr/local/lib/python3.10/site-packages/asyncpg/connection.py", line 443, in _get_statement
+2026-05-22T08:18:50.643052007Z     statement = await self._protocol.prepare(
+2026-05-22T08:18:50.643059081Z   File "asyncpg/protocol/protocol.pyx", line 165, in prepare
+2026-05-22T08:18:50.643066273Z asyncpg.exceptions.UndefinedColumnError: column deploy_tasks.progress does not exist
+2026-05-22T08:18:50.643073515Z 
+2026-05-22T08:18:50.643080579Z The above exception was the direct cause of the following exception:
+2026-05-22T08:18:50.643087744Z 
+2026-05-22T08:18:50.643094663Z Traceback (most recent call last):
+2026-05-22T08:18:50.643101762Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1967, in _exec_single_context
+2026-05-22T08:18:50.643109207Z     self.dialect.do_execute(
+2026-05-22T08:18:50.643116203Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/default.py", line 952, in do_execute
+2026-05-22T08:18:50.643123550Z     cursor.execute(statement, parameters)
+2026-05-22T08:18:50.643130624Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 585, in execute
+2026-05-22T08:18:50.643137909Z     self._adapt_connection.await_(
+2026-05-22T08:18:50.643145080Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 132, in await_only
+2026-05-22T08:18:50.643152513Z     return current.parent.switch(awaitable)  # type: ignore[no-any-return,attr-defined] # noqa: E501
+2026-05-22T08:18:50.643191846Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 196, in greenlet_spawn
+2026-05-22T08:18:50.643199823Z     value = await result
+2026-05-22T08:18:50.643207640Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 563, in _prepare_and_execute
+2026-05-22T08:18:50.643215143Z     self._handle_exception(error)
+2026-05-22T08:18:50.643227474Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 513, in _handle_exception
+2026-05-22T08:18:50.643235388Z     self._adapt_connection._handle_exception(error)
+2026-05-22T08:18:50.643242533Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 797, in _handle_exception
+2026-05-22T08:18:50.643249926Z     raise translated_error from error
+2026-05-22T08:18:50.643257198Z sqlalchemy.dialects.postgresql.asyncpg.AsyncAdapt_asyncpg_dbapi.ProgrammingError: <class 'asyncpg.exceptions.UndefinedColumnError'>: column deploy_tasks.progress does not exist
+2026-05-22T08:18:50.643265144Z 
+2026-05-22T08:18:50.643272147Z The above exception was the direct cause of the following exception:
+2026-05-22T08:18:50.643279941Z 
+2026-05-22T08:18:50.643286963Z Traceback (most recent call last):
+2026-05-22T08:18:50.643294066Z   File "/usr/local/lib/python3.10/site-packages/starlette/routing.py", line 638, in lifespan
+2026-05-22T08:18:50.643301414Z     async with self.lifespan_context(app) as maybe_state:
+2026-05-22T08:18:50.643308589Z   File "/usr/local/lib/python3.10/contextlib.py", line 199, in __aenter__
+2026-05-22T08:18:50.643315873Z     return await anext(self.gen)
+2026-05-22T08:18:50.643322977Z   File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 216, in merged_lifespan
+2026-05-22T08:18:50.643330348Z     async with original_context(app) as maybe_original_state:
+2026-05-22T08:18:50.643337567Z   File "/usr/local/lib/python3.10/contextlib.py", line 199, in __aenter__
+2026-05-22T08:18:50.643344825Z     return await anext(self.gen)
+2026-05-22T08:18:50.643351927Z   File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 216, in merged_lifespan
+2026-05-22T08:18:50.643434023Z     async with original_context(app) as maybe_original_state:
+2026-05-22T08:18:50.643471056Z   File "/usr/local/lib/python3.10/contextlib.py", line 199, in __aenter__
+2026-05-22T08:18:50.643488241Z     return await anext(self.gen)
+2026-05-22T08:18:50.643502853Z   File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 216, in merged_lifespan
+2026-05-22T08:18:50.643515442Z     async with original_context(app) as maybe_original_state:
+2026-05-22T08:18:50.643531260Z   File "/usr/local/lib/python3.10/contextlib.py", line 199, in __aenter__
+2026-05-22T08:18:50.643566372Z     return await anext(self.gen)
+2026-05-22T08:18:50.643581558Z   File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 216, in merged_lifespan
+2026-05-22T08:18:50.643599397Z     async with original_context(app) as maybe_original_state:
+2026-05-22T08:18:50.643611772Z   File "/usr/local/lib/python3.10/contextlib.py", line 199, in __aenter__
+2026-05-22T08:18:50.643623462Z     return await anext(self.gen)
+2026-05-22T08:18:50.643634678Z   File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 216, in merged_lifespan
+2026-05-22T08:18:50.643648989Z     async with original_context(app) as maybe_original_state:
+2026-05-22T08:18:50.643660765Z   File "/usr/local/lib/python3.10/contextlib.py", line 199, in __aenter__
+2026-05-22T08:18:50.643673291Z     return await anext(self.gen)
+2026-05-22T08:18:50.643688183Z   File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 216, in merged_lifespan
+2026-05-22T08:18:50.643701236Z     async with original_context(app) as maybe_original_state:
+2026-05-22T08:18:50.643714559Z   File "/usr/local/lib/python3.10/contextlib.py", line 199, in __aenter__
+2026-05-22T08:18:50.643728513Z     return await anext(self.gen)
+2026-05-22T08:18:50.643741191Z   File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 216, in merged_lifespan
+2026-05-22T08:18:50.643754359Z     async with original_context(app) as maybe_original_state:
+2026-05-22T08:18:50.643766265Z   File "/usr/local/lib/python3.10/contextlib.py", line 199, in __aenter__
+2026-05-22T08:18:50.643778654Z     return await anext(self.gen)
+2026-05-22T08:18:50.643790810Z   File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 216, in merged_lifespan
+2026-05-22T08:18:50.643803296Z     async with original_context(app) as maybe_original_state:
+2026-05-22T08:18:50.643876231Z   File "/usr/local/lib/python3.10/contextlib.py", line 199, in __aenter__
+2026-05-22T08:18:50.643894788Z     return await anext(self.gen)
+2026-05-22T08:18:50.643932687Z   File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 216, in merged_lifespan
+2026-05-22T08:18:50.643948630Z     async with original_context(app) as maybe_original_state:
+2026-05-22T08:18:50.643956593Z   File "/usr/local/lib/python3.10/contextlib.py", line 199, in __aenter__
+2026-05-22T08:18:50.643992791Z     return await anext(self.gen)
+2026-05-22T08:18:50.644003789Z   File "/app/main.py", line 48, in lifespan
+2026-05-22T08:18:50.644012003Z     await deploy_service.recover_stale_deploys()
+2026-05-22T08:18:50.644020245Z   File "/app/app/services/deploy_service.py", line 190, in recover_stale_deploys
+2026-05-22T08:18:50.644027950Z     result = await session.execute(
+2026-05-22T08:18:50.644059576Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/ext/asyncio/session.py", line 449, in execute
+2026-05-22T08:18:50.644069585Z     result = await greenlet_spawn(
+2026-05-22T08:18:50.644138024Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 201, in greenlet_spawn
+2026-05-22T08:18:50.644147460Z     result = context.throw(*sys.exc_info())
+2026-05-22T08:18:50.644154680Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/session.py", line 2351, in execute
+2026-05-22T08:18:50.644161970Z     return self._execute_internal(
+2026-05-22T08:18:50.644169448Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/session.py", line 2249, in _execute_internal
+2026-05-22T08:18:50.644176979Z     result: Result[Any] = compile_state_cls.orm_execute_statement(
+2026-05-22T08:18:50.644187198Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/context.py", line 306, in orm_execute_statement
+2026-05-22T08:18:50.644194755Z     result = conn.execute(
+2026-05-22T08:18:50.644202469Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1419, in execute
+2026-05-22T08:18:50.644210982Z     return meth(
+2026-05-22T08:18:50.644218155Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/sql/elements.py", line 527, in _execute_on_connection
+2026-05-22T08:18:50.644225705Z     return connection._execute_clauseelement(
+2026-05-22T08:18:50.644233110Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1641, in _execute_clauseelement
+2026-05-22T08:18:50.644240494Z     ret = self._execute_context(
+2026-05-22T08:18:50.644247607Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1846, in _execute_context
+2026-05-22T08:18:50.644255628Z     return self._exec_single_context(
+2026-05-22T08:18:50.644262798Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1986, in _exec_single_context
+2026-05-22T08:18:50.644270155Z     self._handle_dbapi_exception(
+2026-05-22T08:18:50.644277493Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 2363, in _handle_dbapi_exception
+2026-05-22T08:18:50.644284845Z     raise sqlalchemy_exception.with_traceback(exc_info[2]) from e
+2026-05-22T08:18:50.644292415Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1967, in _exec_single_context
+2026-05-22T08:18:50.644299797Z     self.dialect.do_execute(
+2026-05-22T08:18:50.644306922Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/default.py", line 952, in do_execute
+2026-05-22T08:18:50.644314255Z     cursor.execute(statement, parameters)
+2026-05-22T08:18:50.644321572Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 585, in execute
+2026-05-22T08:18:50.644328978Z     self._adapt_connection.await_(
+2026-05-22T08:18:50.644336069Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 132, in await_only
+2026-05-22T08:18:50.644343564Z     return current.parent.switch(awaitable)  # type: ignore[no-any-return,attr-defined] # noqa: E501
+2026-05-22T08:18:50.644361572Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 196, in greenlet_spawn
+2026-05-22T08:18:50.644369648Z     value = await result
+2026-05-22T08:18:50.644377041Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 563, in _prepare_and_execute
+2026-05-22T08:18:50.644384536Z     self._handle_exception(error)
+2026-05-22T08:18:50.644392498Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 513, in _handle_exception
+2026-05-22T08:18:50.644400265Z     self._adapt_connection._handle_exception(error)
+2026-05-22T08:18:50.644407753Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 797, in _handle_exception
+2026-05-22T08:18:50.644415847Z     raise translated_error from error
+2026-05-22T08:18:50.644480149Z sqlalchemy.exc.ProgrammingError: (sqlalchemy.dialects.postgresql.asyncpg.ProgrammingError) <class 'asyncpg.exceptions.UndefinedColumnError'>: column deploy_tasks.progress does not exist
+2026-05-22T08:18:50.644492007Z [SQL: SELECT deploy_tasks.id, deploy_tasks.job_id, deploy_tasks.status, deploy_tasks.output_path, deploy_tasks.error, deploy_tasks.progress, deploy_tasks.finished_at, deploy_tasks.created_at 
+2026-05-22T08:18:50.644499657Z FROM deploy_tasks 
+2026-05-22T08:18:50.644506710Z WHERE deploy_tasks.status IN ($1::VARCHAR, $2::VARCHAR)]
+2026-05-22T08:18:50.644513902Z [parameters: ('pending', 'running')]
+2026-05-22T08:18:50.644521041Z (Background on this error at: https://sqlalche.me/e/20/f405)
+2026-05-22T08:18:50.644528216Z 
+2026-05-22T08:18:50.644535221Z ERROR:    Application startup failed. Exiting.