(base) [root@localhost ~]# docker exec finetune-trainer cat /tmp/train_f3038ef4-bb2c-44e5-bba5-fc481d1415e8.log | grep -A 30 "Traceback"
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 320, in <module>
    lib = get_native_library()
  File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 288, in get_native_library
    raise RuntimeError(f"Configured {BNB_BACKEND} binary not found at {cuda_binary_path}")
RuntimeError: Configured CUDA binary not found at /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
[transformers] warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
[transformers] warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
trainable params: 5,070,848 || all params: 757,463,872 || trainable%: 0.6695
Map: 100%|██████████| 5/5 [00:00<00:00, 155.69 examples/s]
  0%|          | 0/1 [00:00<?, ?it/s]Training failed for job f3038ef4-bb2c-44e5-bba5-fc481d1415e8: DPOTrainer.compute_loss() got an unexpected keyword argument 'num_items_in_batch'
[remote_train] [rank 0] ERROR: DPOTrainer.compute_loss() got an unexpected keyword argument 'num_items_in_batch'
[remote_train] Traceback (most recent call last):
  File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 236, in run_training
    adapter_path = await engine.train(
  File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 546, in train
    trainer.train()
  File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1427, in train
    return inner_training_loop(
  File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1509, in _inner_training_loop
    self._run_epoch(
  File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1737, in _run_epoch
    tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
  File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1909, in training_step
    loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
TypeError: DPOTrainer.compute_loss() got an unexpected keyword argument 'num_items_in_batch'

[remote_train] === Training job failed: f3038ef4-bb2c-44e5-bba5-fc481d1415e8 ===
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/opt/conda/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 466, in <module>
    main()
  File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 461, in main
    asyncio.run(run_training(job_id, model_id, model_type, dataset_id, config,
  File "/opt/conda/lib/python3.10/asyncio/runners.py", line 44, in run
    return loop.run_until_complete(main)
  File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
    return future.result()
  File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 236, in run_training
    adapter_path = await engine.train(
  File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 546, in train
    trainer.train()
  File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1427, in train
    return inner_training_loop(
  File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1509, in _inner_training_loop
    self._run_epoch(
  File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1737, in _run_epoch
    tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
  File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1909, in training_step
    loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
TypeError: DPOTrainer.compute_loss() got an unexpected keyword argument 'num_items_in_batch'
  0%|          | 0/1 [00:12<?, ?it/s]
