(base) [root@localhost ~]# docker exec finetune-trainer /opt/conda/bin/python -c 'from trl.experimental.ppo import PPOTrainer; print([m for m in dir(PPOTrainer) if not m.startswith("_")])'<string>:1: TRLExperimentalWarning: You are importing from 'trl.experimental'. APIs here are unstable and may change or be removed without notice. Silence this warning by setting environment variable TRL_EXPERIMENTAL_SILENCE=1.
/opt/conda/lib/python3.10/site-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
  warnings.warn(_BETA_TRANSFORMS_WARNING)
/opt/conda/lib/python3.10/site-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
  warnings.warn(_BETA_TRANSFORMS_WARNING)
['add_callback', 'autocast_smart_context_manager', 'call_model_init', 'compute_loss', 'compute_loss_context_manager', 'create_accelerator_and_postprocess', 'create_model_card', 'create_optimizer', 'create_optimizer_and_scheduler', 'create_scheduler', 'evaluate', 'evaluation_loop', 'floating_point_ops', 'generate_completions', 'get_batch_samples', 'get_cp_size', 'get_decay_parameter_names', 'get_eval_dataloader', 'get_learning_rates', 'get_num_trainable_parameters', 'get_optimizer_cls_and_kwargs', 'get_optimizer_group', 'get_sp_size', 'get_test_dataloader', 'get_total_train_batch_size', 'get_tp_size', 'get_train_dataloader', 'hyperparameter_search', 'init_hf_repo', 'is_local_process_zero', 'is_world_process_zero', 'log', 'log_metrics', 'metrics_format', 'null_ref_context', 'num_examples', 'pop_callback', 'predict', 'prediction_step', 'push_to_hub', 'remove_callback', 'save_metrics', 'save_model', 'save_state', 'set_initial_training_values', 'store_flos', 'train', 'training_step']
(base) [root@localhost ~]# 
