result.txt 3.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. (base) [root@localhost Fine-tuning]# docker exec finetune-backend cat /root/Fine-tuning/backend/data/models/Qwen/Qwen3.5-0.8B/config.json
  2. {
  3. "architectures": [
  4. "Qwen3_5ForConditionalGeneration"
  5. ],
  6. "image_token_id": 248056,
  7. "model_type": "qwen3_5",
  8. "text_config": {
  9. "attention_bias": false,
  10. "attention_dropout": 0.0,
  11. "attn_output_gate": true,
  12. "dtype": "bfloat16",
  13. "eos_token_id": 248044,
  14. "full_attention_interval": 4,
  15. "head_dim": 256,
  16. "hidden_act": "silu",
  17. "hidden_size": 1024,
  18. "initializer_range": 0.02,
  19. "intermediate_size": 3584,
  20. "layer_types": [
  21. "linear_attention",
  22. "linear_attention",
  23. "linear_attention",
  24. "full_attention",
  25. "linear_attention",
  26. "linear_attention",
  27. "linear_attention",
  28. "full_attention",
  29. "linear_attention",
  30. "linear_attention",
  31. "linear_attention",
  32. "full_attention",
  33. "linear_attention",
  34. "linear_attention",
  35. "linear_attention",
  36. "full_attention",
  37. "linear_attention",
  38. "linear_attention",
  39. "linear_attention",
  40. "full_attention",
  41. "linear_attention",
  42. "linear_attention",
  43. "linear_attention",
  44. "full_attention"
  45. ],
  46. "linear_conv_kernel_dim": 4,
  47. "linear_key_head_dim": 128,
  48. "linear_num_key_heads": 16,
  49. "linear_num_value_heads": 16,
  50. "linear_value_head_dim": 128,
  51. "max_position_embeddings": 262144,
  52. "mlp_only_layers": [],
  53. "model_type": "qwen3_5_text",
  54. "mtp_num_hidden_layers": 1,
  55. "mtp_use_dedicated_embeddings": false,
  56. "num_attention_heads": 8,
  57. "num_hidden_layers": 24,
  58. "num_key_value_heads": 2,
  59. "rms_norm_eps": 1e-06,
  60. "tie_word_embeddings": true,
  61. "use_cache": true,
  62. "vocab_size": 248320,
  63. "mamba_ssm_dtype": "float32",
  64. "rope_parameters": {
  65. "mrope_interleaved": true,
  66. "mrope_section": [
  67. 11,
  68. 11,
  69. 10
  70. ],
  71. "rope_type": "default",
  72. "rope_theta": 10000000,
  73. "partial_rotary_factor": 0.25
  74. }
  75. },
  76. "tie_word_embeddings": true,
  77. "transformers_version": "4.57.0.dev0",
  78. "video_token_id": 248057,
  79. "vision_config": {
  80. "deepstack_visual_indexes": [],
  81. "depth": 12,
  82. "hidden_act": "gelu_pytorch_tanh",
  83. "hidden_size": 768,
  84. "in_channels": 3,
  85. "initializer_range": 0.02,
  86. "intermediate_size": 3072,
  87. "model_type": "qwen3_5",
  88. "num_heads": 12,
  89. "num_position_embeddings": 2304,
  90. "out_hidden_size": 1024,
  91. "patch_size": 16,
  92. "spatial_merge_size": 2,
  93. "temporal_patch_size": 2
  94. },
  95. "vision_end_token_id": 248054,
  96. "vision_start_token_id": 248053
  97. }(base) [root@localhost Fine-tuning]#