Upload folder using huggingface_hub

Files changed (10) hide show

coding-0.2/p/latest/README.md CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 base_model: unsloth/deepseek-r1-distill-qwen-7b-unsloth-bnb-4bit
 library_name: peft
-model_name: iter-1
 tags:
 - base_model:adapter:unsloth/deepseek-r1-distill-qwen-7b-unsloth-bnb-4bit
 - lora
@@ -13,7 +13,7 @@ licence: license
 pipeline_tag: text-generation
 ---
-# Model Card for iter-1
 This model is a fine-tuned version of [unsloth/deepseek-r1-distill-qwen-7b-unsloth-bnb-4bit](https://huggingface.co/unsloth/deepseek-r1-distill-qwen-7b-unsloth-bnb-4bit).
 It has been trained using [TRL](https://github.com/huggingface/trl).
@@ -31,7 +31,7 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/kstaron-/huggingface/runs/lrlmlt1t)
 This model was trained with SFT.

 ---
 base_model: unsloth/deepseek-r1-distill-qwen-7b-unsloth-bnb-4bit
 library_name: peft
+model_name: iter-2
 tags:
 - base_model:adapter:unsloth/deepseek-r1-distill-qwen-7b-unsloth-bnb-4bit
 - lora
 pipeline_tag: text-generation
 ---
+# Model Card for iter-2
 This model is a fine-tuned version of [unsloth/deepseek-r1-distill-qwen-7b-unsloth-bnb-4bit](https://huggingface.co/unsloth/deepseek-r1-distill-qwen-7b-unsloth-bnb-4bit).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/kstaron-/promptcot-em/runs/em_training_6iters)
 This model was trained with SFT.

coding-0.2/p/latest/adapter_config.json CHANGED Viewed

@@ -34,12 +34,12 @@
   "revision": null,
   "target_modules": [
     "up_proj",
-    "down_proj",
     "q_proj",
-    "gate_proj",
     "k_proj",
-    "o_proj",
-    "v_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "revision": null,
   "target_modules": [
     "up_proj",
     "q_proj",
     "k_proj",
+    "down_proj",
+    "gate_proj",
+    "v_proj",
+    "o_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

coding-0.2/p/latest/base_adapter/adapter_config.json CHANGED Viewed

@@ -25,10 +25,7 @@
   "lora_dropout": 0,
   "megatron_config": null,
   "megatron_core": "megatron.core",
-  "modules_to_save": [
-    "lm_head",
-    "embed_tokens"
-  ],
   "peft_type": "LORA",
   "peft_version": "0.18.0",
   "qalora_group_size": 16,
@@ -37,12 +34,12 @@
   "revision": null,
   "target_modules": [
     "up_proj",
-    "down_proj",
     "q_proj",
-    "gate_proj",
     "k_proj",
-    "o_proj",
-    "v_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "lora_dropout": 0,
   "megatron_config": null,
   "megatron_core": "megatron.core",
+  "modules_to_save": null,
   "peft_type": "LORA",
   "peft_version": "0.18.0",
   "qalora_group_size": 16,
   "revision": null,
   "target_modules": [
     "up_proj",
     "q_proj",
     "k_proj",
+    "down_proj",
+    "gate_proj",
+    "v_proj",
+    "o_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

coding-0.2/p/latest/base_adapter/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c9481dac28ce0d5b8ade418c64f05fd46d073d116917d1a498f4235b9f39c72
-size 2825965440

 version https://git-lfs.github.com/spec/v1
+oid sha256:b4ff04ba632410c1d699ff65581199682b73a2fcf21950113b9de8fd7e27ed4f
+size 645975704

coding-0.2/p/latest/checkpoint-28/adapter_config.json CHANGED Viewed

@@ -34,12 +34,12 @@
   "revision": null,
   "target_modules": [
     "up_proj",
-    "down_proj",
     "q_proj",
-    "gate_proj",
     "k_proj",
-    "o_proj",
-    "v_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "revision": null,
   "target_modules": [
     "up_proj",
     "q_proj",
     "k_proj",
+    "down_proj",
+    "gate_proj",
+    "v_proj",
+    "o_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

coding-0.2/p/latest/checkpoint-28/base_adapter/adapter_config.json CHANGED Viewed

@@ -25,10 +25,7 @@
   "lora_dropout": 0,
   "megatron_config": null,
   "megatron_core": "megatron.core",
-  "modules_to_save": [
-    "lm_head",
-    "embed_tokens"
-  ],
   "peft_type": "LORA",
   "peft_version": "0.18.0",
   "qalora_group_size": 16,
@@ -37,12 +34,12 @@
   "revision": null,
   "target_modules": [
     "up_proj",
-    "down_proj",
     "q_proj",
-    "gate_proj",
     "k_proj",
-    "o_proj",
-    "v_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "lora_dropout": 0,
   "megatron_config": null,
   "megatron_core": "megatron.core",
+  "modules_to_save": null,
   "peft_type": "LORA",
   "peft_version": "0.18.0",
   "qalora_group_size": 16,
   "revision": null,
   "target_modules": [
     "up_proj",
     "q_proj",
     "k_proj",
+    "down_proj",
+    "gate_proj",
+    "v_proj",
+    "o_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

coding-0.2/p/latest/checkpoint-28/base_adapter/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c9481dac28ce0d5b8ade418c64f05fd46d073d116917d1a498f4235b9f39c72
-size 2825965440

 version https://git-lfs.github.com/spec/v1
+oid sha256:b4ff04ba632410c1d699ff65581199682b73a2fcf21950113b9de8fd7e27ed4f
+size 645975704

coding-0.2/p/latest/checkpoint-28/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f022fdf1d6b23d3fb608c874aedc470a49d88a3add70d804eca3992e867327bc
-size 5795494515

 version https://git-lfs.github.com/spec/v1
+oid sha256:bdb72360b58fa91fe5d6b16b7eb7744a74ae14dce46ee0eabd0f5ffe5887ce58
+size 328468869

coding-0.2/p/latest/checkpoint-28/trainer_state.json CHANGED Viewed

@@ -26,7 +26,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.438731137024e+17,
   "train_batch_size": 90,
   "trial_name": null,
   "trial_params": null

       "attributes": {}
     }
   },
+  "total_flos": 2.2713079431168e+17,
   "train_batch_size": 90,
   "trial_name": null,
   "trial_params": null

coding-0.2/p/latest/checkpoint-28/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d742ad02897ec0aeab7d86e7684e69ab674b7af5a473943467239abd13221e98
 size 6225

 version https://git-lfs.github.com/spec/v1
+oid sha256:fc86eb21aff7e7c0abc5483802cd6faeb2970251b815689e19b35ddacc65d918
 size 6225