Mistral 24B, Qwen 2.5 VL support (#1598)
* use exact model name

* Update save.py

* Update _utils.py

* Update _utils.py

* Update _utils.py

* Update _utils.py

* print

* Update _utils.py

* Update _utils.py

* Update llama.py

* Update _utils.py

* Update vision.py

* Update _utils.py

* Update _utils.py

* Update _utils.py

* Update _utils.py

* Update _utils.py

* Update _utils.py

* Update _utils.py

* Update _utils.py

* Update loader.py

* accurate_accumulation

* Update loader.py

* Update loader.py

* Update _utils.py

* Update loader.py

* Update loader.py

* Update loader.py

* Update loader.py

* Update pyproject.toml

* Update __init__.py

* Update pyproject.toml

* Update __init__.py

* Update __init__.py

* Fix Triton heuristics

triton-lang/triton#5224

* Update __init__.py

* Update __init__.py

* Update __init__.py

* Update __init__.py

* Xformers

* Update loader.py

* Update loader.py

* Rewind

* Update _utils.py

* Update _utils.py

* requires grad

* Update loader.py

* Update _utils.py

* Update loader.py

* changing model to base_model if a PEFT model is already used
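
A minimal sketch of the idea in the commit above — unwrapping an already-attached PEFT adapter back to its base model — using the public peft API rather than the exact code from this change:

from peft import PeftModel

def unwrap_if_peft(model):
    # If a LoRA/PEFT wrapper is already applied, work on the underlying model.
    if isinstance(model, PeftModel):
        return model.get_base_model()
    return model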

* Improve debugging experience (#1512)

* Create CONTRIBUTING.md (#1472)

Creating contributing guidelines

* Update CONTRIBUTING.md

improved sentence

* Improve logging control in `unsloth_compile_transformers` by conditionally redirecting stdout based on UNSLOTH_DISABLE_LOGGER environment variable

---------

Co-authored-by: Michael Han <[email protected]>
Co-authored-by: Nino Risteski <[email protected]>
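
A minimal sketch of the stdout-redirect toggle described in the logging-control commit above; the helper name and the assumed polarity of UNSLOTH_DISABLE_LOGGER are illustrative, not the exact unsloth_compile_transformers code:

import contextlib
import io
import os

@contextlib.contextmanager
def maybe_silence_stdout():
    # Swallow compile-time prints unless the user disabled the logger redirect.
    if os.environ.get("UNSLOTH_DISABLE_LOGGER", "0") == "0":
        with contextlib.redirect_stdout(io.StringIO()):
            yield
    else:
        yield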

* Update loader.py

* Update llama.py

* Update llama.py

* Revert "Update llama.py"

This reverts commit b7ddf96.

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Update llama.py

* Auto change is_bfloat16_supported

* Update llama.py

* Force data-type

* Update llama.py

* All attention refactor fix (#1491)

* change initialization of n_heads, n_kv_heads, hidden_size in llama.py

* do the same for cohere, mistral, gemma2, granite

* do the same for flexattention, cohere, mistral, granite

* Update llama.py

* Update llama.py

* Update granite to work with latest post_patch methods (#1502)

* Update granite to work with latest post_patch methods

* Pass position_embeddings for granite even if transformers<4.47

* Update llama.py

---------

Co-authored-by: Daniel Han <[email protected]>

* Minor fixes for granite models (#1503)

* Update granite.py

Grab residual multiplier directly from layer

* Update llama.py

Version should read >= 4.47.1 as that is the version requiring the changes

* Update granite.py

* Update llama.py

---------

Co-authored-by: Daniel Han <[email protected]>

* support modelscope models and datasets (#1481)

* support modelscope

* change modelscope args

* remove useless import

* remove useless import

* fix

* wip

* fix

* remove useless code

* add readme

* add some comments

* change print to raise error

* update comment

* Update loader.py

---------

Co-authored-by: Daniel Han <[email protected]>
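
A sketch of the kind of branch the ModelScope support above introduces in the loader: when an opt-in environment variable is set, download weights via modelscope.snapshot_download instead of the Hugging Face Hub. The variable name UNSLOTH_USE_MODELSCOPE and the control flow are assumptions for illustration; only snapshot_download is a known modelscope API:

import os

def resolve_model_source(model_name: str) -> str:
    # Hypothetical helper: pick where to pull weights from.
    if os.environ.get("UNSLOTH_USE_MODELSCOPE", "0") == "1":
        from modelscope import snapshot_download
        return snapshot_download(model_name)  # local path to the downloaded repo
    return model_name  # let transformers resolve it on the Hugging Face Hub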

* Merge branch 'main' into nightly

* Phi 4

* Update llama.py

* Torch.Cuda Is Available Condition and Warning (#1545)

* check whether torch.cuda and triton are available
on my machine (Mac M3) CUDA was not available

* Update pyproject.toml

* Update __init__.py

---------

Co-authored-by: Daniel Han <[email protected]>
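
A minimal sketch of the availability guard described in the commit above, assuming the check simply warns and skips GPU-only imports when CUDA is missing (e.g. on an Apple Silicon machine); the actual wording and placement in unsloth's __init__.py may differ:

import warnings
import torch

if not torch.cuda.is_available():
    # No NVIDIA GPU: warn instead of failing at import time.
    warnings.warn("CUDA is not available; Triton/GPU features are disabled.")
else:
    import triton  # only meaningful when a CUDA device is present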

* Update mistral.py

* Update mistral.py

* Update _utils.py

* Update _utils.py

* Update _utils.py

* Update _utils.py

* Update _utils.py

* Fix

* Bug fixes

* Update mapper.py

* Add dropout to granite to match HF's implementation (#1557)

Signed-off-by: datta0 <[email protected]>

* Update llama.py

* Update llama.py

* Bug fixes

* fix: flash_attn_detection_error (#1556)

* fix: flash_attn_detection_error

* Update _utils.py

---------

Co-authored-by: Daniel Han <[email protected]>

* Update mapper.py

---------

Signed-off-by: datta0 <[email protected]>
Co-authored-by: Itsuro Tajima <[email protected]>
Co-authored-by: Muhammad Osama <[email protected]>
Co-authored-by: Edd <[email protected]>
Co-authored-by: Michael Han <[email protected]>
Co-authored-by: Nino Risteski <[email protected]>
Co-authored-by: Kareem <[email protected]>
Co-authored-by: Datta Nimmaturi <[email protected]>
Co-authored-by: Z <[email protected]>
Co-authored-by: tastelikefeet <[email protected]>
Co-authored-by: AminWhat <[email protected]>
Co-authored-by: Zhe Zhang <[email protected]>
12 people authored Jan 31, 2025
1 parent ed14d37 commit 038e6d4
Showing 1 changed file with 33 additions and 4 deletions.
37 changes: 33 additions & 4 deletions unsloth/models/mapper.py
@@ -432,21 +432,25 @@
         "unsloth/Qwen2.5-Coder-32B-Instruct",
         "Qwen/Qwen2.5-Coder-32B-Instruct",
     ),
-    "unsloth/Llama-3.2-1B-bnb-4bit" : (
+    "unsloth/Llama-3.2-1B-unsloth-bnb-4bit" : (
         "unsloth/Llama-3.2-1B",
         "meta-llama/Llama-3.2-1B",
+        "unsloth/Llama-3.2-1B-bnb-4bit",
     ),
-    "unsloth/Llama-3.2-3B-bnb-4bit" : (
+    "unsloth/Llama-3.2-3B-unsloth-bnb-4bit" : (
         "unsloth/Llama-3.2-3B",
         "meta-llama/Llama-3.2-3B",
+        "unsloth/Llama-3.2-3B-bnb-4bit",
     ),
-    "unsloth/Llama-3.2-1B-Instruct-bnb-4bit" : (
+    "unsloth/Llama-3.2-1B-Instruct-unsloth-bnb-4bit" : (
         "unsloth/Llama-3.2-1B-Instruct",
         "meta-llama/Llama-3.2-1B-Instruct",
+        "unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
     ),
-    "unsloth/Llama-3.2-3B-Instruct-bnb-4bit" : (
+    "unsloth/Llama-3.2-3B-Instruct-unsloth-bnb-4bit" : (
         "unsloth/Llama-3.2-3B-Instruct",
         "meta-llama/Llama-3.2-3B-Instruct",
+        "unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
     ),
     "unsloth/Llama-3.1-Nemotron-70B-Instruct-bnb-4bit" : (
         "unsloth/Llama-3.1-Nemotron-70B-Instruct",
@@ -550,6 +554,31 @@
         "unsloth/DeepSeek-R1-Distill-Llama-70B",
         "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
     ),
+    "unsloth/Mistral-Small-24B-Base-2501-unsloth-bnb-4bit" : (
+        "unsloth/Mistral-Small-24B-Base",
+        "mistralai/Mistral-Small-24B-Base-2501",
+        "unsloth/Mistral-Small-24B-Base-2501-bnb-4bit",
+    ),
+    "unsloth/Mistral-Small-24B-Instruct-2501-unsloth-bnb-4bit" : (
+        "unsloth/Mistral-Small-24B-Instruct",
+        "mistralai/Mistral-Small-24B-Instruct-2501",
+        "unsloth/Mistral-Small-24B-Instruct-2501-bnb-4bit",
+    ),
+    "unsloth/Qwen2.5-VL-3B-Instruct-unsloth-bnb-4bit" : (
+        "unsloth/Qwen2.5-VL-3B-Instruct",
+        "Qwen/Qwen2.5-VL-3B-Instruct",
+        "unsloth/Qwen2.5-VL-3B-Instruct-bnb-4bit",
+    ),
+    "unsloth/Qwen2.5-VL-7B-Instruct-unsloth-bnb-4bit" : (
+        "unsloth/Qwen2.5-VL-7B-Instruct",
+        "Qwen/Qwen2.5-VL-7B-Instruct",
+        "unsloth/Qwen2.5-VL-7B-Instruct-bnb-4bit",
+    ),
+    "unsloth/Qwen2.5-VL-72B-Instruct-unsloth-bnb-4bit" : (
+        "unsloth/Qwen2.5-VL-72B-Instruct",
+        "Qwen/Qwen2.5-VL-72B-Instruct",
+        "unsloth/Qwen2.5-VL-72B-Instruct-bnb-4bit",
+    ),
 }
 
 INT_TO_FLOAT_MAPPER = {}
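
For orientation, a sketch of how the empty INT_TO_FLOAT_MAPPER that follows the dictionary above could be populated from entries like the ones added in this commit. The outer dictionary's real name is not visible in this hunk, so _MODEL_MAPPER below is a placeholder, and the exact construction in mapper.py may differ:

_MODEL_MAPPER = {
    # One of the entries added in this commit, repeated for illustration:
    "unsloth/Qwen2.5-VL-7B-Instruct-unsloth-bnb-4bit" : (
        "unsloth/Qwen2.5-VL-7B-Instruct",
        "Qwen/Qwen2.5-VL-7B-Instruct",
        "unsloth/Qwen2.5-VL-7B-Instruct-bnb-4bit",
    ),
}

INT_TO_FLOAT_MAPPER = {}
for quantized_name, aliases in _MODEL_MAPPER.items():
    full_precision = aliases[0]
    # Every quantized or original alias resolves back to the 16-bit repo.
    INT_TO_FLOAT_MAPPER[quantized_name] = full_precision
    for alias in aliases:
        INT_TO_FLOAT_MAPPER[alias] = full_precision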
