Merge lowering of gguf files with ET tests into pull.yml (pytorch#417)
* Revert "Revert "Embedding quantization per backend (pytorch#402)" (pytorch#411)"

This reverts commit 8b35acdff4fded779799ab8a419e55f885dd8918.

* merge GGUF tests into pull.yml
mikekgfb authored and malfet committed Jul 17, 2024
1 parent 2d1c8cc commit a99dc12
Showing 1 changed file with 28 additions and 0 deletions.
28 changes: 28 additions & 0 deletions .github/workflows/pull.yml
@@ -279,6 +279,20 @@ jobs:
python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
cat ./output_et
echo "******************************************"
echo "**** Emb 4bit: channel-wise quantized ****"
echo "******************************************"
python export.py --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
cat ./output_et
echo "******************************************"
echo "****** Emb 4bit: group-wise quantized ****"
echo "******************************************"
python export.py --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
cat ./output_et
echo "******************************************"
echo "******* INT8 channel-wise quantized ******"
echo "******************************************"
@@ -300,6 +314,20 @@ jobs:
python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
# cat ./output_et
echo "******************************************"
echo "******** INT4 group-wise quantized *******"
echo "******************************************"
# python export.py --quant '{"linear:int4" : {"groupsize": 32}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
# python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
# cat ./output_et
echo "******************************************"
echo "******** HQQ group-wise quantized *******"
echo "******************************************"
# python export.py --quant '{"linear:hqq" : {"groupsize": 32}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
# python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
# cat ./output_et
echo "tests complete"
echo "******************************************"
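The commented-out INT4 and HQQ runs target 4-bit group-wise quantization of linear weights. A hedged sketch of what '{"linear:int4": {"groupsize": 32}}' implies — asymmetric 4-bit values with one (scale, zero-point) pair per 32 weights; illustrative only, not the packed kernel ExecuTorch would lower to:

    import numpy as np

    def quantize_int4(w, groupsize=32):
        """Asymmetric 4-bit quantization of an (out, in) weight matrix."""
        out_f, in_f = w.shape
        groups = w.reshape(out_f, in_f // groupsize, groupsize)
        lo = groups.min(axis=-1, keepdims=True)
        hi = groups.max(axis=-1, keepdims=True)
        scale = (hi - lo) / 15.0                    # 4 bits -> levels 0..15
        scale = np.where(scale == 0, 1.0, scale)
        zero = np.round(-lo / scale)                # zero-point per group
        q = np.clip(np.round(groups / scale + zero), 0, 15).astype(np.uint8)
        return q, scale, zero

    w = np.random.randn(8, 64).astype(np.float32)
    q, scale, zero = quantize_int4(w)
    deq = ((q.astype(np.float32) - zero) * scale).reshape(8, 64)
    print(np.abs(deq - w).max())                    # error bounded near scale/2

'linear:hqq' uses the same group layout but, per the HQQ approach, refines the scale and zero-point parameters through an iterative optimization rather than the plain min/max fit shown here.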
