Merge lowering of gguf files with ET tests into pull.yml (pytorch#417)
* Revert "Revert "Embedding quantization per backend (pytorch#402)" (pytorch#411)"

This reverts commit 8b35acdff4fded779799ab8a419e55f885dd8918.

* merge GGUF tests into pull.yml
mikekgfb authored and malfet committed Jul 17, 2024
1 parent 2d1c8cc commit a99dc12
Showing 1 changed file with 28 additions and 0 deletions.
28 changes: 28 additions & 0 deletions .github/workflows/pull.yml
@@ -279,6 +279,20 @@ jobs:
python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
cat ./output_et
echo "******************************************"
echo "**** Emb 4bit: channel-wise quantized ****"
echo "******************************************"
python export.py --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
cat ./output_et
echo "******************************************"
echo "****** Emb 4bit: group-wise quantized ****"
echo "******************************************"
python export.py --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
cat ./output_et
echo "******************************************"
echo "******* INT8 channel-wise quantized ******"
echo "******************************************"
@@ -300,6 +314,20 @@ jobs:
python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
# cat ./output_et
echo "******************************************"
echo "******** INT4 group-wise quantized *******"
echo "******************************************"
# python export.py --quant '{"linear:int4" : {"groupsize": 32}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
# python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
# cat ./output_et
echo "******************************************"
echo "******** HQQ group-wise quantized *******"
echo "******************************************"
# python export.py --quant '{"linear:hqq" : {"groupsize": 32}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
# python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
# cat ./output_et
echo "tests complete"
echo "******************************************"
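The commented-out INT4 and HQQ runs target 4-bit group-wise quantization of linear weights. A hedged sketch of what '{"linear:int4": {"groupsize": 32}}' implies — asymmetric 4-bit values with one (scale, zero-point) pair per 32 weights; illustrative only, not the packed kernel ExecuTorch would lower to:

    import numpy as np

    def quantize_int4(w, groupsize=32):
        """Asymmetric 4-bit quantization of an (out, in) weight matrix."""
        out_f, in_f = w.shape
        groups = w.reshape(out_f, in_f // groupsize, groupsize)
        lo = groups.min(axis=-1, keepdims=True)
        hi = groups.max(axis=-1, keepdims=True)
        scale = (hi - lo) / 15.0                    # 4 bits -> levels 0..15
        scale = np.where(scale == 0, 1.0, scale)
        zero = np.round(-lo / scale)                # zero-point per group
        q = np.clip(np.round(groups / scale + zero), 0, 15).astype(np.uint8)
        return q, scale, zero

    w = np.random.randn(8, 64).astype(np.float32)
    q, scale, zero = quantize_int4(w)
    deq = ((q.astype(np.float32) - zero) * scale).reshape(8, 64)
    print(np.abs(deq - w).max())                    # error bounded near scale/2

'linear:hqq' uses the same group layout but, per the HQQ approach, refines the scale and zero-point parameters through an iterative optimization rather than the plain min/max fit shown here.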
