fix the contiguous kernel error and prep for eora release
nbasyl committed on Mar 3, 2025 (commit dc6f0f1, parent bf9aa2f)
Showing 2 changed files with 0 additions and 199 deletions.
19 changes: 0 additions & 19 deletions gptqmodel/eora/eora_load_and_infer.py
@@ -35,22 +35,3 @@ def test_load(backend: BACKEND):
     print(f"Result: {result}")
     assert "paris" in result.lower()
 
-
-# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
-# quant_model_path = "/home/shihyangl/gptqmodel_save/Llama-3.2-1B-gptqmodel-4bit"
-# lora_path = "/home/shihyangl/llama3.2-1b-4bit-group128-eora-rank128-arc/adapter_model.safetensors" #"sliuau/llama3.2-1b-4bit-group128-eora-rank128-arc/blob/main/adapter_model.safetensors" #"sliuau/llama3.2-1b-4bit-group128-eora-rank128-arc"
-
-# adapter = EoRA(lora_path=lora_path, rank=128)
-
-# model = GPTQModel.load(
-#     quant_model_path,
-#     adapter=adapter,
-#     backend=BACKEND.TORCH,
-#     device_map="auto",
-# )
-
-# # print(model)
-# tokens = model.generate("Capital of France is")[0]
-# result = model.tokenizer.decode(tokens)
-# print(f"Result: {result}")
-# assert "paris" in result.lower()
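For context, the commented-out scratch code deleted above sketched the EoRA load-and-infer flow this commit preps for release. Below is a minimal runnable sketch of that flow, reconstructed from the deleted lines: the EoRA import path and the quantized-model id are assumptions (the deleted comments used local paths), while the adapter id, rank, and the GPTQModel.load call mirror the removed code.

# Top-level imports assumed from the gptqmodel package; the EoRA import
# path is a guess and may differ in the actual module layout.
from gptqmodel import BACKEND, GPTQModel
from gptqmodel.adapter.adapter import EoRA

# Placeholder artifacts: substitute your own 4-bit GPTQ checkpoint.
# The adapter id below is the one referenced in the removed comments.
quant_model_path = "sliuau/llama3.2-1b-4bit-group128"  # hypothetical model id
lora_path = "sliuau/llama3.2-1b-4bit-group128-eora-rank128-arc"

# Wrap the rank-128 EoRA adapter and attach it at load time.
adapter = EoRA(lora_path=lora_path, rank=128)

model = GPTQModel.load(
    quant_model_path,
    adapter=adapter,
    backend=BACKEND.TORCH,
    device_map="auto",
)

# Smoke test: the adapter-augmented model should still answer a trivial prompt.
tokens = model.generate("Capital of France is")[0]
result = model.tokenizer.decode(tokens)
print(f"Result: {result}")
assert "paris" in result.lower()

Passing adapter= at load time is intended to apply the low-rank EoRA correction on top of the frozen 4-bit weights during generation, rather than modifying the quantized checkpoint itself.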
180 changes: 0 additions & 180 deletions llama.py

This file was deleted.
