Update macOS support.
haotian-liu committed Oct 31, 2023
commit 785f766 · 1 parent a546269
Showing 4 changed files with 35 additions and 3 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -58,7 +58,7 @@

## Install

-If you are using Windows, do *NOT* proceed, see instructions [here](https://github.com/haotian-liu/LLaVA/blob/main/docs/Windows.md).
+If you are not using Linux, do *NOT* proceed, see instructions for [macOS](https://github.com/haotian-liu/LLaVA/blob/main/docs/macOS.md) and [Windows](https://github.com/haotian-liu/LLaVA/blob/main/docs/Windows.md).

1. Clone this repository and navigate to LLaVA folder
```bash
29 changes: 29 additions & 0 deletions docs/macOS.md
@@ -0,0 +1,29 @@
# Run LLaVA on macOS

*NOTE: LLaVA on macOS is not fully supported. Currently we only support 16-bit inference. More functionality on macOS will be added soon; stay tuned.*

## Installation

1. Clone this repository and navigate to LLaVA folder
```bash
git clone https://github.com/haotian-liu/LLaVA.git
cd LLaVA
```

2. Install Package
```Shell
conda create -n llava python=3.10 -y
conda activate llava
python -m pip install --upgrade pip  # enable PEP 660 support
pip install -e .
pip install torch==2.1.0 torchvision==0.16.0
pip uninstall bitsandbytes  # bitsandbytes requires CUDA and is not supported on macOS
```
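
Optionally (this check is not part of the original instructions), you can verify that PyTorch can see the Apple MPS backend before proceeding:

```Shell
python -c "import torch; print(torch.backends.mps.is_available())"  # should print True
```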

## Run demo

Specify `--device mps` when launching the model worker or the CLI.

See instructions [here](https://github.com/haotian-liu/LLaVA#demo).
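
For illustration, launching the CLI on an Apple Silicon Mac might look like the following; the model path and image URL are just the examples used in the demo instructions:

```Shell
python -m llava.serve.cli \
    --model-path liuhaotian/llava-v1.5-7b \
    --image-file "https://llava-vl.github.io/static/images/view.jpg" \
    --device mps
```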

Note that quantization (4-bit, 8-bit) is *NOT* supported on macOS. Stay tuned for 4-bit support on macOS!
5 changes: 4 additions & 1 deletion llava/model/builder.py
@@ -26,6 +26,9 @@
def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, load_4bit=False, device_map="auto", device="cuda"):
    kwargs = {"device_map": device_map}

+    if device != "cuda":
+        kwargs['device_map'] = {"": device}

    if load_8bit:
        kwargs['load_in_8bit'] = True
    elif load_4bit:
@@ -107,7 +110,7 @@ def load_from_hf(repo_id, filename, subfolder=None):
            # PEFT model
            from peft import PeftModel
            tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
-            model = AutoModelForCausalLM.from_pretrained(model_base, torch_dtype=torch.float16, low_cpu_mem_usage=True, device_map="auto")
+            model = AutoModelForCausalLM.from_pretrained(model_base, low_cpu_mem_usage=True, **kwargs)
            print(f"Loading LoRA weights from {model_path}")
            model = PeftModel.from_pretrained(model, model_path)
            print(f"Merging weights")
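In a Hugging Face `device_map`, the empty-string key matches the model root, so `{"": device}` pins the entire model to a single device (e.g. `mps` or `cpu`) instead of letting `accelerate` auto-shard it across CUDA GPUs. A minimal standalone sketch of the selection logic above, separate from the actual commit:

```python
def build_kwargs(device="cuda", device_map="auto"):
    kwargs = {"device_map": device_map}
    if device != "cuda":
        # "" matches the model root: place every module on the one device
        kwargs["device_map"] = {"": device}
    return kwargs

print(build_kwargs())       # {'device_map': 'auto'}
print(build_kwargs("mps"))  # {'device_map': {'': 'mps'}}
```
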
2 changes: 1 addition & 1 deletion llava/serve/cli.py
@@ -84,7 +84,7 @@ def main(args):
        conv.append_message(conv.roles[1], None)
        prompt = conv.get_prompt()

-        input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).cuda()
+        input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).to(model.device)
        stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
        keywords = [stop_str]
        stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids)
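The old `.cuda()` call hard-codes NVIDIA GPUs, whereas `.to(model.device)` follows whatever backend the model was loaded on. A small illustration of the difference (works on any recent PyTorch):

```python
import torch

# .to(device) is portable across backends; .cuda() raises on machines
# without an NVIDIA GPU (e.g. Apple Silicon).
device = "cuda" if torch.cuda.is_available() else \
         "mps" if torch.backends.mps.is_available() else "cpu"
x = torch.ones(1, 3).to(device)
print(x.device)
```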
