Skip to content

Commit

Permalink
image to text support, closes #134
Browse files Browse the repository at this point in the history
  • Loading branch information
madox2 committed Dec 20, 2024
1 parent 933a90d commit 2643c4f
Show file tree
Hide file tree
Showing 6 changed files with 345 additions and 107 deletions.
20 changes: 17 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ To get an idea what is possible to do with AI commands see the [prompts](https:/
- Edit selected text in-place with AI
- Interactive conversation with ChatGPT
- Custom roles
- Vision capabilities (image to text)
- Integrates with any OpenAI-compatible API

## How it works
Expand Down Expand Up @@ -194,7 +195,7 @@ You are a Clean Code expert, I have the following code, please refactor it in a
```

To include files in the chat a special `include` role is used:
To include files in the chat a special `include` section is used:

```
>>> user
Expand All @@ -207,9 +208,22 @@ Generate documentation for the following files
/home/user/myproject/**/*.py
```

Each file's contents will be added to an additional `user` role message with the files separated by `==> {path} <==`, where path is the path to the file. Globbing is expanded out via `glob.glob` and relative paths to the current working directory (as determined by `getcwd()`) will be resolved to absolute paths.
Each file's contents will be added to an additional user message with a `==> {path} <==` header. Relative paths are resolved against the current working directory.

Supported chat roles are **`>>> system`**, **`>>> user`**, **`>>> include`** and **`<<< assistant`**

To use image vision capabilities (image to text), include an image file:

```
>>> user
What object is on the image?
>>> include
~/myimage.jpg
```

Supported chat sections are **`>>> system`**, **`>>> user`**, **`>>> include`** and **`<<< assistant`**

### `:AIRedo`

Expand Down
3 changes: 2 additions & 1 deletion py/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ def initialize_chat_window():
messages = initial_messages + chat_messages

try:
if messages[-1]["content"].strip():
last_content = messages[-1]["content"][-1]
if last_content['type'] != 'text' or last_content['text']:
vim.command("normal! Go\n<<< assistant\n\n")
vim.command("redraw")

Expand Down
116 changes: 69 additions & 47 deletions py/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from urllib.error import HTTPError
import traceback
import configparser
import base64

utils_py_imported = True

Expand Down Expand Up @@ -109,63 +110,84 @@ def render_text_chunks(chunks):
if not full_text.strip():
raise KnownError('Empty response received. Tip: You can try modifying the prompt and retry.')

def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    The file is read in binary mode; the base64 output is decoded as UTF-8
    so the result is a ``str`` that can be embedded in text.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

def parse_chat_messages(chat_content):
lines = chat_content.splitlines()
messages = []
for line in lines:
if line.startswith(">>> system"):
messages.append({"role": "system", "content": ""})
continue
if line.startswith(">>> user"):
messages.append({"role": "user", "content": ""})
continue
if line.startswith(">>> include"):
messages.append({"role": "include", "content": ""})
continue
if line.startswith("<<< assistant"):
messages.append({"role": "assistant", "content": ""})
continue
if not messages:
continue
messages[-1]["content"] += "\n" + line

for message in messages:
# strip newlines from the content as it causes empty responses
message["content"] = message["content"].strip()
def is_image_path(path):
    """Tell whether ``path`` names an image file, judging by its extension.

    Surrounding whitespace is ignored and the comparison is case-insensitive,
    so ``photo.JPG`` counts as an image. A name without any dot (e.g. a file
    literally called ``png``) has no extension and is not an image.

    Returns:
        True if the extension is one of jpg, jpeg, png or gif.
    """
    # rpartition yields an empty separator when the name contains no dot
    _, dot, ext = path.strip().rpartition('.')
    return bool(dot) and ext.lower() in ('jpg', 'jpeg', 'png', 'gif')

if message["role"] == "include":
message["role"] = "user"
paths = message["content"].split("\n")
message["content"] = ""
def parse_include_paths(path):
    """Resolve one line of an include section to a list of file paths.

    The line is stripped of surrounding whitespace, ``~`` is expanded and a
    relative path is joined onto Vim's current working directory. A path
    containing ``*`` is expanded with a recursive glob; the matches are
    sorted so the order of included files is deterministic.

    Args:
        path: a single line from an include section (may be blank).

    Returns:
        A list of paths with directories filtered out; empty for a blank
        line.
    """
    path = (path or '').strip()
    if not path:
        return []

    path = os.path.expanduser(path)
    if not os.path.isabs(path):
        # only ask Vim for its working directory when it is actually needed
        path = os.path.join(vim.eval('getcwd()'), path)

    if '*' in path:
        expanded_paths = sorted(glob.glob(path, recursive=True))
    else:
        expanded_paths = [path]

    return [candidate for candidate in expanded_paths if not os.path.isdir(candidate)]

for path in paths:
if path is None:
continue
def make_image_message(path):
    """Build an ``image_url`` content part that embeds the image at ``path``.

    The file is base64-encoded via ``encode_image`` and wrapped in a
    ``data:image/<subtype>;base64,...`` URL. The subtype comes from the
    lower-cased file extension.

    Returns:
        A dict of the form
        ``{'type': 'image_url', 'image_url': {'url': <data URL>}}``.
    """
    ext = path.strip().rsplit('.', 1)[-1].lower()
    # 'jpg' is only a file extension; the registered media type is image/jpeg
    subtype = 'jpeg' if ext == 'jpg' else ext
    base64_image = encode_image(path)
    return {'type': 'image_url', 'image_url': {'url': f"data:image/{subtype};base64,{base64_image}"}}

def make_text_file_message(path):
    """Build a ``text`` content part holding the contents of the file at ``path``.

    The text starts with a ``==> {path} <==`` header line followed by the
    file contents with surrounding whitespace stripped. If the file cannot
    be decoded as text, a placeholder notice is used instead of the contents.
    """
    header = f'==> {path} <==\n'
    try:
        with open(path, 'r') as handle:
            body = handle.read().strip()
    except UnicodeDecodeError:
        body = 'Binary file, cannot display'
    return { 'type': 'text', 'text': header + body }

def parse_chat_messages(chat_content):
    """Parse the text of a chat buffer into a list of chat messages.

    The buffer is split into sections introduced by the header lines
    ``>>> system``, ``>>> user``, ``>>> include`` and ``<<< assistant``
    (trailing whitespace on a header line is tolerated). Lines before the
    first header are ignored.

    * ``system`` and ``assistant`` sections each start a new message with a
      single text part.
    * A ``user`` section starts a new user message, or appends a new text
      part when the previous message is already a user message.
    * An ``include`` section attaches files to the preceding user message
      (creating one if needed): each line is resolved with
      ``parse_include_paths`` and every resulting file becomes an image part
      or a text part.

    Written with plain ``if``/``elif`` rather than ``match`` so the module
    also loads on Python versions older than 3.10.

    Args:
        chat_content: full text of the chat buffer.

    Returns:
        A list of ``{'role': ..., 'content': [parts]}`` dicts. Text parts
        have their surrounding whitespace stripped; empty text parts are kept.
    """
    section_headers = {
        '>>> system': 'system',
        '>>> user': 'user',
        '>>> include': 'include',
        '<<< assistant': 'assistant',
    }
    messages = []
    current_type = ''

    for line in chat_content.splitlines():
        section = section_headers.get(line.rstrip())
        if section is not None:
            current_type = section
            if section == 'include':
                # included files belong to a user message; reuse the last one if possible
                if not messages or messages[-1]['role'] != 'user':
                    messages.append({'role': 'user', 'content': []})
            elif section == 'user' and messages and messages[-1]['role'] == 'user':
                # text following an include continues the same user message
                messages[-1]['content'].append({'type': 'text', 'text': ''})
            else:
                messages.append({'role': section, 'content': [{'type': 'text', 'text': ''}]})
            continue

        if not messages:
            # content before the first section header is ignored
            continue

        if current_type == 'include':
            for path in parse_include_paths(line):
                content = make_image_message(path) if is_image_path(path) else make_text_file_message(path)
                messages[-1]['content'].append(content)
        else:
            messages[-1]['content'][-1]['text'] += '\n' + line

    for message in messages:
        # strip newlines from the text content as it causes empty responses
        for content in message['content']:
            if content['type'] == 'text':
                content['text'] = content['text'].strip()

    return messages

Expand Down
Loading

0 comments on commit 2643c4f

Please sign in to comment.