Skip to content

Commit

Permalink
added image generation
Browse files Browse the repository at this point in the history
  • Loading branch information
madox2 committed Dec 22, 2024
1 parent 4f1c953 commit fbc2bfb
Show file tree
Hide file tree
Showing 13 changed files with 256 additions and 26 deletions.
73 changes: 55 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ To get an idea what is possible to do with AI commands see the [prompts](https:/
- Interactive conversation with ChatGPT
- Custom roles
- Vision capabilities (image to text)
- Generate images
- Integrates with any OpenAI-compatible API

## How it works
Expand Down Expand Up @@ -88,13 +89,14 @@ git clone https://github.com/madox2/vim-ai.git ~/.local/share/nvim/site/pack/plu
To use an AI command, type the command followed by an instruction prompt. You can also combine it with a visual selection. Here is a brief overview of available commands:

```
=========== Basic AI commands ============
========== Basic AI commands ==========
:AI complete text
:AIEdit edit text
:AIChat continue or open new chat
:AI complete text
:AIEdit edit text
:AIChat continue or open new chat
:AIImage generate image
=============== Utilities ================
============== Utilities ==============
:AIRedo repeat last AI command
:AIUtilRolesOpen open role config file
Expand All @@ -106,7 +108,7 @@ To use an AI command, type the command followed by an instruction prompt. You ca

**Tip:** Press `Ctrl-c` anytime to cancel completion

**Tip:** Use command shortcuts - `:AIE`, `:AIC`, `:AIR` or setup your own [key bindings](#key-bindings)
**Tip:** Use command shortcuts - `:AIE`, `:AIC`, `:AIR`, `:AII` or setup your own [key bindings](#key-bindings)

**Tip:** Define and use [custom roles](#roles), e.g. `:AIEdit /grammar`.

Expand Down Expand Up @@ -167,6 +169,16 @@ In the documentation below, `<selection>` denotes a visual selection or any oth

`<selection>? :AIEdit /{role} {instruction}?` - use role to edit

### `:AIImage`

`:AIImage {prompt}` - generate image with prompt

`<selection> :AIImage` - generate image with selection

`<selection>? :AIImage /{role} {instruction}?` - use role to generate

[Pre-defined](./roles-default.ini) image roles: `/hd`, `/natural`

### `:AIChat`

`:AIChat` - continue or start a new conversation.
Expand All @@ -177,6 +189,8 @@ In the documentation below, `<selection>` denotes a visual selection or any oth

When the AI finishes answering, you can continue the conversation by entering insert mode, adding your prompt, and then using the command `:AIChat` once again.

[Pre-defined](./roles-default.ini) chat roles: `/right`, `/below`, `/tab`

#### `.aichat` files

You can edit and save the chat conversation to an `.aichat` file and restore it later.
Expand Down Expand Up @@ -252,7 +266,7 @@ let g:vim_ai_chat = {
Alternatively you can use special `default` role:

```ini
[default]
[default.chat]
options.model = o1-preview
options.stream = 0
options.temperature = 1
Expand Down Expand Up @@ -397,17 +411,29 @@ let g:vim_ai_chat = {
\ },
\}
" Notes:
" ui.paste_mode
" - if disabled code indentation will work but AI doesn't always respond with a code block
" therefore it could be messed up
" - find out more in vim's help `:help paste`
" options.max_tokens
" - note that prompt + max_tokens must be less than model's token limit, see #42, #46
" - setting max tokens to 0 will exclude it from the OpenAI API request parameters, it is
" unclear/undocumented what it exactly does, but it seems to resolve issues when the model
" hits token limit, which respond with `OpenAI: HTTPError 400`
" :AIImage
" - prompt: optional prepended prompt
" - options: openai config (https://platform.openai.com/docs/api-reference/images/create)
" - options.request_timeout: request timeout in seconds
" - options.enable_auth: enable authorization using openai key
" - options.token_file_path: override global token configuration
" - ui.download_dir: path to image download directory, `cwd` if not defined
let g:vim_ai_image_default = {
\ "prompt": "",
\ "options": {
\ "model": "dall-e-3",
\ "endpoint_url": "https://api.openai.com/v1/images/generations",
\ "quality": "standard",
\ "size": "1024x1024",
\ "style": "vivid",
\ "request_timeout": 20,
\ "enable_auth": 1,
\ "token_file_path": "",
\ },
\ "ui": {
\ "download_dir": "",
\ },
\}
" custom roles file location
let g:vim_ai_roles_config_file = s:plugin_root . "/roles-example.ini"
Expand All @@ -418,6 +444,17 @@ let g:vim_ai_token_file_path = "~/.config/openai.token"
" debug settings
let g:vim_ai_debug = 0
let g:vim_ai_debug_log_file = "/tmp/vim_ai_debug.log"
" Notes:
" ui.paste_mode
" - if disabled code indentation will work but AI doesn't always respond with a code block
" therefore it could be messed up
" - find out more in vim's help `:help paste`
" options.max_tokens
" - note that prompt + max_tokens must be less than model's token limit, see #42, #46
" - setting max tokens to 0 will exclude it from the OpenAI API request parameters, it is
" unclear/undocumented what it exactly does, but it seems to resolve issues when the model
" hits token limit, which respond with `OpenAI: HTTPError 400`
```

### Using custom API
Expand Down
43 changes: 41 additions & 2 deletions autoload/vim_ai.vim
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ let s:last_config = {}
let s:scratch_buffer_name = ">>> AI chat"

function! s:ImportPythonModules()
for py_module in ['utils', 'context', 'chat', 'complete', 'roles']
for py_module in ['utils', 'context', 'chat', 'complete', 'roles', 'image']
if !py3eval("'" . py_module . "_py_imported' in globals()")
execute "py3file " . s:plugin_root . "/py/" . py_module . ".py"
endif
Expand Down Expand Up @@ -209,6 +209,37 @@ function! vim_ai#AIEditRun(uses_range, config, ...) range abort
endtry
endfunction

" Generate an image (:AIImage command implementation)
" - uses_range - truthy if a range was passed to the command
" - config - function scoped vim_ai_image config
" - a:1 - optional instruction prompt
function! vim_ai#AIImageRun(uses_range, config, ...) range abort
call s:ImportPythonModules()
let l:instruction = a:0 > 0 ? a:1 : ""
" the range counts as a visual selection only when it matches the '< and '> marks
let l:is_selection = a:uses_range && a:firstline == line("'<") && a:lastline == line("'>")
let l:selection = s:GetSelectionOrRange(l:is_selection, a:uses_range, a:firstline, a:lastline)

" merge defaults, command config and role config into the final context
let l:config_input = {
\ "config_default": g:vim_ai_image,
\ "config_extension": a:config,
\ "user_instruction": l:instruction,
\ "user_selection": l:selection,
\ "is_selection": l:is_selection,
\ "command_type": 'image',
\}
let l:context = py3eval("make_ai_context(unwrap('l:config_input'))")
let l:config = l:context['config']

" remember invocation details so :AIRedo can repeat this command
let s:last_command = "image"
let s:last_config = a:config
let s:last_instruction = l:instruction
let s:last_is_selection = l:is_selection
let s:last_firstline = a:firstline
let s:last_lastline = a:lastline

py3 run_ai_image(unwrap('l:context'))
endfunction

function! s:ReuseOrCreateChatWindow(config)
let l:open_conf = a:config['ui']['open_chat_command']

Expand Down Expand Up @@ -292,11 +323,15 @@ endfunction

" Repeat last AI command
function! vim_ai#AIRedoRun() abort
undo
if s:last_command !=# "image"
undo
endif
if s:last_command ==# "complete"
exe s:last_firstline.",".s:last_lastline . "call vim_ai#AIRun(s:last_is_selection, s:last_config, s:last_instruction)"
elseif s:last_command ==# "edit"
exe s:last_firstline.",".s:last_lastline . "call vim_ai#AIEditRun(s:last_is_selection, s:last_config, s:last_instruction)"
elseif s:last_command ==# "image"
exe s:last_firstline.",".s:last_lastline . "call vim_ai#AIImageRun(s:last_is_selection, s:last_config, s:last_instruction)"
elseif s:last_command ==# "chat"
" chat does not need prompt, all information are in the buffer already
call vim_ai#AIChatRun(0, s:last_config)
Expand All @@ -314,6 +349,10 @@ function! vim_ai#RoleCompletionComplete(A,L,P) abort
return s:RoleCompletion(a:A, 'complete')
endfunction

" command-line completion of /role arguments for the :AIImage command
function! vim_ai#RoleCompletionImage(A,L,P) abort
let l:role_names = s:RoleCompletion(a:A, 'image')
return l:role_names
endfunction

" command-line completion of /role arguments for the :AIEdit command
function! vim_ai#RoleCompletionEdit(A,L,P) abort
let l:role_names = s:RoleCompletion(a:A, 'edit')
return l:role_names
endfunction
Expand Down
17 changes: 17 additions & 0 deletions autoload/vim_ai_config.vim
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,22 @@ let g:vim_ai_edit_default = {
\ "paste_mode": 1,
\ },
\}
" default configuration for the :AIImage command (dall-e image generation)
let g:vim_ai_image_default = {
\ "prompt": "",
\ "options": {
\ "model": "dall-e-3",
\ "endpoint_url": "https://api.openai.com/v1/images/generations",
\ "quality": "standard",
\ "size": "1024x1024",
\ "style": "vivid",
\ "request_timeout": 20,
\ "enable_auth": 1,
\ "token_file_path": "",
\ },
\ "ui": {
\ "download_dir": "",
\ },
\}

let s:initial_chat_prompt =<< trim END
>>> system
Expand Down Expand Up @@ -122,6 +138,7 @@ endfunction

call s:MakeConfig("vim_ai_chat")
call s:MakeConfig("vim_ai_complete")
call s:MakeConfig("vim_ai_image")
call s:MakeConfig("vim_ai_edit")

function! vim_ai_config#load()
Expand Down
28 changes: 27 additions & 1 deletion doc/vim-ai.txt
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,32 @@ Globbing is expanded out via `glob.gob` and relative paths to the current
working directory (as determined by `getcwd()`) will be resolved to absolute
paths.

*:AIImage*

<selection>? :AIImage {instruction}? generate image given the selection or
the instruction

Options: >
let g:vim_ai_image_default = {
\ "prompt": "",
\ "options": {
\ "model": "dall-e-3",
\ "endpoint_url": "https://api.openai.com/v1/images/generations",
\ "quality": "standard",
\ "size": "1024x1024",
\ "style": "vivid",
\ "request_timeout": 20,
\ "enable_auth": 1,
\ "token_file_path": "",
\ },
\ "ui": {
\ "download_dir": "",
\ },
\}

Check OpenAI docs for more information:
https://platform.openai.com/docs/api-reference/images/create

*:AIRedo*

:AIRedo repeat last AI command in order to re-try
Expand Down Expand Up @@ -182,7 +208,7 @@ a selection of options: >

Alternatively you can use special `default` role: >

[default]
[default.chat]
options.model=gpt-4
options.temperature=0.2

Expand Down
1 change: 1 addition & 0 deletions plugin/vim-ai.vim
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ endif
command! -range -nargs=? -complete=customlist,vim_ai#RoleCompletionComplete AI <line1>,<line2>call vim_ai#AIRun(<range>, {}, <q-args>)
command! -range -nargs=? -complete=customlist,vim_ai#RoleCompletionEdit AIEdit <line1>,<line2>call vim_ai#AIEditRun(<range>, {}, <q-args>)
command! -range -nargs=? -complete=customlist,vim_ai#RoleCompletionChat AIChat <line1>,<line2>call vim_ai#AIChatRun(<range>, {}, <q-args>)
command! -range -nargs=? -complete=customlist,vim_ai#RoleCompletionImage AIImage <line1>,<line2>call vim_ai#AIImageRun(<range>, {}, <q-args>)
command! -nargs=? AINewChat call vim_ai#AINewChatDeprecatedRun(<f-args>)
command! AIRedo call vim_ai#AIRedoRun()
command! AIUtilRolesOpen call vim_ai#AIUtilRolesOpen()
Expand Down
5 changes: 3 additions & 2 deletions py/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def load_role_config(role):

enhance_roles_with_custom_function(roles)

postfixes = ["", ".complete", ".edit", ".chat"]
postfixes = ["", ".complete", ".edit", ".chat", ".image"]
if not any([f"{role}{postfix}" in roles for postfix in postfixes]):
raise Exception(f"Role `{role}` not found")

Expand All @@ -91,6 +91,7 @@ def load_role_config(role):
'role_complete': parse_role_section(roles.get(f"{role}.complete", {})),
'role_edit': parse_role_section(roles.get(f"{role}.edit", {})),
'role_chat': parse_role_section(roles.get(f"{role}.chat", {})),
'role_image': parse_role_section(roles.get(f"{role}.image", {})),
}

def parse_role_names(prompt):
Expand Down Expand Up @@ -147,7 +148,7 @@ def make_ai_context(params):

user_prompt, role_config = parse_prompt_and_role_config(user_instruction, command_type)
final_config = merge_deep([config_default, config_extension, role_config])
selection_boundary = final_config['options']['selection_boundary']
selection_boundary = final_config['options'].get('selection_boundary', '')
config_prompt = final_config.get('prompt', '')
prompt = make_prompt(config_prompt, user_prompt, user_selection, selection_boundary)

Expand Down
50 changes: 50 additions & 0 deletions py/image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import vim
import datetime
import os

image_py_imported = True

def make_openai_image_options(options):
    """Build the OpenAI images/generations request parameters.

    Reads quality/size/style from the user's config (falling back to the
    documented defaults) instead of hardcoding them, so the `quality`,
    `size` and `style` options in g:vim_ai_image actually take effect.
    """
    return {
        'model': options['model'],
        'quality': options.get('quality', 'standard'),
        'size': options.get('size', '1024x1024'),
        'style': options.get('style', 'vivid'),
        # b64_json returns the image bytes inline, so no second download request
        'response_format': 'b64_json',
    }

def make_image_path(ui):
    """Build a unique, timestamped png path inside the download directory.

    Falls back to vim's cwd when ui.download_dir is missing OR empty —
    the default config sets it to "", so a plain .get() default alone
    would never apply (the key always exists).
    """
    download_dir = ui.get('download_dir') or vim.eval('getcwd()')
    # timezone.utc works on python < 3.11, unlike the datetime.UTC alias
    timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    filename = f'vim_ai_{timestamp}.png'
    return os.path.join(download_dir, filename)

def run_ai_image(context):
    """Entry point for :AIImage — request an image for the prompt and save it.

    `context` is the dict produced by make_ai_context: the final 'prompt'
    string plus the merged 'config' ('options' and 'ui' sections).
    """
    prompt = context['prompt']
    config = context['config']
    config_options = config['options']
    ui = config['ui']

    try:
        if prompt:
            print('Generating...')
            openai_options = make_openai_image_options(config_options)
            http_options = make_http_options(config_options)
            request = { 'prompt': prompt, **openai_options }

            print_debug("[image] text:\n" + prompt)
            print_debug("[image] request: {}", request)
            url = config_options['endpoint_url']

            # openai_request yields responses; take the first (only) one
            response, *_ = openai_request(url, request, http_options)
            print_debug("[image] response: {}", { 'images_count': len(response['data']) })

            # response carries base64 data (response_format=b64_json); save to disk
            path = make_image_path(ui)
            b64_data = response['data'][0]['b64_json']
            save_b64_to_file(path, b64_data)

            clear_echo_message()
            print(f"Image: {path}")
    except BaseException as error:
        # NOTE(review): helpers (handle_completion_error, openai_request) and
        # the traceback module are presumably provided by utils.py sharing the
        # same python globals via s:ImportPythonModules() — verify at runtime
        handle_completion_error(error)
        print_debug("[image] error: {}", traceback.format_exc())
9 changes: 7 additions & 2 deletions py/roles.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,13 @@ def load_ai_role_names(command_type):
role_names = set()
for name in roles.sections():
parts = name.split('.')
if len(parts) == 1 or parts[-1] == command_type:
role_names.add(parts[0])
if command_type == 'image':
# special case - image type have to be explicitely defined
if len(parts) > 1 and parts[-1] == command_type:
role_names.add(parts[0])
else:
if len(parts) == 1 or parts[-1] == command_type:
role_names.add(parts[0])

role_names = [name for name in role_names if name != DEFAULT_ROLE_NAME]

Expand Down
7 changes: 6 additions & 1 deletion py/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ def openai_request(url, data, options):
)

with urllib.request.urlopen(req, timeout=request_timeout) as response:
if not data['stream']:
if not data.get('stream', 0):
yield json.loads(response.read().decode())
return
for line_bytes in response:
Expand Down Expand Up @@ -354,3 +354,8 @@ def read_role_files():
roles = configparser.ConfigParser()
roles.read([default_roles_config_path, roles_config_path])
return roles

def save_b64_to_file(path, b64_data):
    """Decode base64-encoded binary data and write it to `path`.

    Uses a context manager so the file handle is closed even when decoding
    or writing raises (the original left the handle open on error).
    """
    with open(path, "wb") as f:
        f.write(base64.b64decode(b64_data))
Loading

0 comments on commit fbc2bfb

Please sign in to comment.