Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CLI #60

Merged
merged 5 commits into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,6 @@ eslint.config.mjs
LICENSE
volumes
docker-compose*
Makefile
Makefile
setup
generate_production_env.html
8 changes: 4 additions & 4 deletions .env
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
APP_PORT=8000
APP_EXPOSE_PORT=8000
ENG_ACCESS_PORT=8080
MODEL_SAVE_PATH=volumes/models
INFERENCE_ENG=llamacpp
INFERENCE_ENG_PORT=8080
INFERENCE_ENG_VERSION=server--b1-2321a5e
INFERENCE_ENG_VERSION=server--b1-27d4b7c
NUM_CPU_CORES=8.00
NUM_THREADS_COUNT=8
NUM_THREADS_COUNT=8.00
EMBEDDING_ENG=embedding_eng
EMBEDDING_ENG_PORT=8081
NUM_CPU_CORES_EMBEDDING=4.00
NUM_THREAD_COUNTS_EMBEDDING=4.00
LANGUAGE_MODEL_NAME=Phi3-mini-4k-instruct-Q4.gguf
LANGUAGE_MODEL_URL=https://huggingface.co/aisuko/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi3-mini-4k-instruct-Q4.gguf?download=true
EMBEDDING_MODEL_NAME=all-MiniLM-L6-v2-Q4_K_M-v2.gguf
EMBEDDING_MODEL_URL=https://huggingface.co/aisuko/all-MiniLM-L6-v2-gguf/resolve/main/all-MiniLM-L6-v2-Q4_K_M-v2.gguf?download=true
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
node_modules
.git
volumes
__pycache__
__pycache__
setup/setup
26 changes: 18 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# project related
PROJECT_NAME:=voyager
CONTAINER_NAME:=voyager:v0.1.0
CONTAINER_NAME:=voyager:v0.2.0
APP_PORT:=8000
APP_EXPOSE_PORT:=8000
# compose build related
ENV_FILE:=.env

Expand All @@ -10,15 +11,16 @@ MODEL_SAVE_PATH:=volumes/models

INFERENCE_ENG:=llamacpp
INFERENCE_ENG_PORT:=8080
INFERENCE_ENG_VERSION:=server--b1-2321a5e
INFERENCE_ENG_VERSION:=server--b1-27d4b7c
NUM_CPU_CORES:=8.00
NUM_THREADS_COUNT:=8
NUM_THREADS_COUNT:=8.00

EMBEDDING_ENG:=embedding_eng
EMBEDDING_ENG_PORT:=8081
NUM_CPU_CORES_EMBEDDING:=4.00
LANGUAGE_MODEL_NAME:=Phi3-mini-4k-instruct-Q4.gguf
LANGUAGE_MODEL_URL:=https://huggingface.co/aisuko/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi3-mini-4k-instruct-Q4.gguf?download=true
NUM_THREAD_COUNTS_EMBEDDING:=4.00
LANGUAGE_MODEL_NAME:=ft-smollm-135M-instruct-on-hf-ultrafeedback-f16.gguf
LANGUAGE_MODEL_URL:=https://huggingface.co/aisuko/ft-smollm-135M-instruct-on-hf-ultrafeedback-gguf/resolve/main/ft-smollm-135M-instruct-on-hf-ultrafeedback-f16.gguf
EMBEDDING_MODEL_NAME:=all-MiniLM-L6-v2-Q4_K_M-v2.gguf
EMBEDDING_MODEL_URL:=https://huggingface.co/aisuko/all-MiniLM-L6-v2-gguf/resolve/main/all-MiniLM-L6-v2-Q4_K_M-v2.gguf?download=true

Expand All @@ -35,6 +37,7 @@ run: build
.PHONY: env
env:
@echo "APP_PORT=$(APP_PORT)"> $(ENV_FILE)
@echo "APP_EXPOSE_PORT=$(APP_EXPOSE_PORT)">> $(ENV_FILE)
@echo "ENG_ACCESS_PORT=$(ENG_ACCESS_PORT)">> $(ENV_FILE)
@echo "MODEL_SAVE_PATH=$(MODEL_SAVE_PATH)">> $(ENV_FILE)
@echo "INFERENCE_ENG=$(INFERENCE_ENG)">> $(ENV_FILE)
Expand All @@ -45,10 +48,9 @@ env:
@echo "EMBEDDING_ENG=$(EMBEDDING_ENG)">> $(ENV_FILE)
@echo "EMBEDDING_ENG_PORT=$(EMBEDDING_ENG_PORT)">> $(ENV_FILE)
@echo "NUM_CPU_CORES_EMBEDDING=$(NUM_CPU_CORES_EMBEDDING)">> $(ENV_FILE)
@echo "NUM_THREAD_COUNTS_EMBEDDING=$(NUM_THREAD_COUNTS_EMBEDDING)">> $(ENV_FILE)
@echo "LANGUAGE_MODEL_NAME=$(LANGUAGE_MODEL_NAME)">> $(ENV_FILE)
@echo "LANGUAGE_MODEL_URL=$(LANGUAGE_MODEL_URL)">> $(ENV_FILE)
@echo "EMBEDDING_MODEL_NAME=$(EMBEDDING_MODEL_NAME)">> $(ENV_FILE)
@echo "EMBEDDING_MODEL_URL=$(EMBEDDING_MODEL_URL)">> $(ENV_FILE)

.PHONY: model-prepare
model-prepare:
Expand Down Expand Up @@ -83,4 +85,12 @@ stop:

.PHONY: pytest
pytest:
@python3 -m pytest -v
@python3 -m pytest -v

#########################################################################################
# setup

.PHONY: setup
setup:
gcc setup/setup.c -o setup/setup
setup/setup
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,18 @@ The project is OpenAI-like API service of SkywardAI ecosystem.
https://github.com/user-attachments/assets/2b8f1ea7-0aca-44ea-b218-eff8e1769729


### CLI 💥
Introducing our new CLI tool!
> Make sure you can run `make`, `docker compose`, `gcc`, and `sh` on your host machine.

Simply run `make setup` in the root folder to compile & run the CLI tool.

Don't want to configure anything? Go directly to the `Save & Build` menu and choose the `Build and start the server` option to load the app with default settings.

No gcc compiler? You can compile `/setup/setup.c` yourself with another C compiler.

Explore it yourself to find more settings!


### Local Machine
* Please make sure you installed `Node.js` on your local machine.
Expand Down
2 changes: 1 addition & 1 deletion database/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ export async function initDB(force = false) {
// create or re-open api key table
await db.createEmptyTable(API_KEY_TABLE, new Schema([
new Field("api_key", new Utf8()),
new Field("usage", new Int32()),
new Field("usage", new Int32())
]), open_options);
}

Expand Down
46 changes: 46 additions & 0 deletions docker-compose-adv.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Advanced compose stack: two llama.cpp engine containers (chat inference +
# embeddings) plus the voyager Node.js API service built from setup/Dockerfile.
# All ${...} values are read from the .env file (regenerated by `make env`).
services:
  llamacpp:
    container_name: ${INFERENCE_ENG}
    image: gclub/llama.cpp:${INFERENCE_ENG_VERSION}
    restart: always
    deploy: # https://github.com/compose-spec/compose-spec/blob/master/deploy.md
      resources:
        reservations:
          cpus: "${NUM_CPU_CORES}"
    volumes:
      # Host model directory mounted read-write where llama.cpp expects /models.
      - "${DOCKER_VOLUME_DIRECTORY:-.}/${MODEL_SAVE_PATH}:/models"
    expose:
      - ${ENG_ACCESS_PORT}
    ports:
      - ${INFERENCE_ENG_PORT}:${ENG_ACCESS_PORT}
    # -t: thread count; -c 8192: context window for the chat model.
    command: ["-m", "models/${LANGUAGE_MODEL_NAME}","-t","${NUM_THREADS_COUNT}","-c","8192"]

  embedding_eng:
    container_name: ${EMBEDDING_ENG}
    # Same llama.cpp image as above, run in embedding mode with mean pooling.
    image: gclub/llama.cpp:${INFERENCE_ENG_VERSION}
    restart: always
    deploy: # https://github.com/compose-spec/compose-spec/blob/master/deploy.md
      resources:
        reservations:
          cpus: "${NUM_CPU_CORES_EMBEDDING}"
    volumes:
      - "${DOCKER_VOLUME_DIRECTORY:-.}/${MODEL_SAVE_PATH}:/models"
    expose:
      - ${ENG_ACCESS_PORT}
    ports:
      - ${EMBEDDING_ENG_PORT}:${ENG_ACCESS_PORT}
    # Smaller context (-c 512) is sufficient for embedding requests.
    command: ["-m", "models/${EMBEDDING_MODEL_NAME}","--embeddings","--pooling","mean","-t","${NUM_THREAD_COUNTS_EMBEDDING}","-c","512"]

  voyager:
    container_name: voyager
    restart: always
    build:
      dockerfile: setup/Dockerfile
      context: .
    # NOTE(review): `expose` lists APP_EXPOSE_PORT (the host-side variable)
    # while the container listens on APP_PORT; both default to 8000 so this
    # works today — confirm whether `expose` should use APP_PORT instead.
    expose:
      - ${APP_EXPOSE_PORT}
    ports:
      - ${APP_EXPOSE_PORT}:${APP_PORT}
    depends_on:
      - llamacpp
      - embedding_eng
7 changes: 7 additions & 0 deletions setup/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
FROM node:20.15.1-slim
WORKDIR /app

# Install dependencies before copying the full source tree so the (slow)
# `pnpm install` layer stays cached across source-only rebuilds; the original
# `COPY . .` first forced a reinstall on every code change.
COPY package.json ./
RUN npm install -g pnpm && pnpm install

# Copy the application source on top of the installed dependencies.
# NOTE(review): assumes node_modules is listed in .dockerignore (it is in
# .gitignore) so this COPY cannot clobber the freshly installed deps — confirm.
COPY . .

# Periodic liveness probe via the bundled healthy-check.js script.
HEALTHCHECK --interval=300s --timeout=30s --start-period=5s --retries=3 CMD [ "node", "healthy-check.js" ]
ENTRYPOINT [ "npm", "start" ]
46 changes: 46 additions & 0 deletions setup/config.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
 * setup/config.h — active configuration consumed by the setup CLI.
 * Mirrors the DEFAULT_* values in default_config.h; "*" means "unset".
 */
#ifndef SETUP_CONFIG_H
#define SETUP_CONFIG_H

/* Moved inside the guard: previously the include was processed on every
 * inclusion of this header even when the guard was already defined.
 * NOTE(review): no symbol from setup_types.h is visibly used in this header —
 * confirm the include is still required. */
#include "setup_types.h"

/* Inference (chat) engine: model identity and resource limits. */
#define INFERENCE_MODEL_NAME "Phi3-mini-4k-instruct-Q4.gguf"
#define INFERENCE_MODEL_URL "https://huggingface.co/aisuko/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi3-mini-4k-instruct-Q4.gguf"
#define INFERENCE_CPU_CORES 8.00
#define INFERENCE_THREAD_COUNTS 8.00

/* Embedding engine resource limits. */
#define EMBEDDING_CPU_CORES 4.00
#define EMBEDDING_THREAD_COUNTS 4.00

/* CORS allowed origin ("*" = any). */
#define ALLOW_ORIGIN_NAME "*"

/* HTTPS settings; paths/names only matter when HTTPS_ENABLED is 1. */
#define HTTPS_ENABLED 0
#define HTTPS_CERT_PATH_HOST "*"
#define HTTPS_CERT_PATH_CONTAINER "*"
#define HTTPS_CERT_NAME "cert.pem"
#define HTTPS_PRIVKEY_NAME "privkey.pem"
#define HTTPS_CA_NAME "chain.pem"
#define APP_EXPOSE_PORT "8000"

#define PLUGIN_ENABLED 0

/* System prompt injected into chat requests ("*" = none). */
#define SYSTEM_INSTRUCTION "*"

#define STATIC_API_KEY_ENABLED 0
#define STATIC_API_KEY "*"

#define DEFAULT_DATASET_ENABLED 0
#define DEFAULT_DATASET_NAME "production_dataset"

/* Per-endpoint feature switches (1 = enabled). */
#define API_INDEX_DOC_ENABLED 1
#define API_INDEX_STATS_ENABLED 1
#define API_INDEX_HEALTHY_ENABLED 1
#define API_INFERENCE_COMP_ENABLED 1
#define API_INFERENCE_RAG_ENABLED 1
#define API_TOKEN_ENABLED 1
#define API_EMBEDDING_CALC_ENABLED 1
#define API_EMBEDDING_DS_ENABLED 1
#define API_VERSION_ENABLED 1

#define DEV_MODE_ENABLED 0

#endif
46 changes: 46 additions & 0 deletions setup/default_config.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
 * setup/default_config.h — factory defaults used by the setup CLI to reset or
 * seed config.h; every macro here is the DEFAULT_* twin of one in config.h.
 */
#ifndef SETUP_DEFAULT_CONFIG_H
#define SETUP_DEFAULT_CONFIG_H

/* Moved inside the guard: previously the include was processed on every
 * inclusion even when the guard was already defined.  setup_types.h must
 * provide MODEL_PHI_NAME / MODEL_PHI_URL used below. */
#include "setup_types.h"

/* Inference (chat) engine defaults. */
#define DEFAULT_INFERENCE_MODEL_NAME MODEL_PHI_NAME
#define DEFAULT_INFERENCE_MODEL_URL MODEL_PHI_URL
#define DEFAULT_INFERENCE_CPU_CORES 8.00
#define DEFAULT_INFERENCE_THREAD_COUNTS 8.00

/* Embedding engine defaults. */
#define DEFAULT_EMBEDDING_CPU_CORES 4.00
#define DEFAULT_EMBEDDING_THREAD_COUNTS 4.00

/* CORS allowed origin ("*" = any). */
#define DEFAULT_ALLOW_ORIGIN_NAME "*"

/* HTTPS defaults; paths/names only matter when HTTPS is enabled. */
#define DEFAULT_HTTPS_ENABLED 0
#define DEFAULT_HTTPS_CERT_PATH_HOST "*"
#define DEFAULT_HTTPS_CERT_PATH_CONTAINER "*"
#define DEFAULT_HTTPS_CERT_NAME "cert.pem"
#define DEFAULT_HTTPS_PRIVKEY_NAME "privkey.pem"
#define DEFAULT_HTTPS_CA_NAME "chain.pem"
#define DEFAULT_APP_EXPOSE_PORT "8000"

#define DEFAULT_PLUGIN_ENABLED 0

/* Default system prompt ("*" = none). */
#define DEFAULT_SYSTEM_INSTRUCTION "*"

#define DEFAULT_STATIC_API_KEY_ENABLED 0
#define DEFAULT_STATIC_API_KEY "*"

#define DEFAULT_DEFAULT_DATASET_ENABLED 0
#define DEFAULT_DEFAULT_DATASET_NAME "production_dataset"

/* Per-endpoint feature switches (1 = enabled). */
#define DEFAULT_API_INDEX_DOC_ENABLED 1
#define DEFAULT_API_INDEX_STATS_ENABLED 1
#define DEFAULT_API_INDEX_HEALTHY_ENABLED 1
#define DEFAULT_API_INFERENCE_COMP_ENABLED 1
#define DEFAULT_API_INFERENCE_RAG_ENABLED 1
#define DEFAULT_API_TOKEN_ENABLED 1
#define DEFAULT_API_EMBEDDING_CALC_ENABLED 1
#define DEFAULT_API_EMBEDDING_DS_ENABLED 1
#define DEFAULT_API_VERSION_ENABLED 1

#define DEFAULT_DEV_MODE_ENABLED 0

#endif
Loading
Loading