# Cargo.toml

# Package metadata for the llama2_rs crate.
[package]
name = "llama2_rs"
version = "0.1.0"
# Rust edition the crate is compiled with.
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

# Crate dependencies, sorted alphabetically. Entries marked `optional` are
# only compiled when a feature in [features] enables them: `cust` and `half`
# via `gpu`, `pyo3` via `python`.
[dependencies]
clap = { version = "4.3.23", features = ["derive"] }
cust = { version = "0.3.2", optional = true }
half = { version = "2.3.1", optional = true }
memmap2 = "0.7"
pyo3 = { version = "0.19.0", optional = true }
rayon = "1.5"

# Release-profile overrides.
[profile.release]
# Emit full debug info in optimized builds as well (Cargo's release default
# is no debug info). Presumably kept for profiling/backtraces; confirm before
# shipping, since it increases artifact size.
debug = true

# Library target of the crate.
[lib]
name = "llama2_rs"
# Build both a Rust library (`rlib`) and a C-compatible dynamic library
# (`cdylib`). NOTE(review): the cdylib is presumably what the optional
# `python` (pyo3) feature loads as an extension module; confirm.
crate-type = ["rlib", "cdylib"]
path = "src/lib.rs"

# Command-line executable target.
# NOTE(review): this binary shares its name with the [lib] target; Cargo may
# warn about output/doc filename collisions. Confirm this is intended.
[[bin]]
name = "llama2_rs"
path = "src/main.rs"

[features]
# Nothing is enabled by default; every feature below is opt-in.
default = []

# Llama2 model selection. These are mutually exclusive.
7B = []
13B = []
70B = []

# Group size selection. These are mutually exclusive.
group_32 = []
group_64 = []
group_128 = []

# NOTE(review): declared with no dependencies or sub-features; its purpose is
# not evident from this manifest. Confirm against the crate source.
model_size = []

# Currently enables Q_4 quantization.
quantized = []

# GPU support; pulls in the optional `cust` and `half` dependencies.
gpu = ["cust", "half"]

# Python support; pulls in the optional `pyo3` dependency.
python = ["pyo3"]