From fedebadc8114a7cbde76161da82748c91b200d3a Mon Sep 17 00:00:00 2001 From: kingbri Date: Tue, 6 Feb 2024 14:48:42 -0500 Subject: [PATCH] Model: Fix generate window fallback Use max_seq_len as the numerator of the fallback, not max_tokens. The wrong parameter was being divided by 8. Signed-off-by: kingbri --- backends/exllamav2/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py index 84c6724b..2088121c 100644 --- a/backends/exllamav2/model.py +++ b/backends/exllamav2/model.py @@ -515,7 +515,7 @@ def generate_gen(self, prompt: str, **kwargs): max_tokens = unwrap(kwargs.get("max_tokens"), 150) stream_interval = unwrap(kwargs.get("stream_interval"), 0) generate_window = max( - unwrap(kwargs.get("generate_window"), 512), max_tokens // 8 + unwrap(kwargs.get("generate_window"), 512), self.config.max_seq_len // 8 ) # Sampler settings