From 4618278c7330692df91aab4129f91f1a867329a4 Mon Sep 17 00:00:00 2001
From: Guido van Rossum
Date: Mon, 10 Feb 2025 14:08:54 -0800
Subject: [PATCH] Remove dead code from pythonChunker.ts

---
 .../agents/spelunker/src/pythonChunker.ts | 106 +-----------------
 1 file changed, 1 insertion(+), 105 deletions(-)

diff --git a/ts/packages/agents/spelunker/src/pythonChunker.ts b/ts/packages/agents/spelunker/src/pythonChunker.ts
index 90301651..c3285581 100644
--- a/ts/packages/agents/spelunker/src/pythonChunker.ts
+++ b/ts/packages/agents/spelunker/src/pythonChunker.ts
@@ -9,12 +9,7 @@ import path from "path";
 import { fileURLToPath } from "url";
 import { promisify } from "util";
 
-import {
-    ChunkId,
-    Chunk,
-    ChunkedFile,
-    ChunkerErrorItem,
-} from "./chunkSchema.js";
+import { ChunkedFile, ChunkerErrorItem } from "./chunkSchema.js";
 
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
@@ -65,104 +60,5 @@ export async function chunkifyPythonFiles(
             }
         }
     }
-    if (true) return results;
-    else return splitLargeFiles(results); // Alas, this breaks some things.
-}
-
-// *** Everything below here is potentially dead code.***
-
-// TODO: Tune these constants (count should be much larger, at least).
-const CHUNK_COUNT_LIMIT = 25; // How many chunks at most.
-const FILE_SIZE_LIMIT = 25000; // How many characters at most.
-
-function splitLargeFiles(
-    items: (ChunkedFile | ChunkerErrorItem)[],
-): (ChunkedFile | ChunkerErrorItem)[] {
-    const results: (ChunkedFile | ChunkerErrorItem)[] = [];
-    for (const item of items) {
-        if (
-            "error" in item ||
-            (item.chunks.length <= CHUNK_COUNT_LIMIT &&
-                fileSize(item) <= FILE_SIZE_LIMIT)
-        ) {
-            results.push(item);
-        } else {
-            results.push(...splitFile(item));
-        }
-    }
-    return results;
-}
-
-// This algorithm is too complex. I needed a debugger and logging to get it right.
-function splitFile(file: ChunkedFile): ChunkedFile[] {
-    const fileName = file.fileName;
-    const parentMap: Map<ChunkId, Chunk> = new Map();
-    for (const chunk of file.chunks) {
-        // Only nodes with children will be looked up in this map.
-        if (chunk.children.length) parentMap.set(chunk.chunkId, chunk);
-    }
-
-    const results: ChunkedFile[] = []; // Where output accumulates.
-    let chunks = Array.from(file.chunks); // The chunks yet to emit.
-    let minNumChunks = 1;
-
-    outer: while (true) {
-        // Keep going until we exit the inner loop.
-        let totalSize = 0; // Size in characters of chunks to be output.
-        for (let i = 0; i < chunks.length; i++) {
-            // Iterate in pre-order.
-            const currentChunk = chunks[i];
-            const size = chunkSize(currentChunk);
-            if (
-                i < minNumChunks ||
-                (i < CHUNK_COUNT_LIMIT && totalSize + size <= FILE_SIZE_LIMIT)
-            ) {
-                totalSize += size;
-                continue;
-            }
-
-            // Split the file here (current chunk goes into ancestors).
-            const rest = chunks.splice(i);
-            if (rest.shift() !== currentChunk)
-                throw Error(
-                    "Internal error: expected current chunk at head of rest",
-                );
-            results.push({ fileName, chunks });
-            const ancestors: Chunk[] = [];
-
-            let c: Chunk | undefined = currentChunk;
-            do {
-                ancestors.unshift(c);
-                c = parentMap.get(c.parentId);
-            } while (c);
-            // Note that the current chunk is the last ancestor.
-            chunks = [...ancestors, ...rest];
-            minNumChunks = ancestors.length;
-            continue outer;
-        }
-        // Append the final chunk.
-        results.push({ fileName, chunks });
-        break;
-    }
-    // console.log(
-    //     `Split ${file.fileName} (${file.chunks.length} chunks) into ${results.length} files.`,
-    // );
-    // console.log(`Sizes: ${results.map((f) => f.chunks.length).join(", ")}`);
     return results;
 }
-
-function fileSize(file: ChunkedFile): number {
-    return file.chunks.reduce((acc, chunk) => acc + chunkSize(chunk), 0);
-}
-
-function chunkSize(chunk: Chunk): number {
-    let totalCharacters = 0;
-    for (const blob of chunk.blobs) {
-        if (!blob.breadcrumb) {
-            for (const line of blob.lines) {
-                totalCharacters += line.length;
-            }
-        }
-    }
-    return totalCharacters;
-}
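
Note for reviewers: the diff does not show the shapes the deleted splitter operated on. Below is a minimal sketch of the chunkSchema.js pieces it used, inferred only from how the dead code touches them (the chunkId/parentId tree walk, the children check, and per-blob lines with a breadcrumb flag). The field names come straight from the deleted code; the types are guesses, and the real chunkSchema.ts may declare additional or different fields.

// Hypothetical reconstruction, for reference only. All types here are
// inferred from usage in the deleted code, not copied from chunkSchema.ts.
export type ChunkId = string;

export interface Blob {
    lines: string[]; // Source lines carried by this blob.
    breadcrumb?: boolean; // Breadcrumb blobs did not count toward chunkSize().
}

export interface Chunk {
    chunkId: ChunkId;
    parentId: ChunkId; // splitFile() walked parentMap.get(c.parentId) upward.
    children: ChunkId[]; // Only chunks with children were put in parentMap.
    blobs: Blob[];
}

export interface ChunkedFile {
    fileName: string;
    chunks: Chunk[]; // Pre-order, so ancestors precede their descendants.
}

export interface ChunkerErrorItem {
    error: string; // The `"error" in item` test in splitLargeFiles() keyed on this.
}

Skipping blobs marked breadcrumb in chunkSize() suggests they were placeholders (e.g. markers left in a parent chunk where a child chunk was extracted) rather than real source text, so they were excluded from the FILE_SIZE_LIMIT character budget.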