Skip to content

Commit

Permalink
Breakthrough in prepareChunks. Needs Chunk.lineNo
Browse files Browse the repository at this point in the history
  • Loading branch information
gvanrossum-ms committed Feb 2, 2025
1 parent 94ab4c5 commit 4141fd0
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 12 deletions.
1 change: 1 addition & 0 deletions ts/packages/agents/spelunker/src/chunkSchema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ export interface Chunk {
parentId: ChunkId;
children: ChunkId[];
fileName: string; // Set upon receiving end from ChunkedFile.fileName.
lineNo: number; // 1-based, calculated from first blob.
docs?: FileDocumentation; // Computed later by fileDocumenter.
}

Expand Down
3 changes: 3 additions & 0 deletions ts/packages/agents/spelunker/src/pythonChunker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ export async function chunkifyPythonFiles(
if (!("error" in result)) {
for (const chunk of result.chunks) {
chunk.fileName = result.fileName;
chunk.lineNo = chunk.blobs.length
? chunk.blobs[0].start + 1
: 1;
}
}
}
Expand Down
59 changes: 55 additions & 4 deletions ts/packages/agents/spelunker/src/searchCode.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ function createQueryContext(): QueryContext {
mode: 0o700,
};
fs.mkdirSync(databaseFolder, mkdirOptions);
const databaseLocation = path.join(databaseFolder, "codeSearchdatabase.db");
const databaseLocation = path.join(databaseFolder, "codeSearchDatabase.db");
const database = undefined;
return {
chatModel,
Expand Down Expand Up @@ -125,6 +125,15 @@ export async function searchCode(
const blobRows: any[] = db
.prepare(`SELECT * FROM blobs WHERE chunkId = ?`)
.all(chunkRow.chunkId);
for (const blob of blobRows) {
blob.lines = blob.lines.match(/.*(?:\r?\n|$)/g) ?? [];
while (
blob.lines.length &&
!blob.lines[blob.lines.length - 1].trim()
) {
blob.lines.pop();
}
}
const childRows: any[] = db
.prepare(`SELECT * FROM chunks WHERE parentId = ?`)
.all(chunkRow.chunkId);
Expand All @@ -136,6 +145,7 @@ export async function searchCode(
parentId: chunkRow.parentId,
children: childRows.map((row) => row.chunkId),
fileName: chunkRow.fileName,
lineNo: chunkRow.lineNo,
};
allChunks.push(chunk);
}
Expand Down Expand Up @@ -347,7 +357,43 @@ async function selectRelevantChunks(

/**
 * Format chunks as line-numbered text, grouped by file.
 *
 * Output layout:
 *   - a blank line followed by `** file=<fileName>` whenever the file changes;
 *   - a `* chunkId=... kind=... name=...` header for each chunk;
 *   - every line of every blob of the chunk, prefixed with its 1-based
 *     line number and a space.
 *
 * @param chunks Chunks to format. The array is not modified.
 * @returns The formatted text; an empty string when `chunks` is empty.
 */
function prepareChunks(chunks: Chunk[]): string {
    // TODO: Format the chunks more efficiently
    // Sort a copy so the caller's array keeps its order. Within a file,
    // order by the chunk's starting line; chunkId only breaks ties so the
    // result is deterministic.
    const sorted = [...chunks].sort(
        (a, b) =>
            a.fileName.localeCompare(b.fileName) ||
            a.lineNo - b.lineNo ||
            a.chunkId.localeCompare(b.chunkId),
    );
    const output: string[] = [];
    let lastFn = "";
    for (const chunk of sorted) {
        if (chunk.fileName !== lastFn) {
            lastFn = chunk.fileName;
            output.push("\n");
            output.push(`** file=${chunk.fileName}\n`);
        }
        output.push(
            `* chunkId=${chunk.chunkId} kind=${chunk.treeName} name=${chunk.codeName}\n`,
        );
        for (const blob of chunk.blobs) {
            // blob.start is the 0-based index of the blob's first line.
            let lineNo = blob.start;
            for (const line of blob.lines) {
                lineNo += 1;
                // Lines normally keep their terminator; add one when it is
                // missing (e.g. last line of a file without a final newline)
                // so the next header does not get glued onto this line.
                const terminated = line.endsWith("\n") ? line : `${line}\n`;
                output.push(`${lineNo} ${terminated}`);
            }
        }
    }
    return output.join("");
}

function prepareSummaries(db: sqlite.Database): string {
Expand Down Expand Up @@ -462,7 +508,7 @@ async function loadDatabase(
`SELECT COUNT(*) FROM Chunks WHERE fileName = ?`,
);
const prepInsertChunks = db.prepare(
`INSERT OR REPLACE INTO Chunks (chunkId, treeName, codeName, parentId, fileName) VALUES (?, ?, ?, ?, ?)`,
`INSERT OR REPLACE INTO Chunks (chunkId, treeName, codeName, parentId, fileName, lineNo) VALUES (?, ?, ?, ?, ?, ?)`,
);
const prepInsertBlobs = db.prepare(
`INSERT INTO Blobs (chunkId, start, lines, breadcrumb) VALUES (?, ?, ?, ?)`,
Expand Down Expand Up @@ -577,13 +623,17 @@ async function loadDatabase(
if (!chunk.fileName) {
throw new Error(`Chunk ${chunk.chunkId} has no fileName`);
}
if (!chunk.lineNo) {
throw new Error(`Chunk ${chunk.fileName} has no lineNo`);
}
allChunks.push(chunk);
prepInsertChunks.run(
chunk.chunkId,
chunk.treeName,
chunk.codeName,
chunk.parentId || null,
chunk.fileName,
chunk.lineNo,
);
for (const blob of chunk.blobs) {
prepInsertBlobs.run(
Expand Down Expand Up @@ -619,7 +669,8 @@ CREATE TABLE IF NOT EXISTS Chunks (
treeName TEXT NOT NULL,
codeName TEXT NOT NULL,
parentId TEXT KEY REFERENCES chunks(chunkId), -- May be null
fileName TEXT KEY REFERENCES files(fileName) NOT NULL
fileName TEXT KEY REFERENCES files(fileName) NOT NULL,
lineNo INTEGER NOT NULL -- 1-based
);
CREATE TABLE IF NOT EXISTS Blobs (
chunkId TEXT KEY REFERENCES chunks(chunkId) NOT NULL,
Expand Down
27 changes: 19 additions & 8 deletions ts/packages/agents/spelunker/src/typescriptChunker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,16 @@ export async function chunkifyTypeScriptFiles(
const baseName = path.basename(fileName);
const extName = path.extname(fileName);
const codeName = baseName.slice(0, -extName.length || undefined);
const blobs: Blob[] = [
{ start: 0, lines: sourceFile.text.match(/.*(?:\r?\n|$)/g) || [] },
];
const blob: Blob = {
start: 0,
lines: sourceFile.text.match(/.*(?:\r?\n|$)/g) || [],
};
while (blob.lines.length && !blob.lines[0].trim()) {
blob.lines.shift();
blob.start++;
}
const blobs: Blob[] = [blob];
const lineNo = blobs.length ? blobs[0].start + 1 : 1;
const rootChunk: Chunk = {
chunkId: generate_id(),
treeName: "file",
Expand All @@ -47,6 +54,7 @@ export async function chunkifyTypeScriptFiles(
parentId: "",
children: [],
fileName,
lineNo,
};
const chunks: Chunk[] = [rootChunk];
chunks.push(...recursivelyChunkify(sourceFile, rootChunk));
Expand Down Expand Up @@ -74,18 +82,21 @@ export async function chunkifyTypeScriptFiles(
// );
const treeName = ts.SyntaxKind[childNode.kind];
const codeName = tsCode.getStatementName(childNode) ?? "";
const blobs = makeBlobs(
sourceFile,
childNode.getFullStart(),
childNode.getEnd(),
);
const lineNo = blobs.length ? blobs[0].start + 1 : 1;
const childChunk: Chunk = {
chunkId: generate_id(),
treeName,
codeName,
blobs: makeBlobs(
sourceFile,
childNode.getFullStart(),
childNode.getEnd(),
),
blobs,
parentId: parentChunk.chunkId,
children: [],
fileName,
lineNo,
};
spliceBlobs(parentChunk, childChunk);
chunks.push(childChunk);
Expand Down

0 comments on commit 4141fd0

Please sign in to comment.