diff --git a/ts/packages/agentSdk/src/helpers/actionHelpers.ts b/ts/packages/agentSdk/src/helpers/actionHelpers.ts
index efc4c964e..e2fb7cdd7 100644
--- a/ts/packages/agentSdk/src/helpers/actionHelpers.ts
+++ b/ts/packages/agentSdk/src/helpers/actionHelpers.ts
@@ -76,6 +76,19 @@ export function createActionResultFromHtmlDisplayWithScript(
     };
 }
 
+export function createActionResultFromMarkdownDisplay(
+    literalText: string,
+    entities: Entity[] = [],
+    resultEntity?: Entity,
+): ActionResultSuccess {
+    return {
+        literalText,
+        entities,
+        resultEntity,
+        displayContent: { type: "markdown", content: literalText },
+    };
+}
+
 export function createActionResultFromError(error: string): ActionResultError {
     return {
         error,
diff --git a/ts/packages/agents/spelunker/design.md b/ts/packages/agents/spelunker/design.md
index 205c2b7df..5c7665e83 100644
--- a/ts/packages/agents/spelunker/design.md
+++ b/ts/packages/agents/spelunker/design.md
@@ -16,22 +16,36 @@ Questions about the focused code base are answered roughly as follows:
 
 1. Gather all relevant source files. (E.g. `**/*.{py,ts}`)
 2. Chunkify locally (using chunker.py or typescriptChunker.ts)
-3. Send batches of chunks, in parallel, to a cheap, fast LLM
+3. Send the chunks, in parallel batches, to a cheap, fast LLM
+   with a prompt asking it to summarize each chunk.
+
+(Note that steps 1-3 need to be done only for new or changed files.)
+
+4. Send the chunks, in parallel batches, to a cheap, fast LLM
    with a prompt asking it to find chunks relevant to the user question.
-4. Sort by relevance, keep top `N`. (E.g. `N = 30`)
-5. Send the selected chunks as context to a smart model (the "oracle")
+5. Sort the selected chunks by relevance, keep the top _N_.
+   (_N_ is dynamically computed to fit in the oracle prompt size limit.)
+6. Send the top _N_ selected chunks as context to a smart model (the "oracle")
    with the request to answer the user question using those chunks as context.
-6. Construct a result from the answer and the chunks used to come up with it.
+7. Construct a result from the answer and the chunks used to come up with it
+   ("references").
 
 ## How easy is it to target other languages?
 
 - Need a chunker for each language; the rest is the same.
-- Chunking TypeScript was, realistically, a week's work.
+- Chunking TypeScript was, realistically, a week's work, so not too terrible.
+
+## Latest changes
+
+So far, the summaries are only used to update so-called "breadcrumb" blobs
+(placeholders for sub-chunks) to make the placeholder text look better
+(a comment plus the full signature, rather than just e.g. `def foo ...`).
 
 ## TO DO
 
 - Prompt engineering (borrow from John Lam?)
 - Evaluation of selection process (does the model do a good enough job?)
-- Scaling. It takes 60-80 seconds to select from ~4000 chunks.
-- Do we need a "global index" (of summaries) like John Lam's ask.py?
+- Scaling. It takes 20-50 seconds (and about $5) to select from ~4000 chunks.
+  Summarizing that many chunks takes about as long.
+- Do we need to send a "global index" (of summaries) like John Lam's ask.py?
   How to make that scale?
diff --git a/ts/packages/agents/spelunker/scaling.md b/ts/packages/agents/spelunker/scaling.md
new file mode 100644
index 000000000..37b7a3b6f
--- /dev/null
+++ b/ts/packages/agents/spelunker/scaling.md
@@ -0,0 +1,56 @@
+# Scaling ideas
+
+These are very unformed thoughts.
+
+## Local indexing with fuzzy matching
+
+Directly after chunking, add embeddings for all chunks, just based on the code alone.
+(Yes I know that's pretty lame, but it's what we can do without summarizing all chunks.)
+
+Whenever a question is asked, _first_ search the embeddings for _k_ nearest neighbors,
+where _k_ is pretty large (maybe start with 1000).
+Then pass those chunks on to the usual AI-driven selection process.
+
+Do we still need summaries if we do this? How would they be used?
+(Possibly we could generate summaries for the query context on demand.)
+
+### Implementation planning
+
+- For now, skip the summarization phase.
+- Copy vectorTable.ts from _examples/memoryProviders_ (which IMO isn't a real package).
+- Maybe remove stuff we don't need, e.g. generics over `ValueType` and the other weird thing.
+- Keep using `interface typeagent.VectorStore` and put creation in one place.
+- Add another file defining an `async` function to get an embedding (probably needs a model).
+- After we've got `allChunks` filled (with all the chunks), batch compute and insert
+  embeddings for each chunk into the vector store.
+- When prepping for a question, instead of sending all chunks off for selection,
+  get the query's embedding and request a generous k nearest neighbors, and send _those_
+  off to the selection process. Let's start with _k_=1000, and then see if halving it
+  or doubling it makes much of a difference.
+- The rest is the same.
+
+### Again, with feeling
+
+- Copy `vectorTable` from _examples/memoryProviders_, change to pass in the Database object.
+  (We could import sqlite from memory-providers, but then the embeddings are in a different database.)
+- BETTER: `import { sqlite } from "memory-providers"` and add a createStorageFromDb method.
+- EVEN BETTER: Just extract the nearest-neighbors algorithm and do the rest myself. memory-providers is obsolete anyway.
+- Create an embedding model when we initialize `QueryContext` (and put it there).
+  (Look in old spelunker for example code.)
+- Create a table named `ChunkEmbeddings (chunkId TEXT PRIMARY KEY, embedding BLOB)` when creating the db.
+- Use `generateTextEmbeddings` or `generateEmbedding` from `typeagent` to get embedding(s).
+  Those are async and not free and might fail, but generally pretty reliable.
+  (There are retry versions too if we need them.)
+- IIUC these normalize, so we can use dot product instead of cosine similarity.
+- Skip the summarizing step. (Keep the code and the Summaries table, we may need them later.)
+- Manage embeddings as chunks are removed and added. Probably have to add something
+  to remove all embeddings that reference a chunk for a given file (like we do for blobs).
+- When processing a query, before the selection step, slim down the chunks using embeddings:
+    - Get the embedding for the user query
+    - Call `nearestNeighbors` on the `VectorTable`
+    - Only read the selected chunk IDs from the Chunks table.
+
+### TODO
+
+- When there are fewer than maxConcurrency batches, create more batches and distribute the chunks evenly.
+  (I have an algorithm in mind, this can go in `makeBatches`.)
diff --git a/ts/packages/agents/spelunker/src/batching.ts b/ts/packages/agents/spelunker/src/batching.ts
new file mode 100644
index 000000000..e47feab04
--- /dev/null
+++ b/ts/packages/agents/spelunker/src/batching.ts
@@ -0,0 +1,68 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
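+//
+// Batching helpers shared by the chunk-selection and embedding passes:
+// - makeBatches() packs chunks into consecutive batches, capped both by a
+//   character budget and by a maximum number of chunks per batch.
+// - keepBestChunks() walks chunk descriptions in descending relevance order
+//   and keeps as many chunks as fit within a character budget.
+// Sizes are rough estimates computed by getChunkSize().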
+ +import { Chunk } from "./chunkSchema.js"; +import { console_log } from "./logging.js"; +import { ChunkDescription } from "./selectorSchema.js"; + +export function makeBatches( + chunks: Chunk[], + batchSize: number, // In characters + maxChunks: number, // How many chunks at most per batch +): Chunk[][] { + const batches: Chunk[][] = []; + let batch: Chunk[] = []; + let size = 0; + function flush(): void { + batches.push(batch); + console_log( + ` [Batch ${batches.length} has ${batch.length} chunks and ${size} characters]`, + ); + batch = []; + size = 0; + } + for (const chunk of chunks) { + const chunkSize = getChunkSize(chunk); + if ( + size && + (size + chunkSize > batchSize || batch.length >= maxChunks) + ) { + flush(); + } + batch.push(chunk); + size += chunkSize; + } + if (size) { + flush(); + } + return batches; +} + +export function keepBestChunks( + chunkDescs: ChunkDescription[], // Sorted by descending relevance + allChunks: Chunk[], + batchSize: number, // In characters +): Chunk[] { + const chunks: Chunk[] = []; + let size = 0; + for (const chunkDesc of chunkDescs) { + const chunk = allChunks.find((c) => c.chunkId === chunkDesc.chunkId); + if (!chunk) continue; + const chunkSize = getChunkSize(chunk); + if (size + chunkSize > batchSize && chunks.length) { + break; + } + chunks.push(chunk); + size += chunkSize; + } + return chunks; +} + +function getChunkSize(chunk: Chunk): number { + // This is all an approximation + let size = chunk.fileName.length + 50; + for (const blob of chunk.blobs) { + size += blob.lines.join("").length + 4 * blob.lines.length; + } + return size; +} diff --git a/ts/packages/agents/spelunker/src/databaseUtils.ts b/ts/packages/agents/spelunker/src/databaseUtils.ts new file mode 100644 index 000000000..6da2df4c1 --- /dev/null +++ b/ts/packages/agents/spelunker/src/databaseUtils.ts @@ -0,0 +1,119 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +import * as fs from "fs"; +import * as path from "path"; +import { createRequire } from "module"; + +import Database, * as sqlite from "better-sqlite3"; + +import { SpelunkerContext } from "./spelunkerActionHandler.js"; + +import { console_log } from "./logging.js"; + +const databaseSchema = ` +CREATE TABLE IF NOT EXISTS Files ( + fileName TEXT PRIMARY KEY, + mtime FLOAT NOT NULL, + size INTEGER NOT NULL +); +CREATE TABLE IF NOT EXISTS Chunks ( + chunkId TEXT PRIMARY KEY, + treeName TEXT NOT NULL, + codeName TEXT NOT NULL, + parentId TEXT KEY REFERENCES Chunks(chunkId), -- May be null + fileName TEXT KEY REFERENCES files(fileName) NOT NULL, + lineNo INTEGER NOT NULL -- 1-based +); +CREATE TABLE IF NOT EXISTS Blobs ( + chunkId TEXT KEY REFERENCES Chunks(chunkId) NOT NULL, + start INTEGER NOT NULL, -- 0-based + lines TEXT NOT NULL, + breadcrumb TEXT -- Chunk ID or empty string or NULL +); +CREATE TABLE IF NOT EXISTS Summaries ( + chunkId TEXT PRIMARY KEY REFERENCES Chunks(chunkId), + language TEXT, -- "python", "typescript", etc. 
+ summary TEXT, + signature TEXT +); +CREATE TABLE IF NOT EXISTS ChunkEmbeddings ( + chunkId TEXT PRIMARY KEY REFERENCES Chunks(chunkId), + embedding BLOB NOT NULL +); +`; + +function getDbOptions() { + if (process?.versions?.electron !== undefined) { + return undefined; + } + const r = createRequire(import.meta.url); + const betterSqlitePath = r.resolve("better-sqlite3/package.json"); + const nativeBinding = path.join( + betterSqlitePath, + "../build/Release/better_sqlite3.n.node", + ); + return { nativeBinding }; +} + +export function createDatabase(context: SpelunkerContext): void { + if (!context.queryContext) { + throw new Error( + "context.queryContext must be set before calling createDatabase", + ); + } + const loc = context.queryContext.databaseLocation; + if (context.queryContext.database) { + console_log(`[Using database at ${loc}]`); + return; + } + if (fs.existsSync(loc)) { + console_log(`[Opening database at ${loc}]`); + } else { + console_log(`[Creating database at ${loc}]`); + } + const db = new Database(loc, getDbOptions()); + // Write-Ahead Logging, improving concurrency and performance + db.pragma("journal_mode = WAL"); + // Fix permissions to be read/write only by the owner + fs.chmodSync(context.queryContext.databaseLocation, 0o600); + // Create all the tables we'll use + db.exec(databaseSchema); + context.queryContext.database = db; +} + +export function purgeFile(db: sqlite.Database, fileName: string): void { + const prepDeleteEmbeddings = db.prepare(` + DELETE FROM ChunkEmbeddings WHERE chunkId IN ( + SELECT chunkId + FROM chunks + WHERE filename = ? + ) + `); + const prepDeleteSummaries = db.prepare(` + DELETE FROM Summaries WHERE chunkId IN ( + SELECT chunkId + FROM chunks + WHERE fileName = ? + ) + `); + const prepDeleteBlobs = db.prepare(` + DELETE FROM Blobs WHERE chunkId IN ( + SELECT chunkId + FROM chunks + WHERE filename = ? + ) + `); + const prepDeleteChunks = db.prepare( + `DELETE FROM Chunks WHERE fileName = ?`, + ); + const prepDeleteFiles = db.prepare(`DELETE FROM files WHERE fileName = ?`); + + db.exec(`BEGIN TRANSACTION`); + prepDeleteSummaries.run(fileName); + prepDeleteBlobs.run(fileName); + prepDeleteEmbeddings.run(fileName); + prepDeleteChunks.run(fileName); + prepDeleteFiles.run(fileName); + db.exec(`COMMIT`); +} diff --git a/ts/packages/agents/spelunker/src/embeddings.ts b/ts/packages/agents/spelunker/src/embeddings.ts new file mode 100644 index 000000000..c8d914d5c --- /dev/null +++ b/ts/packages/agents/spelunker/src/embeddings.ts @@ -0,0 +1,198 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
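+//
+// Embedding support for the chunk index:
+// - makeEmbeddingModel() configures the text embedding model from the environment.
+// - loadEmbeddings() batch-computes normalized embeddings for all chunks and
+//   stores them in the ChunkEmbeddings table.
+// - preSelectChunks() embeds the user query and returns the IDs of the nearest
+//   chunks by dot product (embeddings are normalized, so this matches cosine
+//   similarity).
+// Batches are concurrency-limited and retried on 429 errors via retryOn429().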
+ +import { Statement } from "better-sqlite3"; +import { Result } from "typechat"; + +import { openai, TextEmbeddingModel } from "aiclient"; +import { createLimiter } from "common-utils"; +import { createNormalized, dotProduct } from "typeagent"; +import { NormalizedEmbedding } from "typeagent"; + +import { Chunk, ChunkId } from "./chunkSchema.js"; +import { console_log } from "./logging.js"; +import { retryOn429 } from "./retryLogic.js"; +import { makeBatches } from "./batching.js"; +import { SpelunkerContext } from "./spelunkerActionHandler.js"; +import path from "path"; + +export function makeEmbeddingModel(): TextEmbeddingModel { + const apiSettings = openai.apiSettingsFromEnv(openai.ModelType.Embedding); + apiSettings.maxRetryAttempts = 0; + const endpoint = process.env.AZURE_OPENAI_ENDPOINT_EMBEDDING_3_SMALL; + if (endpoint) { + apiSettings.endpoint = endpoint; + } + const embeddingModel = openai.createEmbeddingModel(apiSettings); + console_log(`[Max embedding batch size: ${embeddingModel.maxBatchSize}]`); + return embeddingModel; +} + +export async function loadEmbeddings( + context: SpelunkerContext, + chunks: Chunk[], +): Promise { + const model = context.queryContext!.embeddingModel; + if (!model.generateEmbeddingBatch) { + console_log(`[This embedding model does not support batch operations]`); // TODO: Fix this + return; + } + + console_log(`[Step 1c: Store chunk embeddings]`); + const generateEmbeddingBatch = model.generateEmbeddingBatch; + const db = context.queryContext!.database!; + const prepInsertEmbeddings = db.prepare( + `INSERT OR REPLACE INTO ChunkEmbeddings (chunkId, embedding) VALUES (?, ?)`, + ); + const maxCharacters = 100000; // TODO: tune + const batches = makeBatches(chunks, maxCharacters, model.maxBatchSize); + // const maxConcurrency = + // parseInt(process.env.AZURE_OPENAI_MAX_CONCURRENCY ?? "5") ?? 5; + const maxConcurrency = 2; // Seems we can do no better, given the low quota. + console_log( + ` [${batches.length} batches, maxConcurrency ${maxConcurrency}]`, + ); + const limiter = createLimiter(maxConcurrency); + const promises: Promise[] = []; + for (const batch of batches) { + const p = limiter(() => + generateAndInsertEmbeddings( + generateEmbeddingBatch, + prepInsertEmbeddings, + batch, + ), + ); + promises.push(p); + } + await Promise.all(promises); +} + +async function generateAndInsertEmbeddings( + generateEmbeddingBatch: (a: string[]) => Promise>, + prepInsertEmbeddings: Statement, + batch: Chunk[], +): Promise { + const t0 = new Date().getTime(); + const stringBatch = batch.map(blobText); + const embeddings = await retryOn429(() => + generateEmbeddingBatch(stringBatch), + ); + if (embeddings) { + for (let i = 0; i < embeddings.length; i++) { + const chunk = batch[i]; + const embedding: NormalizedEmbedding = createNormalized( + embeddings[i], + ); + prepInsertEmbeddings.run(chunk.chunkId, Buffer.from(embedding)); + } + const t1 = new Date().getTime(); + const dtms = t1 - t0; + const dtStr = + dtms < 1000 ? `${dtms}ms` : `${(dtms / 1000).toFixed(3)}s`; + console_log( + ` [Generated and inserted embedding batch of ${batch.length} in ${dtStr}]`, + ); + } else { + const t1 = new Date().getTime(); + const dtms = t1 - t0; + const dtStr = + dtms < 1000 ? 
`${dtms}ms` : `${(dtms / 1000).toFixed(3)}s`; + console_log(` [Failed to generate embedding batch in ${dtStr}]`); + } +} + +function blobText(chunk: Chunk): string { + const lines: string[] = []; + for (const blob of chunk.blobs) { + lines.push(...blob.lines); + } + // Keep only alphanumerical words; everything else is removed (hoping to reduce the cost) + const fileName = shortenedFilename(chunk.fileName); + const line = lines.join("").replace(/\W+/g, " ").trim().slice(0, 20000); // Assuming average 2.5 chars per token + return `${fileName}\n${line}\n}`; +} + +function shortenedFilename(fileName: string): string { + const prefix = process.env.HOME; + if (prefix && fileName.startsWith(prefix + path.sep)) { + return "~" + fileName.slice(prefix.length); + } else { + return fileName; + } +} + +export async function preSelectChunks( + context: SpelunkerContext, + input: string, + maxChunks = 1000, +): Promise { + const ta0 = new Date().getTime(); + const db = context.queryContext!.database!; + const prepAllEmbeddings = db.prepare( + `SELECT chunkId, embedding FROM ChunkEmbeddings`, + ); + const allEmbeddingRows: { + chunkId: ChunkId; + embedding: Buffer; + }[] = prepAllEmbeddings.all() as any[]; + const ta1 = new Date().getTime(); + console_log( + ` [Read ${allEmbeddingRows.length} embeddings in ${((ta1 - ta0) / 1000).toFixed(3)} seconds]`, + ); + if (allEmbeddingRows.length <= maxChunks) { + console_log(` [Returning all ${allEmbeddingRows.length} chunk IDs]`); + return allEmbeddingRows.map((row) => row.chunkId); + } + + const tb0 = new Date().getTime(); + const queryEmbedding = await getEmbedding(context, input); + const tb1 = new Date().getTime(); + const tail = !queryEmbedding ? " (failure)" : ""; + console_log( + ` [Embedding input of ${input.length} characters took ${((tb1 - tb0) / 1000).toFixed(3)} seconds${tail}]`, + ); + if (!queryEmbedding) { + return []; + } + + const embeddings = allEmbeddingRows.map( + (row) => new Float32Array(Buffer.from(row.embedding)), + ); + const tc0 = new Date().getTime(); + const similarities: { chunkId: ChunkId; score: number }[] = []; + for (let i = 0; i < embeddings.length; i++) { + const chunkId = allEmbeddingRows[i].chunkId; + const score = dotProduct(embeddings[i], queryEmbedding); + similarities.push({ chunkId, score }); + } + similarities.sort((a, b) => b.score - a.score); + similarities.splice(maxChunks); + const chunkIds = similarities.map((s) => s.chunkId); + const tc1 = new Date().getTime(); + console_log( + ` [Found ${chunkIds.length} nearest neighbors in ${((tc1 - tc0) / 1000).toFixed(3)} seconds]`, + ); + return chunkIds; +} + +async function getEmbedding( + context: SpelunkerContext, + query: string, +): Promise { + const model = context.queryContext!.embeddingModel!; + const generateEmbeddingBatch = model.generateEmbeddingBatch; + if (!generateEmbeddingBatch) { + console_log(`[This embedding model does not support batch operations]`); // TODO: Fix this + return undefined; + } + + const rawEmbeddings: number[][] | undefined = await retryOn429(() => + generateEmbeddingBatch([query]), + ); + const rawEmbedding = rawEmbeddings?.[0]; + if (!rawEmbedding) { + console_log(`[Failed to generate embedding]`); + return undefined; + } + return rawEmbedding ? 
createNormalized(rawEmbedding) : undefined; +} diff --git a/ts/packages/agents/spelunker/src/logging.ts b/ts/packages/agents/spelunker/src/logging.ts new file mode 100644 index 000000000..5d1d80d58 --- /dev/null +++ b/ts/packages/agents/spelunker/src/logging.ts @@ -0,0 +1,17 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +let epoch: number = 0; + +export function resetEpoch(): void { + epoch = 0; +} + +export function console_log(...rest: any[]): void { + if (!epoch) { + epoch = Date.now(); + console.log(""); // Start new epoch with a blank line + } + const t = Date.now(); + console.log(((t - epoch) / 1000).toFixed(3).padStart(6), ...rest); +} diff --git a/ts/packages/agents/spelunker/src/queryContext.ts b/ts/packages/agents/spelunker/src/queryContext.ts new file mode 100644 index 000000000..686ba5502 --- /dev/null +++ b/ts/packages/agents/spelunker/src/queryContext.ts @@ -0,0 +1,110 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +import * as fs from "fs"; +import * as path from "path"; + +import * as sqlite from "better-sqlite3"; + +import { createJsonTranslator, TypeChatJsonTranslator } from "typechat"; +import { createTypeScriptJsonValidator } from "typechat/ts"; + +import { ChatModel, openai, TextEmbeddingModel } from "aiclient"; +import { loadSchema } from "typeagent"; + +import { makeEmbeddingModel } from "./embeddings.js"; +import { console_log } from "./logging.js"; +import { OracleSpecs } from "./oracleSchema.js"; +import { SelectorSpecs } from "./selectorSchema.js"; +import { SummarizerSpecs } from "./summarizerSchema.js"; + +export interface QueryContext { + chatModel: ChatModel; + miniModel: ChatModel; + embeddingModel: TextEmbeddingModel; + oracle: TypeChatJsonTranslator; + chunkSelector: TypeChatJsonTranslator; + chunkSummarizer: TypeChatJsonTranslator; + databaseLocation: string; + database: sqlite.Database | undefined; +} + +function captureTokenStats(req: any, response: any): void { + const inputTokens = response.usage.prompt_tokens; + const outputTokens = response.usage.completion_tokens; + const cost = inputTokens * 0.000005 + outputTokens * 0.000015; + console_log( + ` [Tokens used: prompt=${inputTokens}, ` + + `completion=${outputTokens}, ` + + `cost=\$${cost.toFixed(2)}]`, + ); +} + +export function createQueryContext(): QueryContext { + const chatModel = openai.createChatModelDefault("spelunkerChat"); + chatModel.completionCallback = captureTokenStats; + chatModel.retryMaxAttempts = 0; + + const miniModel = openai.createChatModel( + undefined, // "GPT_4_O_MINI" is slower than default model?! + undefined, + undefined, + ["spelunkerMini"], + ); + miniModel.completionCallback = captureTokenStats; + miniModel.retryMaxAttempts = 0; + + const embeddingModel = makeEmbeddingModel(); + + const oracle = createTranslator( + chatModel, + "oracleSchema.ts", + "OracleSpecs", + ); + const chunkSelector = createTranslator( + miniModel, + "selectorSchema.ts", + "SelectorSpecs", + ); + const chunkSummarizer = createTranslator( + miniModel, + "summarizerSchema.ts", + "SummarizerSpecs", + ); + + const databaseFolder = path.join( + process.env.HOME ?? 
"", + ".typeagent", + "agents", + "spelunker", + ); + const mkdirOptions: fs.MakeDirectoryOptions = { + recursive: true, + mode: 0o700, + }; + fs.mkdirSync(databaseFolder, mkdirOptions); + + const databaseLocation = path.join(databaseFolder, "codeSearchDatabase.db"); + const database = undefined; + return { + chatModel, + miniModel, + embeddingModel, + oracle, + chunkSelector, + chunkSummarizer, + databaseLocation, + database, + }; +} + +function createTranslator( + model: ChatModel, + schemaFile: string, + typeName: string, +): TypeChatJsonTranslator { + const schema = loadSchema([schemaFile], import.meta.url); + const validator = createTypeScriptJsonValidator(schema, typeName); + const translator = createJsonTranslator(model, validator); + return translator; +} diff --git a/ts/packages/agents/spelunker/src/retryLogic.ts b/ts/packages/agents/spelunker/src/retryLogic.ts new file mode 100644 index 000000000..63ebc4bed --- /dev/null +++ b/ts/packages/agents/spelunker/src/retryLogic.ts @@ -0,0 +1,58 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +import { Result } from "typechat"; + +import { console_log } from "./logging.js"; + +export async function retryOn429( + translate: () => Promise>, + retries: number = 3, + defaultDelay: number = 5000, +): Promise { + let wrappedResult: Result; + do { + retries--; + wrappedResult = await translate(); + // console_log(wrappedResult); + if (!wrappedResult.success) { + if ( + retries > 0 && + wrappedResult.message.includes("fetch error: 429:") + ) { + let delay = defaultDelay; + const embeddingTime = wrappedResult.message.match( + /Try again in (\d+) seconds/, + ); + const azureTime = wrappedResult.message.match( + /after (\d+) milliseconds/, + ); + const openaiTime = wrappedResult.message.match( + /Please try again in (\d+\.\d*|\.\d+|\d+m)s./, + ); + if (embeddingTime || azureTime || openaiTime) { + if (embeddingTime) { + delay = parseInt(embeddingTime[1]) * 1000; + } else if (azureTime) { + delay = parseInt(azureTime[1]); + } else if (openaiTime) { + delay = parseFloat(openaiTime[1]); + if (!openaiTime[1].endsWith("m")) { + delay *= 1000; + } + } + } else { + console_log( + ` [Couldn't find msec in '${wrappedResult.message}'`, + ); + } + console_log(` [Retry on 429 error: sleep ${delay} ms]`); + await new Promise((resolve) => setTimeout(resolve, delay)); + continue; + } + console_log(` [Giving up: ${wrappedResult.message}]`); + return undefined; + } + } while (!wrappedResult.success); + return wrappedResult.data; +} diff --git a/ts/packages/agents/spelunker/src/searchCode.ts b/ts/packages/agents/spelunker/src/searchCode.ts index 022ba844f..bb476ffee 100644 --- a/ts/packages/agents/spelunker/src/searchCode.ts +++ b/ts/packages/agents/spelunker/src/searchCode.ts @@ -3,125 +3,36 @@ import * as fs from "fs"; import * as path from "path"; -import { createRequire } from "module"; -import Database, * as sqlite from "better-sqlite3"; +import * as sqlite from "better-sqlite3"; +import { Result, TypeChatJsonTranslator } from "typechat"; -import { createJsonTranslator, Result, TypeChatJsonTranslator } from "typechat"; -import { createTypeScriptJsonValidator } from "typechat/ts"; - -import { ChatModel, openai } from "aiclient"; import { createLimiter } from "common-utils"; - +import { ActionResult, Entity } from "@typeagent/agent-sdk"; import { - ActionResult, - ActionResultSuccess, - Entity, -} from "@typeagent/agent-sdk"; -import { createActionResultFromError } from "@typeagent/agent-sdk/helpers/action"; -import { loadSchema } 
from "typeagent"; - -import { - Blob, - Chunk, - ChunkedFile, - ChunkerErrorItem, - ChunkId, -} from "./chunkSchema.js"; -import { OracleSpecs } from "./oracleSchema.js"; + createActionResultFromMarkdownDisplay, + createActionResultFromError, +} from "@typeagent/agent-sdk/helpers/action"; + +import { keepBestChunks, makeBatches } from "./batching.js"; +import { Blob, Chunk, ChunkedFile, ChunkerErrorItem } from "./chunkSchema.js"; +import { createDatabase, purgeFile } from "./databaseUtils.js"; +import { loadEmbeddings, preSelectChunks } from "./embeddings.js"; +import { console_log, resetEpoch } from "./logging.js"; import { chunkifyPythonFiles } from "./pythonChunker.js"; +import { createQueryContext } from "./queryContext.js"; +import { retryOn429 } from "./retryLogic.js"; import { ChunkDescription, SelectorSpecs } from "./selectorSchema.js"; import { SpelunkerContext } from "./spelunkerActionHandler.js"; -import { SummarizerSpecs } from "./summarizerSchema.js"; +import { prepareChunks } from "./summarizing.js"; import { chunkifyTypeScriptFiles } from "./typescriptChunker.js"; -let epoch: number = 0; - -function console_log(...rest: any[]): void { - if (!epoch) { - epoch = Date.now(); - console.log(""); // Start new epoch with a blank line - } - const t = Date.now(); - console.log(((t - epoch) / 1000).toFixed(3).padStart(6), ...rest); -} - -export interface QueryContext { - chatModel: ChatModel; - oracle: TypeChatJsonTranslator; - miniModel: ChatModel; - chunkSelector: TypeChatJsonTranslator; - chunkSummarizer: TypeChatJsonTranslator; - databaseLocation: string; - database: sqlite.Database | undefined; -} - -function captureTokenStats(req: any, response: any): void { - console_log( - ` [Tokens used: prompt=${response.usage.prompt_tokens}, ` + - `completion=${response.usage.completion_tokens}]`, - ); -} - -function createQueryContext(): QueryContext { - const chatModel = openai.createChatModelDefault("spelunkerChat"); - chatModel.completionCallback = captureTokenStats; - - const miniModel = openai.createChatModel( - undefined, // "GPT_4_O_MINI" is slower than default model?! - undefined, - undefined, - ["spelunkerMini"], - ); - miniModel.completionCallback = captureTokenStats; - - const oracle = createTranslator( - chatModel, - "oracleSchema.ts", - "OracleSpecs", - ); - const chunkSelector = createTranslator( - miniModel, - "selectorSchema.ts", - "SelectorSpecs", - ); - const chunkSummarizer = createTranslator( - miniModel, - "summarizerSchema.ts", - "SummarizerSpecs", - ); - - const databaseFolder = path.join( - process.env.HOME ?? "/", - ".typeagent", - "agents", - "spelunker", - ); - const mkdirOptions: fs.MakeDirectoryOptions = { - recursive: true, - mode: 0o700, - }; - fs.mkdirSync(databaseFolder, mkdirOptions); - - const databaseLocation = path.join(databaseFolder, "codeSearchDatabase.db"); - const database = undefined; - return { - chatModel, - oracle, - miniModel, - chunkSelector, - chunkSummarizer, - databaseLocation, - database, - }; -} - // Answer a question; called from request and from searchCode action export async function searchCode( context: SpelunkerContext, input: string, ): Promise { - epoch = 0; // Reset logging clock + resetEpoch(); console_log(`[searchCode question='${input}']`); // 0. Check if the focus is set. @@ -131,10 +42,15 @@ export async function searchCode( // 1. Create the database, chunkify all files in the focus folders, and store the chunks. // Or use what's in the database if it looks up-to-date. 
- const db = await loadDatabaseAndChunks(context); + if (!context.queryContext) { + context.queryContext = createQueryContext(); + } + await createDatabase(context); + await loadDatabase(context); + const db = context.queryContext!.database!; // 2. Load all chunks from the database. - const allChunks = await loadAllChunksFromDatabase(db); + const allChunks = await readAllChunksFromDatabase(db); // 3. Ask a fast LLM for the most relevant chunk Ids, rank them, and keep the best ones. const chunks = await selectChunks(context, allChunks, input); @@ -168,14 +84,7 @@ export async function searchCode( ); } -async function loadDatabaseAndChunks( - context: SpelunkerContext, -): Promise { - console_log(`[Step 1: Load database]`); - return await loadDatabase(context); -} - -async function loadAllChunksFromDatabase( +async function readAllChunksFromDatabase( db: sqlite.Database, ): Promise { console_log(`[Step 2: Load chunks from database]`); @@ -282,21 +191,29 @@ export async function selectChunks( console_log( `[Step 3: Select relevant chunks from ${allChunks.length} chunks]`, ); + console_log(`[Step 3a: Pre-select with fuzzy matching]`); + const nearestChunkIds = await preSelectChunks(context, input, 500); + allChunks = allChunks.filter((c) => nearestChunkIds.includes(c.chunkId)); + console_log(` [Pre-selected ${allChunks.length} chunks]`); + + console_log(`[Step 3b: Narrow those down with LLM]`); const promises: Promise[] = []; const maxConcurrency = parseInt(process.env.AZURE_OPENAI_MAX_CONCURRENCY ?? "5") ?? 5; const limiter = createLimiter(maxConcurrency); - const batchLimit = process.env.OPENAI_API_KEY ? 100000 : 250000; // TODO: tune - const batches = makeBatches(allChunks, batchLimit); + const batchLimit = process.env.OPENAI_API_KEY ? 100000 : 100000; // TODO: tune + const batches = makeBatches(allChunks, batchLimit, 60); // TODO: tune console_log( ` [${batches.length} batches, maxConcurrency ${maxConcurrency}]`, ); - for (const batch of batches) { + for (let i = 0; i < batches.length; i++) { + const batch = batches[i]; const p = limiter(() => selectRelevantChunks( context.queryContext!.chunkSelector, batch, input, + i, ), ); promises.push(p); @@ -315,7 +232,7 @@ export async function selectChunks( allChunkDescs.sort((a, b) => b.relevance - a.relevance); // console_log(` [${allChunks.map((c) => (c.relevance)).join(", ")}]`); - const maxKeep = process.env.OPENAI_API_KEY ? 100000 : 200000; // TODO: tune + const maxKeep = process.env.OPENAI_API_KEY ? 
100000 : 100000; // TODO: tune const chunks = keepBestChunks(allChunkDescs, allChunks, maxKeep); console_log(` [Keeping ${chunks.length} chunks]`); // for (let i = 0; i < chunks.length; i++) { @@ -332,6 +249,7 @@ async function selectRelevantChunks( selector: TypeChatJsonTranslator, chunks: Chunk[], input: string, + batchIndex: number, ): Promise { // TODO: Prompt engineering const prompt = `\ @@ -347,91 +265,18 @@ async function selectRelevantChunks( ${prepareChunks(chunks)} `; // console_log(prompt); - const result = await retryTranslateOn429(() => selector.translate(prompt)); + const result = await retryOn429(() => selector.translate(prompt)); if (!result) { - console_log(` [Failed to select chunks for ${chunks.length} chunks]`); + console_log( + ` [Failed to select chunks for batch ${batchIndex + 1} with ${chunks.length} chunks]`, + ); return []; } else { return result.chunkDescs; } } -function prepareChunks(chunks: Chunk[]): string { - chunks.sort( - // Sort by file name and chunk ID (should order by line number) - (a, b) => { - let cmp = a.fileName.localeCompare(b.fileName); - if (!cmp) { - cmp = a.lineNo - b.lineNo; - } - return cmp; - }, - ); - const output: string[] = []; - function put(line: string): void { - // console_log(line.trimEnd()); - output.push(line); - } - let lastFn = ""; - let lineNo = 0; - for (const chunk of chunks) { - if (chunk.fileName !== lastFn) { - lastFn = chunk.fileName; - lineNo = 0; - put("\n"); - put(`** file=${chunk.fileName}\n`); - } - put( - `* chunkId=${chunk.chunkId} kind=${chunk.treeName} name=${chunk.codeName}\n`, - ); - for (const blob of chunk.blobs) { - lineNo = blob.start; - for (const line of blob.lines) { - lineNo += 1; - put(`${lineNo} ${line}`); - } - } - } - return output.join(""); -} - -// TODO: Make the values two elements, comment start and comment end -// (and then caller should ensure comment end doesn't occur in the comment text). -const languageCommentMap: { [key: string]: string } = { - python: "#", - typescript: "//", -}; - -// TODO: Remove export once we're using summaries again. -export function prepareSummaries(db: sqlite.Database): string { - const selectAllSummaries = db.prepare(`SELECT * FROM Summaries`); - const summaryRows: any[] = selectAllSummaries.all(); - if (summaryRows.length > 100) { - console_log(` [Over 100 summary rows, skipping summaries in prompt]`); - return ""; - } - const lines: string[] = []; - for (const summaryRow of summaryRows) { - const comment = languageCommentMap[summaryRow.language] ?? 
"#"; - lines.push(""); - lines.push(`${comment} ${summaryRow.summary}`); - lines.push(summaryRow.signature); - } - return lines.join("\n"); -} - -function createTranslator( - model: ChatModel, - schemaFile: string, - typeName: string, -): TypeChatJsonTranslator { - const schema = loadSchema([schemaFile], import.meta.url); - const validator = createTypeScriptJsonValidator(schema, typeName); - const translator = createJsonTranslator(model, validator); - return translator; -} - -interface FileMtimeSize { +export interface FileMtimeSize { file: string; mtime: number; size: number; @@ -477,46 +322,15 @@ function getAllSourceFiles(dir: string): FileMtimeSize[] { return results; } -// Should be in actionHelpers.ts -function createActionResultFromMarkdownDisplay( - literalText: string, - entities: Entity[] = [], - resultEntity?: Entity, -): ActionResultSuccess { - return { - literalText, - entities, - resultEntity, - displayContent: { type: "markdown", content: literalText }, - }; -} - -async function loadDatabase( - context: SpelunkerContext, -): Promise { +// TODO: Break into multiple functions. +// Notably the part that compares files in the database and files on disk. +async function loadDatabase(context: SpelunkerContext): Promise { + console_log(`[Step 1: Load database]`); if (!context.queryContext) { context.queryContext = createQueryContext(); } - const db = createDatabase(context); - - const prepDeleteSummaries = db.prepare(` - DELETE FROM Summaries WHERE chunkId IN ( - SELECT chunkId - FROM chunks - WHERE fileName = ? - ) - `); - const prepDeleteBlobs = db.prepare(` - DELETE FROM Blobs WHERE chunkId IN ( - SELECT chunkId - FROM chunks - WHERE filename = ? - ) - `); - const prepDeleteChunks = db.prepare( - `DELETE FROM Chunks WHERE fileName = ?`, - ); - const prepDeleteFiles = db.prepare(`DELETE FROM files WHERE fileName = ?`); + const db = context.queryContext!.database!; + const prepInsertFiles = db.prepare( `INSERT OR REPLACE INTO Files (fileName, mtime, size) VALUES (?, ?, ?)`, ); @@ -549,6 +363,7 @@ async function loadDatabase( size: fileRow.size, }); } + const filesToInsert: FileMtimeSize[] = []; for (const file of files) { const dbStat = filesInDb.get(file.file); if ( @@ -559,7 +374,7 @@ async function loadDatabase( // console_log(` [Need to update ${file} (mtime/size mismatch)]`); filesToDo.push(file.file); // TODO: Make this insert part of the transaction for this file - prepInsertFiles.run(file.file, file.mtime, file.size); + filesToInsert.push(file); filesInDb.set(file.file, { file: file.file, mtime: file.mtime, @@ -575,12 +390,7 @@ async function loadDatabase( console_log(` [Deleting ${filesToDelete.length} files from database]`); for (const file of filesToDelete) { // console_log(` [Deleting ${file} from database]`); - db.exec(`BEGIN TRANSACTION`); - prepDeleteSummaries.run(file); - prepDeleteBlobs.run(file); - prepDeleteChunks.run(file); - prepDeleteFiles.run(file); - db.exec(`COMMIT`); + purgeFile(db, file); } } @@ -588,7 +398,7 @@ async function loadDatabase( console_log( ` [No files to update out of ${files.length}, yay cache!]`, ); - return db; + return; } // 1b. Chunkify all new files (without LLM help). 
@@ -620,10 +430,16 @@ async function loadDatabase( ); const allChunks: Chunk[] = []; for (const chunkedFile of allChunkedFiles) { + purgeFile(db, chunkedFile.fileName); db.exec(`BEGIN TRANSACTION`); - prepDeleteSummaries.run(chunkedFile.fileName); - prepDeleteBlobs.run(chunkedFile.fileName); - prepDeleteChunks.run(chunkedFile.fileName); + const file = filesToInsert.find((f) => f.file === chunkedFile.fileName); + if (!file) { + console_log( + ` [*** File ${chunkedFile.fileName} is missing from filesToInsert]`, + ); + continue; + } + prepInsertFiles.run(file.file, file.mtime, file.size); for (const chunk of chunkedFile.chunks) { allChunks.push(chunk); prepInsertChunks.run( @@ -648,293 +464,14 @@ async function loadDatabase( console_log( ` [Chunked ${allChunkedFiles.length} files into ${allChunks.length} chunks]`, ); - - // 1c. Use a fast model to summarize all chunks. - if (allChunks.length) { - await summarizeChunks(context, allChunks); - } - - return db; -} - -const databaseSchema = ` -CREATE TABLE IF NOT EXISTS Files ( - fileName TEXT PRIMARY KEY, - mtime FLOAT NOT NULL, - size INTEGER NOT NULL -); -CREATE TABLE IF NOT EXISTS Chunks ( - chunkId TEXT PRIMARY KEY, - treeName TEXT NOT NULL, - codeName TEXT NOT NULL, - parentId TEXT KEY REFERENCES Chunks(chunkId), -- May be null - fileName TEXT KEY REFERENCES files(fileName) NOT NULL, - lineNo INTEGER NOT NULL -- 1-based -); -CREATE TABLE IF NOT EXISTS Blobs ( - chunkId TEXT KEY REFERENCES Chunks(chunkId) NOT NULL, - start INTEGER NOT NULL, -- 0-based - lines TEXT NOT NULL, - breadcrumb TEXT -- Chunk ID or empty string or NULL -); -CREATE TABLE IF NOT EXISTS Summaries ( - chunkId TEXT PRIMARY KEY REFERENCES Chunks(chunkId), - language TEXT, -- "python", "typescript", etc. - summary TEXT, - signature TEXT -) -`; - -function getDbOptions() { - if (process?.versions?.electron !== undefined) { - return undefined; - } - const r = createRequire(import.meta.url); - const betterSqlitePath = r.resolve("better-sqlite3/package.json"); - const nativeBinding = path.join( - betterSqlitePath, - "../build/Release/better_sqlite3.n.node", - ); - return { nativeBinding }; -} - -function createDatabase(context: SpelunkerContext): sqlite.Database { - if (!context.queryContext) { - context.queryContext = createQueryContext(); - } - const loc = context.queryContext.databaseLocation; - const db0 = context.queryContext.database; - if (db0) { - console_log(` [Using database at ${loc}]`); - return db0; - } - if (fs.existsSync(loc)) { - console_log(` [Opening database at ${loc}]`); - } else { - console_log(` [Creating database at ${loc}]`); - } - const db = new Database(loc, getDbOptions()); - // Write-Ahead Logging, improving concurrency and performance - db.pragma("journal_mode = WAL"); - // Fix permissions to be read/write only by the owner - fs.chmodSync(context.queryContext.databaseLocation, 0o600); - // Create all the tables we'll use - db.exec(databaseSchema); - context.queryContext.database = db; - return db; -} - -async function summarizeChunks( - context: SpelunkerContext, - chunks: Chunk[], -): Promise { - console_log(`[Step 1c: Summarizing ${chunks.length} chunks]`); - // NOTE: We cannot stuff the buffer, because the completion size - // is limited to 4096 tokens, and we expect a certain number of - // tokens per chunk. Experimentally, 40 chunks per job works great. - const maxConcurrency = - parseInt(process.env.AZURE_OPENAI_MAX_CONCURRENCY ?? "0") ?? 
5; - let chunksPerJob = 40; - let numJobs = Math.ceil(chunks.length / chunksPerJob); - console_log( - ` [${chunksPerJob} chunks/job, ${numJobs} jobs, maxConcurrency ${maxConcurrency}]`, - ); - const limiter = createLimiter(maxConcurrency); - const promises: Promise[] = []; - for (let i = 0; i < chunks.length; i += chunksPerJob) { - const slice = chunks.slice(i, i + chunksPerJob); - promises.push(limiter(() => summarizeChunkSlice(context, slice))); - } - await Promise.all(promises); -} - -async function summarizeChunkSlice( - context: SpelunkerContext, - chunks: Chunk[], -): Promise { - const summarizer = context.queryContext!.chunkSummarizer; - // TODO: Prompt engineering - const prompt = `\ - Please summarize each of the given chunks. - A summary should be a one-line description of the chunk. - Also include the signature of the chunk. - - Chunks: - ${prepareChunks(chunks)} - `; - // console_log(prompt); - const result = await retryTranslateOn429(() => - summarizer.translate(prompt), - ); - if (!result) { - console_log( - ` [Failed to summarize chunks for ${chunks.length} chunks]`, - ); + if (!allChunks.length) { + console_log(` [No chunks to load]`); return; } - const summarizeSpecs = result; - // console_log(` [Received ${result.summaries.length} summaries]`); - // Enter them into the database - const db = context.queryContext!.database!; - const prepInsertSummary = db.prepare( - `INSERT OR REPLACE INTO Summaries (chunkId, language, summary, signature) VALUES (?, ?, ?, ?)`, - ); - const prepGetBlobWithBreadcrumb = db.prepare( - `SELECT lines, breadcrumb FROM Blobs WHERE breadcrumb = ?`, - ); - const prepUpdateBlob = db.prepare( - "UPDATE Blobs SET lines = ? WHERE breadcrumb = ?", - ); - let errors = 0; - for (const summary of summarizeSpecs.summaries) { - // console_log(summary); - try { - prepInsertSummary.run( - summary.chunkId, - summary.language, - summary.summary, - summary.signature, - ); - } catch (error) { - console_log( - `*** Db error for insert summary ${JSON.stringify(summary)}: ${error}`, - ); - errors += 1; - } - try { - type BlobRowType = { lines: string; breadcrumb: ChunkId }; - const blobRow: BlobRowType = prepGetBlobWithBreadcrumb.get( - summary.chunkId, - ) as any; - if (blobRow) { - let blobLines: string = blobRow.lines; - // Assume it doesn't start with a blank line /(^\s*\r?\n)*/ - const indent = blobLines?.match(/^(\s*)\S/)?.[1] ?? ""; // Whitespace followed by non-whitespace - blobLines = - `${indent}${languageCommentMap[summary.language ?? 
"python"]} ${summary.summary}\n` + - `${indent}${summary.signature} ...\n`; - // console_log( - // ` [Replacing\n'''\n${blobRow.lines}'''\nwith\n'''\n${blobLines}\n''']`, - // ); - const res = prepUpdateBlob.run(blobLines, summary.chunkId); - if (res.changes !== 1) { - console_log( - ` [*** Failed to update blob lines for ${summary.chunkId}]`, - ); - } - } - } catch (error) { - console_log( - `*** Db error for update blob ${JSON.stringify(summary)}: ${error}`, - ); - errors += 1; - } - } - if (errors) console_log(` [${errors} errors]`); -} - -async function retryTranslateOn429( - translate: () => Promise>, - retries: number = 3, - defaultDelay: number = 5000, -): Promise { - let wrappedResult: Result; - do { - retries--; - wrappedResult = await translate(); - // console_log(wrappedResult); - if (!wrappedResult.success) { - if ( - retries > 0 && - wrappedResult.message.includes("fetch error: 429:") - ) { - let delay = defaultDelay; - const azureTime = wrappedResult.message.match( - /after (\d+) milliseconds/, - ); - const openaiTime = wrappedResult.message.match( - /Please try again in (\d+\.\d*|\.\d+|\d+m)s./, - ); - if (azureTime || openaiTime) { - if (azureTime) { - delay = parseInt(azureTime[1]); - } else if (openaiTime) { - delay = parseFloat(openaiTime[1]); - if (!openaiTime[1].endsWith("m")) { - delay *= 1000; - } - } - } else { - console_log( - ` [Couldn't find msec in '${wrappedResult.message}'`, - ); - } - console_log(` [Retry on 429 error: sleep ${delay} ms]`); - await new Promise((resolve) => setTimeout(resolve, delay)); - continue; - } - console_log(` [${wrappedResult.message}]`); - return undefined; - } - } while (!wrappedResult.success); - return wrappedResult.data; -} - -function keepBestChunks( - chunkDescs: ChunkDescription[], // Sorted by descending relevance - allChunks: Chunk[], - batchSize: number, // In characters -): Chunk[] { - const chunks: Chunk[] = []; - let size = 0; - for (const chunkDesc of chunkDescs) { - const chunk = allChunks.find((c) => c.chunkId === chunkDesc.chunkId); - if (!chunk) continue; - const chunkSize = getChunkSize(chunk); - if (size + chunkSize > batchSize && chunks.length) { - break; - } - chunks.push(chunk); - size += chunkSize; - } - return chunks; -} - -function makeBatches( - chunks: Chunk[], - batchSize: number, // In characters -): Chunk[][] { - const batches: Chunk[][] = []; - let batch: Chunk[] = []; - let size = 0; - function flush(): void { - batches.push(batch); - console_log( - ` [Batch ${batches.length} has ${batch.length} chunks and ${size} bytes]`, - ); - batch = []; - size = 0; - } - for (const chunk of chunks) { - const chunkSize = getChunkSize(chunk); - if (size + chunkSize > batchSize && batch.length) { - flush(); - } - batch.push(chunk); - size += chunkSize; - } - if (batch.length) { - flush(); - } - return batches; -} + // 1c. Store all chunk embeddings. + await loadEmbeddings(context, allChunks); -function getChunkSize(chunk: Chunk): number { - // This is all an approximation - let size = chunk.fileName.length + 50; - for (const blob of chunk.blobs) { - size += blob.lines.join("").length + 4 * blob.lines.length; - } - return size; + // 1d. Use a fast model to summarize all chunks. 
+ // await summarizeChunks(context, allChunks); } diff --git a/ts/packages/agents/spelunker/src/spelunkerActionHandler.ts b/ts/packages/agents/spelunker/src/spelunkerActionHandler.ts index e806f374d..46714cf2b 100644 --- a/ts/packages/agents/spelunker/src/spelunkerActionHandler.ts +++ b/ts/packages/agents/spelunker/src/spelunkerActionHandler.ts @@ -25,7 +25,8 @@ import { getCommandInterface, } from "@typeagent/agent-sdk/helpers/command"; -import { searchCode, QueryContext } from "./searchCode.js"; +import { searchCode } from "./searchCode.js"; +import { QueryContext } from "./queryContext.js"; import { SpelunkerAction } from "./spelunkerSchema.js"; class RequestCommandHandler implements CommandHandler { diff --git a/ts/packages/agents/spelunker/src/summarizing.ts b/ts/packages/agents/spelunker/src/summarizing.ts new file mode 100644 index 000000000..12238abdd --- /dev/null +++ b/ts/packages/agents/spelunker/src/summarizing.ts @@ -0,0 +1,183 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +import * as sqlite from "better-sqlite3"; + +import { createLimiter } from "common-utils"; + +import { Chunk, ChunkId } from "./chunkSchema.js"; +import { console_log } from "./logging.js"; +import { retryOn429 } from "./retryLogic.js"; +import { SpelunkerContext } from "./spelunkerActionHandler.js"; + +export async function summarizeChunks( + context: SpelunkerContext, + chunks: Chunk[], +): Promise { + console_log(`[Step 1d: Summarizing ${chunks.length} chunks]`); + // NOTE: We cannot stuff the buffer, because the completion size + // is limited to 4096 tokens, and we expect a certain number of + // tokens per chunk. Experimentally, 40 chunks per job works great. + const maxConcurrency = + parseInt(process.env.AZURE_OPENAI_MAX_CONCURRENCY ?? "0") ?? 5; + let chunksPerJob = 40; + let numJobs = Math.ceil(chunks.length / chunksPerJob); + console_log( + ` [${chunksPerJob} chunks/job, ${numJobs} jobs, maxConcurrency ${maxConcurrency}]`, + ); + const limiter = createLimiter(maxConcurrency); + const promises: Promise[] = []; + for (let i = 0; i < chunks.length; i += chunksPerJob) { + const slice = chunks.slice(i, i + chunksPerJob); + promises.push(limiter(() => summarizeChunkSlice(context, slice))); + } + await Promise.all(promises); +} + +async function summarizeChunkSlice( + context: SpelunkerContext, + chunks: Chunk[], +): Promise { + const summarizer = context.queryContext!.chunkSummarizer; + // TODO: Prompt engineering + const prompt = `\ + Please summarize each of the given chunks. + A summary should be a one-line description of the chunk. + Also include the signature of the chunk. + + Chunks: + ${prepareChunks(chunks)} + `; + // console_log(prompt); + const result = await retryOn429(() => summarizer.translate(prompt)); + if (!result) { + console_log( + ` [Failed to summarize chunks for ${chunks.length} chunks]`, + ); + return; + } + + const summarizeSpecs = result; + // console_log(` [Received ${result.summaries.length} summaries]`); + // Enter them into the database + const db = context.queryContext!.database!; + const prepInsertSummary = db.prepare( + `INSERT OR REPLACE INTO Summaries (chunkId, language, summary, signature) VALUES (?, ?, ?, ?)`, + ); + const prepGetBlobWithBreadcrumb = db.prepare( + `SELECT lines, breadcrumb FROM Blobs WHERE breadcrumb = ?`, + ); + const prepUpdateBlob = db.prepare( + "UPDATE Blobs SET lines = ? 
WHERE breadcrumb = ?", + ); + let errors = 0; + for (const summary of summarizeSpecs.summaries) { + // console_log(summary); + try { + prepInsertSummary.run( + summary.chunkId, + summary.language, + summary.summary, + summary.signature, + ); + } catch (error) { + console_log( + `*** Db error for insert summary ${JSON.stringify(summary)}: ${error}`, + ); + errors += 1; + } + try { + type BlobRowType = { lines: string; breadcrumb: ChunkId }; + const blobRow: BlobRowType = prepGetBlobWithBreadcrumb.get( + summary.chunkId, + ) as any; + if (blobRow) { + let blobLines: string = blobRow.lines; + // Assume it doesn't start with a blank line /(^\s*\r?\n)*/ + const indent = blobLines?.match(/^(\s*)\S/)?.[1] ?? ""; // Whitespace followed by non-whitespace + blobLines = + `${indent}${languageCommentMap[summary.language ?? "python"]} ${summary.summary}\n` + + `${indent}${summary.signature} ...\n`; + // console_log( + // ` [Replacing\n'''\n${blobRow.lines}'''\nwith\n'''\n${blobLines}\n''']`, + // ); + const res = prepUpdateBlob.run(blobLines, summary.chunkId); + if (res.changes !== 1) { + console_log( + ` [*** Failed to update blob lines for ${summary.chunkId}]`, + ); + } + } + } catch (error) { + console_log( + `*** Db error for update blob ${JSON.stringify(summary)}: ${error}`, + ); + errors += 1; + } + } + if (errors) console_log(` [${errors} errors]`); +} + +export function prepareChunks(chunks: Chunk[]): string { + chunks.sort( + // Sort by file name and chunk ID (should order by line number) + (a, b) => { + let cmp = a.fileName.localeCompare(b.fileName); + if (!cmp) { + cmp = a.lineNo - b.lineNo; + } + return cmp; + }, + ); + const output: string[] = []; + function put(line: string): void { + // console_log(line.trimEnd()); + output.push(line); + } + let lastFn = ""; + let lineNo = 0; + for (const chunk of chunks) { + if (chunk.fileName !== lastFn) { + lastFn = chunk.fileName; + lineNo = 0; + put("\n"); + put(`** file=${chunk.fileName}\n`); + } + put( + `* chunkId=${chunk.chunkId} kind=${chunk.treeName} name=${chunk.codeName}\n`, + ); + for (const blob of chunk.blobs) { + lineNo = blob.start; + for (const line of blob.lines) { + lineNo += 1; + put(`${lineNo} ${line}`); + } + } + } + return output.join(""); +} + +// TODO: Remove export once we're using summaries again. +export function prepareSummaries(db: sqlite.Database): string { + const selectAllSummaries = db.prepare(`SELECT * FROM Summaries`); + const summaryRows: any[] = selectAllSummaries.all(); + if (summaryRows.length > 100) { + console_log(` [Over 100 summary rows, skipping summaries in prompt]`); + return ""; + } + const lines: string[] = []; + for (const summaryRow of summaryRows) { + const comment = languageCommentMap[summaryRow.language] ?? "#"; + lines.push(""); + lines.push(`${comment} ${summaryRow.summary}`); + lines.push(summaryRow.signature); + } + return lines.join("\n"); +} + +// TODO: Make the values two elements, comment start and comment end +// (and then caller should ensure comment end doesn't occur in the comment text). +const languageCommentMap: { [key: string]: string } = { + python: "#", + typescript: "//", +}; diff --git a/ts/packages/agents/spelunker/src/typescriptChunker.ts b/ts/packages/agents/spelunker/src/typescriptChunker.ts index 05b9a56fe..25f649f45 100644 --- a/ts/packages/agents/spelunker/src/typescriptChunker.ts +++ b/ts/packages/agents/spelunker/src/typescriptChunker.ts @@ -1,6 +1,8 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
+import path from "path"; + import ts from "typescript"; import { tsCode } from "code-processor"; @@ -12,7 +14,7 @@ import { ChunkedFile, ChunkerErrorItem, } from "./chunkSchema.js"; -import path from "path"; +import { console_log } from "./logging.js"; let last_ts = Date.now() * 1000; export function generate_id(): ChunkId { @@ -27,10 +29,10 @@ export function generate_id(): ChunkId { export async function chunkifyTypeScriptFiles( fileNames: string[], ): Promise<(ChunkedFile | ChunkerErrorItem)[]> { - // console.log("========================================================"); + // console_log("========================================================"); const results: (ChunkedFile | ChunkerErrorItem)[] = []; for (const fileName of fileNames) { - // console.log(fileName); + // console_log(fileName); const sourceFile: ts.SourceFile = await tsCode.loadSourceFile(fileName); const baseName = path.basename(fileName); @@ -76,7 +78,7 @@ export async function chunkifyTypeScriptFiles( ts.isFunctionDeclaration(childNode) || ts.isClassDeclaration(childNode) ) { - // console.log( + // console_log( // ts.SyntaxKind[childNode.kind], // tsCode.getStatementName(childNode), // ); @@ -156,7 +158,7 @@ function spliceBlobs(parentChunk: Chunk, childChunk: Chunk): void { blobs.push({ start: startBefore, lines: linesBefore }); } const sig: string = signature(childChunk); - // console.log("signature", sig); + // console_log("signature", sig); if (sig) { blobs.push({ start: childBlob.start, @@ -209,7 +211,7 @@ function makeBlobs( startPos = lineStarts[startLoc.line + 1]; startLoc = sourceFile.getLineAndCharacterOfPosition(startPos); } - // console.log( + // console_log( // `Start and end: ${startPos}=${startLoc.line + 1}:${startLoc.character}, ` + // `${endPos}=${endLoc.line + 1}:${endLoc.character}`, // ); @@ -217,7 +219,7 @@ function makeBlobs( startPos = lineStarts[startLoc.line + 1]; startLoc = sourceFile.getLineAndCharacterOfPosition(startPos); } - // console.log( + // console_log( // `Updated start: ${startPos}=${startLoc.line + 1}:${startLoc.character}`, // ); const lines: string[] = []; @@ -229,7 +231,7 @@ function makeBlobs( while (lines && !lines[lines.length - 1].trim()) { lines.pop(); } - // console.log(lines.slice(0, 3), "...", lines.slice(-3)); + // console_log(lines.slice(0, 3), "...", lines.slice(-3)); if (!lines.length) { return []; } @@ -251,6 +253,6 @@ export class Testing { "./packages/agents/spelunker/src/pythonChunker.ts", ]; const results = await chunkifyTypeScriptFiles(fileNames); - console.log(JSON.stringify(results, null, 2)); + console_log(JSON.stringify(results, null, 2)); } }