Skip to content

Commit

Permalink
Add code for interpolation search for asset manifest lookup
Browse files Browse the repository at this point in the history
  • Loading branch information
GregBrimble committed Feb 6, 2025
1 parent 2c2e760 commit 44c87aa
Show file tree
Hide file tree
Showing 8 changed files with 507 additions and 6 deletions.
5 changes: 5 additions & 0 deletions .changeset/stupid-turtles-hammer.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@cloudflare/workers-shared": minor
---

chore: Adds analytics and code (zero-percent gated) for a new asset manifest search algorithm
13 changes: 13 additions & 0 deletions packages/workers-shared/asset-worker/crypto-polyfill.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { afterAll, beforeAll } from "vitest";

// Can be deleted once Node.js (where these tests run) version is bumped to one which includes this global :)

// Before any test runs, expose Node's "crypto" module as the `crypto` global
// so code under test that reads `crypto` can find it.
beforeAll(() => {
// @ts-expect-error will go away once Node.js is bumped
globalThis.crypto = require("crypto");
});

// After all tests have run, remove the global that was installed above.
afterAll(() => {
// @ts-expect-error will go away once Node.js is bumped
delete globalThis.crypto;
});
Binary file not shown.
91 changes: 87 additions & 4 deletions packages/workers-shared/asset-worker/src/assets-manifest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,26 @@ export class AssetsManifest {
this.data = data;
}

async get(pathname: string) {
/**
 * Hashes `pathname` and looks the hash up with `binarySearch` in the manifest
 * bytes that follow the first HEADER_SIZE bytes.
 *
 * @param pathname - the path to look up
 * @returns the result of `contentHashToKey` for the matching entry, or `null`
 * when the search finds nothing
 */
async getWithBinarySearch(pathname: string) {
	const hashedPath = await hashPath(pathname);
	const entries = new Uint8Array(this.data, HEADER_SIZE);
	const match = binarySearch(entries, hashedPath);
	if (!match) {
		return null;
	}
	return contentHashToKey(match);
}

/**
 * Hashes `pathname` and looks the hash up with `interpolationSearch` in the
 * manifest bytes that follow the first HEADER_SIZE bytes.
 *
 * @param pathname - the path to look up
 * @returns the result of `contentHashToKey` for the matching entry, or `null`
 * when the search finds nothing
 */
async getWithInterpolationSearch(pathname: string) {
	const hashedPath = await hashPath(pathname);
	const entries = new Uint8Array(this.data, HEADER_SIZE);
	const match = interpolationSearch(entries, hashedPath);
	if (!match) {
		return null;
	}
	return contentHashToKey(match);
}
}

const hashPath = async (path: string) => {
export const hashPath = async (path: string) => {
const encoder = new TextEncoder();
const data = encoder.encode(path);
const hashBuffer = await crypto.subtle.digest(
Expand All @@ -32,7 +41,7 @@ const hashPath = async (path: string) => {
return new Uint8Array(hashBuffer, 0, PATH_HASH_SIZE);
};

const binarySearch = (
export const binarySearch = (
arr: Uint8Array,
searchValue: Uint8Array
): Uint8Array | false => {
Expand Down Expand Up @@ -67,7 +76,81 @@ const binarySearch = (
}
};

const compare = (a: Uint8Array, b: Uint8Array) => {
/**
 * Reads the first 16 bytes of `uint8Array` (starting at its byteOffset in the
 * underlying buffer) as one unsigned big-endian integer.
 *
 * @param uint8Array - view whose first 16 bytes are converted
 * @returns the 128-bit value as a bigint
 */
const uint8ArrayToNumber = (uint8Array: Uint8Array) => {
	const view = new DataView(uint8Array.buffer, uint8Array.byteOffset);
	// Most significant 64 bits come first, least significant 64 bits second.
	const upperHalf = view.getBigUint64(0);
	const lowerHalf = view.getBigUint64(8);
	return (upperHalf << 64n) + lowerHalf;
};

/**
 * Searches a manifest for the entry whose path hash equals `searchValue`,
 * estimating the probe position from where the hash sits numerically between
 * the hashes at the current window bounds.
 *
 * `arr` is read as consecutive ENTRY_SIZE-byte entries, each starting with a
 * PATH_HASH_SIZE-byte path hash.
 * NOTE(review): assumes entries are sorted ascending by path hash and that
 * the numeric order from `uint8ArrayToNumber` (first 16 bytes, big-endian)
 * agrees with `compare` — confirm against the manifest writer.
 *
 * @param arr - the manifest entries (without any header)
 * @param searchValue - the path hash to look for
 * @returns a view over the full matching entry, or `false` when absent
 * @throws TypeError when `searchValue` is not PATH_HASH_SIZE bytes long
 */
export const interpolationSearch = (
	arr: Uint8Array,
	searchValue: Uint8Array
): Uint8Array | false => {
	if (arr.byteLength === 0) {
		return false;
	}
	// Every stored hash view is PATH_HASH_SIZE bytes, so one up-front check
	// replaces the per-probe length comparison.
	if (searchValue.byteLength !== PATH_HASH_SIZE) {
		throw new TypeError(
			"Search value and current value are of different lengths"
		);
	}

	const pathHashAt = (index: number) =>
		new Uint8Array(
			arr.buffer,
			arr.byteOffset + index * ENTRY_SIZE,
			PATH_HASH_SIZE
		);

	const searchValueNumber = uint8ArrayToNumber(searchValue);
	let low = 0;
	let high = arr.byteLength / ENTRY_SIZE - 1;

	while (low <= high) {
		const lowValueNumber = uint8ArrayToNumber(pathHashAt(low));
		const highValueNumber = uint8ArrayToNumber(pathHashAt(high));

		// A hash outside the window's bounds cannot be present. Bailing out
		// here also keeps the interpolated index inside [low, high]; without
		// it the probe could land before the first or after the last entry.
		if (
			searchValueNumber < lowValueNumber ||
			searchValueNumber > highValueNumber
		) {
			return false;
		}

		// Equal bounds (e.g. a one-entry window) would make the divisor zero,
		// and BigInt division by zero throws; probe `low` directly instead.
		const mid =
			highValueNumber === lowValueNumber
				? low
				: low +
					Number(
						(BigInt(high - low) * (searchValueNumber - lowValueNumber)) /
							(highValueNumber - lowValueNumber)
					);

		const cmp = compare(pathHashAt(mid), searchValue);
		if (cmp === 0) {
			return new Uint8Array(
				arr.buffer,
				arr.byteOffset + mid * ENTRY_SIZE,
				ENTRY_SIZE
			);
		} else if (cmp < 0) {
			low = mid + 1;
		} else {
			high = mid - 1;
		}
	}
	return false;
};

export const compare = (a: Uint8Array, b: Uint8Array) => {
if (a.byteLength < b.byteLength) {
return -1;
}
Expand Down
54 changes: 54 additions & 0 deletions packages/workers-shared/asset-worker/src/experiment-analytics.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import type { ReadyAnalytics } from "./types";

// Schema version sent with every event as `version`.
// This will allow us to make breaking changes to the analytic schema
const VERSION = 1;

// Fields collected for one experiment event. Every field is optional; the
// comments below record which event column each one is written to.
// When adding new columns please update the schema
type Data = {
// -- Indexes --
// Sent as `accountId` on the event
accountId?: number;
// Sent as `indexId` on the event
experimentName?: string;

// -- Doubles --
// double1 - The time it takes to read the manifest in milliseconds
// (written as -1 when unset)
manifestReadTime?: number;

// -- Blobs --
// blob1 - Manifest read method
manifestReadMethod?: string;
};

/**
 * Collects experiment data points and emits them as a single event through
 * the optional `ReadyAnalytics` binding passed to the constructor.
 */
export class ExperimentAnalytics {
	private data: Data = {};
	private readyAnalytics?: ReadyAnalytics;

	/**
	 * @param readyAnalytics - event sink; when omitted, `write` does nothing
	 */
	constructor(readyAnalytics?: ReadyAnalytics) {
		this.readyAnalytics = readyAnalytics;
	}

	/** Merges `newData` over the stored data; later values win. */
	setData(newData: Partial<Data>) {
		const merged: Data = { ...this.data, ...newData };
		this.data = merged;
	}

	/** Returns the stored value for `key`, or `undefined` if it was never set. */
	getData(key: keyof Data) {
		const { data } = this;
		return data[key];
	}

	/** Logs one event built from the stored data, if a sink was provided. */
	write() {
		const sink = this.readyAnalytics;
		if (!sink) {
			return;
		}

		const { accountId, experimentName, manifestReadTime, manifestReadMethod } =
			this.data;

		sink.logEvent({
			version: VERSION,
			accountId,
			indexId: experimentName,
			doubles: [
				manifestReadTime ?? -1, // double1
			],
			blobs: [
				manifestReadMethod, // blob1
			],
		});
	}
}
37 changes: 35 additions & 2 deletions packages/workers-shared/asset-worker/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { mockJaegerBinding } from "../../utils/tracing";
import { Analytics } from "./analytics";
import { AssetsManifest } from "./assets-manifest";
import { applyConfigurationDefaults } from "./configuration";
import { ExperimentAnalytics } from "./experiment-analytics";
import { decodePath, getIntent, handleRequest } from "./handler";
import { getAssetWithMetadataFromKV } from "./utils/kv";
import type {
Expand Down Expand Up @@ -39,6 +40,7 @@ export type Env = {
JAEGER: JaegerTracing;

ENVIRONMENT: Environment;
EXPERIMENT_ANALYTICS: ReadyAnalytics;
ANALYTICS: ReadyAnalytics;
COLO_METADATA: ColoMetadata;
UNSAFE_PERFORMANCE: UnsafePerformanceTimer;
Expand Down Expand Up @@ -212,7 +214,38 @@ export default class extends WorkerEntrypoint<Env> {
}

/**
 * Looks up `pathname` in the assets manifest and resolves to the lookup
 * result (declared as `string | null`). Around the lookup it records which
 * search method was used and how long the lookup took, then writes one
 * experiment analytics event.
 */
async unstable_exists(pathname: string): Promise<string | null> {
// NOTE(review): the next two lines appear to be the removed side of the diff
// shown on this page — confirm which lines are live before relying on them.
const assetsManifest = new AssetsManifest(this.env.ASSETS_MANIFEST);
return await assetsManifest.get(pathname);
const analytics = new ExperimentAnalytics(this.env.EXPERIMENT_ANALYTICS);
const performance = new PerformanceTimer(this.env.UNSAFE_PERFORMANCE);

// Fraction of calls that should use interpolation search. Math.random()
// is never below 0, so at this rate the binary search is always used.
const INTERPOLATION_EXPERIMENT_SAMPLE_RATE = 0;
let searchMethod: "binary" | "interpolation" = "binary";
if (Math.random() < INTERPOLATION_EXPERIMENT_SAMPLE_RATE) {
searchMethod = "interpolation";
}
analytics.setData({ manifestReadMethod: searchMethod });

// The account id and experiment name are only attached when all three of
// these bindings are present.
if (
this.env.COLO_METADATA &&
this.env.VERSION_METADATA &&
this.env.CONFIG
) {
analytics.setData({
accountId: this.env.CONFIG.account_id,
experimentName: "manifest-read-timing",
});
}

const startTimeMs = performance.now();
try {
const assetsManifest = new AssetsManifest(this.env.ASSETS_MANIFEST);
if (searchMethod === "interpolation") {
return await assetsManifest.getWithInterpolationSearch(pathname);
} else {
return await assetsManifest.getWithBinarySearch(pathname);
}
} finally {
// Runs whether the lookup returned or threw: record the elapsed time and
// emit the event.
analytics.setData({ manifestReadTime: performance.now() - startTimeMs });
analytics.write();
}
}
}
Loading

0 comments on commit 44c87aa

Please sign in to comment.