<!DOCTYPE html>
<html>
<head>
<title>OpenAI CLIP JavaScript - Image Demo - ONNX Web Runtime</title>
<script src="enable-threads.js"></script>
</head>
<body>
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/ort.js"></script>
<div>
imgur.com url (ideally 224x224): <input id="imgUrlInput" value="https://i.imgur.com/RKsLoNB.png">
<!-- karpathy: https://i.imgur.com/WEIKDpX.jpg -->
<!-- 512px astronaut: https://i.imgur.com/ec4Ao4s.png -->
<br>
backend: <select id="backendSelectEl">
<option>wasm</option>
<option value="webgl">webgl (doesn't work yet)</option>
</select>
<br>
quantized: <select id="quantizedSelectEl">
<option value="no">no</option>
<option value="yes">yes (4x smaller model, but currently the embeddings are inaccurate - see readme)</option>
</select>
<br>
<button id="startBtn" onclick="main()">start</button>
</div>
<p><a href="https://github.com/josephrocca/openai-clip-js">github repo</a> - <a href="https://huggingface.co/rocca/openai-clip-js/tree/main">huggingface repo</a></p>
<script>
if(self.crossOriginIsolated) { // needs to be cross-origin-isolated to use wasm threads. you need to serve this html file with these two headers: https://web.dev/coop-coep/
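// Example response headers that enable cross-origin isolation (from https://web.dev/coop-coep/):
//   Cross-Origin-Opener-Policy: same-origin
//   Cross-Origin-Embedder-Policy: require-corp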
ort.env.wasm.numThreads = navigator.hardwareConcurrency;
}
async function main() {
startBtn.disabled = true;
startBtn.innerHTML = "see console";
console.log("Downloading model... (see network tab for progress)");
// let modelPath = backendSelectEl.value === "webgl" ? './clip-image-vit-32-int32-float32.onnx' : './clip-image-vit-32-float32.onnx';
let modelPath = quantizedSelectEl.value === "no" ? 'https://huggingface.co/rocca/openai-clip-js/resolve/main/clip-image-vit-32-float32.onnx' : 'https://huggingface.co/rocca/openai-clip-js/resolve/main/clip-image-vit-32-uint8.onnx';
const session = await ort.InferenceSession.create(modelPath, { executionProviders: [backendSelectEl.value] });
console.log("Model loaded.");
// for console debugging:
window.session = session;
let rgbData = await getRgbData(imgUrlInput.value);
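// The model expects a single 224x224 RGB image in NCHW layout, hence the [1,3,224,224] shape.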
const feeds = {'input': new ort.Tensor('float32', rgbData, [1,3,224,224])};
console.log("Running inference...");
const results = await session.run(feeds);
console.log("Finished inference.");
const data = results["output"].data;
console.log(`data of result tensor 'output'`, data);
}
async function getRgbData(imgUrl, mode="resizeAndCenterCrop") {
let blob = await fetch(imgUrl, {referrer:""}).then(r => r.blob());
let img = await createImageBitmap(blob);
let canvas;
if(self.document) { // in a window context, use a regular canvas so the crop can be visualized
canvas = document.createElement("canvas");
canvas.width = 224;
canvas.height = 224;
} else { // in a worker there's no DOM, so fall back to an OffscreenCanvas
canvas = new OffscreenCanvas(224, 224);
}
let ctx = canvas.getContext("2d");
if(mode === "resizeAndCenterCrop") {
// scale img to fit the shorter side to the canvas size
let scale = Math.max(canvas.width / img.width, canvas.height / img.height);
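// e.g. for a 640x480 image: scale = max(224/640, 224/480) ≈ 0.467, so it's drawn at ~299x224 and the horizontal overflow is cropped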
// compute new image dimensions that would maintain the original aspect ratio
let scaledW = img.width * scale;
let scaledH = img.height * scale;
// compute position to center the image
let posX = (canvas.width - scaledW) / 2;
let posY = (canvas.height - scaledH) / 2;
// draw the image centered and scaled on the canvas
ctx.drawImage(img, posX, posY, scaledW, scaledH);
} else if(mode === "squash") {
ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
}
if(self.document) document.body.appendChild(canvas); // can be removed - just to visualize the crop
let imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
let rgbData = [[], [], []]; // [r, g, b]
// remove alpha and put into correct shape:
let d = imageData.data;
for(let i = 0; i < d.length; i += 4) {
let x = (i/4) % canvas.width;
let y = Math.floor((i/4) / canvas.width);
if(!rgbData[0][y]) rgbData[0][y] = [];
if(!rgbData[1][y]) rgbData[1][y] = [];
if(!rgbData[2][y]) rgbData[2][y] = [];
rgbData[0][y][x] = d[i+0]/255;
rgbData[1][y][x] = d[i+1]/255;
rgbData[2][y][x] = d[i+2]/255;
// From CLIP repo: Normalize(mean=(0.48145466, 0.4578275, 0.40821073), std=(0.26862954, 0.26130258, 0.27577711))
rgbData[0][y][x] = (rgbData[0][y][x] - 0.48145466) / 0.26862954;
rgbData[1][y][x] = (rgbData[1][y][x] - 0.4578275) / 0.26130258;
rgbData[2][y][x] = (rgbData[2][y][x] - 0.40821073) / 0.27577711;
}
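// flatten [3][224][224] into a single Float32Array of length 3*224*224 = 150528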
rgbData = Float32Array.from(rgbData.flat(2));
return rgbData;
}
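// A minimal sketch (not used by this demo) of how CLIP embeddings are typically
// compared: cosine similarity between an image embedding and a text embedding.
// Assumes both arguments are Float32Arrays of equal length (e.g. 512 for ViT-B/32).
function cosineSimilarity(a, b) {
let dot = 0, normA = 0, normB = 0;
for(let i = 0; i < a.length; i++) {
dot += a[i]*b[i];
normA += a[i]*a[i];
normB += b[i]*b[i];
}
return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}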
</script>
</body>
</html>