Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions benchmark/count-utf8.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/* eslint-disable no-console */
import { utf8CountJs, WASM_AVAILABLE } from "../src/utils/utf8.ts";
import { getWasmError, utf8CountWasm } from "../src/utils/utf8-wasm.ts";

// @ts-ignore
import Benchmark from "benchmark";

// Legend: the implementations this benchmark compares.
console.log("utf8CountJs - pure JS implementation");
console.log("utf8CountWasm - WebAssembly implementation");

// Report whether the wasm backend initialized and, if it did not, why.
const separator = "=".repeat(60);
console.log(separator);
console.log("WebAssembly Status:");
console.log(` WASM_AVAILABLE: ${WASM_AVAILABLE}`);
if (!WASM_AVAILABLE) {
  const reason = getWasmError()?.message;
  console.log(` Error: ${reason || "unknown"}`);
  // Only print the flag hint when the failure mentions the string builtins.
  if (reason?.includes("js-string") || reason?.includes("builtin")) {
    console.log("\n js-string-builtins is enabled by default in Node.js 24+ (V8 13.6+).");
    console.log(" For older versions, run with:");
    console.log(" node --experimental-wasm-imported-strings node_modules/.bin/ts-node benchmark/count-utf8.ts");
  }
} else {
  console.log(" js-string-builtins: enabled");
}
console.log(separator);

// Repeat counts applied to each base character (1-, 3- and 4-byte UTF-8 sequences).
const repeatCounts = [10, 30, 50, 100, 200, 500, 1000];

for (const baseStr of ["A", "あ", "🌏"]) {
  for (const repeatCount of repeatCounts) {
    const str = baseStr.repeat(repeatCount);
    const byteLength = utf8CountJs(str);

    console.log(`\n## string "${baseStr}" (strLength=${str.length}, byteLength=${byteLength})\n`);

    const suite = new Benchmark.Suite();

    suite.add("utf8CountJs", () => {
      utf8CountJs(str);
    });

    // The wasm case is only registered when the module actually loaded.
    if (WASM_AVAILABLE) {
      suite.add("utf8CountWasm", () => {
        utf8CountWasm(str);
      });
    }

    // Print one result line per finished benchmark case.
    suite.on("cycle", (event: any) => {
      console.log(String(event.target));
    });

    suite.run();
  }
}
36 changes: 34 additions & 2 deletions benchmark/decode-string.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,32 @@
/* eslint-disable no-console */
import { utf8EncodeJs, utf8Count, utf8DecodeJs, utf8DecodeTD } from "../src/utils/utf8";
import { utf8EncodeJs, utf8Count, utf8DecodeJs, utf8DecodeTD, WASM_AVAILABLE } from "../src/utils/utf8.ts";
import { getWasmError, utf8DecodeWasm } from "../src/utils/utf8-wasm.ts";

// @ts-ignore
import Benchmark from "benchmark";

// description
console.log("utf8DecodeJs - pure JS implementation");
console.log("utf8DecodeTD - TextDecoder implementation");
console.log("utf8DecodeWasm - WebAssembly implementation");

// Show wasm status
console.log("=".repeat(60));
console.log("WebAssembly Status:");
console.log(` WASM_AVAILABLE: ${WASM_AVAILABLE}`);
if (WASM_AVAILABLE) {
console.log(" js-string-builtins: enabled");
} else {
const error = getWasmError();
console.log(` Error: ${error?.message || "unknown"}`);
if (error?.message?.includes("js-string") || error?.message?.includes("builtin")) {
console.log("\n js-string-builtins is enabled by default in Node.js 24+ (V8 13.6+).");
console.log(" For older versions, run with:");
console.log(" node --experimental-wasm-imported-strings node_modules/.bin/ts-node benchmark/decode-string.ts");
}
}
console.log("=".repeat(60));

for (const baseStr of ["A", "あ", "🌏"]) {
const dataSet = [10, 100, 500, 1_000].map((n) => {
return baseStr.repeat(n);
Expand All @@ -24,11 +47,20 @@ for (const baseStr of ["A", "あ", "🌏"]) {
}
});

suite.add("TextDecoder", () => {
suite.add("utf8DecodeTD", () => {
if (utf8DecodeTD(bytes, 0, byteLength) !== str) {
throw new Error("wrong result!");
}
});

if (WASM_AVAILABLE) {
suite.add("utf8DecodeWasm", () => {
if (utf8DecodeWasm(bytes, 0, byteLength) !== str) {
throw new Error("wrong result!");
}
});
}

suite.on("cycle", (event: any) => {
console.log(String(event.target));
});
Expand Down
34 changes: 32 additions & 2 deletions benchmark/encode-string.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,32 @@
/* eslint-disable no-console */
import { utf8EncodeJs, utf8Count, utf8EncodeTE } from "../src/utils/utf8";
import { utf8EncodeJs, utf8Count, utf8EncodeTE, WASM_AVAILABLE } from "../src/utils/utf8.ts";
import { getWasmError, utf8EncodeWasm } from "../src/utils/utf8-wasm.ts";

// @ts-ignore
import Benchmark from "benchmark";

// description
console.log("utf8EncodeJs - pure JS implementation");
console.log("utf8EncodeTE - TextEncoder implementation");
console.log("utf8EncodeWasm - WebAssembly implementation");

// Show wasm status
console.log("=".repeat(60));
console.log("WebAssembly Status:");
console.log(` WASM_AVAILABLE: ${WASM_AVAILABLE}`);
if (WASM_AVAILABLE) {
console.log(" js-string-builtins: enabled");
} else {
const error = getWasmError();
console.log(` Error: ${error?.message || "unknown"}`);
if (error?.message?.includes("js-string") || error?.message?.includes("builtin")) {
console.log("\n js-string-builtins is enabled by default in Node.js 24+ (V8 13.6+).");
console.log(" For older versions, run with:");
console.log(" node --experimental-wasm-imported-strings node_modules/.bin/ts-node benchmark/encode-string.ts");
}
}
console.log("=".repeat(60));

for (const baseStr of ["A", "あ", "🌏"]) {
const dataSet = [10, 30, 50, 100].map((n) => {
return baseStr.repeat(n);
Expand All @@ -21,9 +44,16 @@ for (const baseStr of ["A", "あ", "🌏"]) {
utf8EncodeJs(str, buffer, 0);
});

suite.add("utf8DecodeTE", () => {
suite.add("utf8EncodeTE", () => {
utf8EncodeTE(str, buffer, 0);
});

if (WASM_AVAILABLE) {
suite.add("utf8EncodeWasm", () => {
utf8EncodeWasm(str, buffer, 0);
});
}

suite.on("cycle", (event: any) => {
console.log(String(event.target));
});
Expand Down
2 changes: 1 addition & 1 deletion benchmark/key-decoder.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* eslint-disable no-console */
import { utf8EncodeJs, utf8Count, utf8DecodeJs } from "../src/utils/utf8";
import { utf8EncodeJs, utf8Count, utf8DecodeJs } from "../src/utils/utf8.ts";

// @ts-ignore
import Benchmark from "benchmark";
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"prepublishOnly": "npm run test:dist",
"clean": "rimraf build dist dist.*",
"test": "mocha 'test/**/*.test.ts'",
"test:wasm": "MSGPACK_WASM=force node --experimental-wasm-imported-strings node_modules/.bin/mocha 'test/**/*.test.ts'",
"test:dist": "npm run lint && npm run test && npm run test:deno",
"test:cover": "npm run cover:clean && npx nyc --no-clean npm run 'test' && npm run cover:report",
"test:node_with_strip_types": "node --experimental-strip-types test/deno_test.ts",
Expand Down
23 changes: 23 additions & 0 deletions src/utils/utf8-wasm-binary.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Auto-generated by wasm/build.sh - DO NOT EDIT MANUALLY
// Source: wasm/utf8.wat

export const wasmBinary = `
AGFzbQEAAAABNQhedwFgAW8Bf2ACb38Bf2ADb2QAfwF/YANkAH9/AWRvYAJ/ZAABf2ABfwFkAGADZA
B/fwFvAnsEDndhc206anMtc3RyaW5nBmxlbmd0aAABDndhc206anMtc3RyaW5nCmNoYXJDb2RlQXQA
Ag53YXNtOmpzLXN0cmluZxFpbnRvQ2hhckNvZGVBcnJheQADDndhc206anMtc3RyaW5nEWZyb21DaG
FyQ29kZUFycmF5AAQDBgUBAgUGBwUDAQABB1QGBm1lbW9yeQIACXV0ZjhDb3VudAAECnV0ZjhFbmNv
ZGUABRF1dGY4RGVjb2RlVG9BcnJheQAGCmFsbG9jQXJyYXkABw1hcnJheVRvU3RyaW5nAAgK9gUFaw
EEfyAAEAAhBANAIAEgBE9FBEAgACABEAEiA0GAAUkEfyACQQFqBSADQYAQSQR/IAJBAmoFIANB/7cD
TSADQYCwA09xBH8gAUEBaiEBIAJBBGoFIAJBA2oLCwshAiABQQFqIQEMAQsLIAILswICBH8BZAAgAS
ECIAAgABAAIgX7BwAiBkEAEAIaA0AgBCAFT0UEQCAGIAT7DQAiA0GAAUkEfyACIAM6AAAgAkEBagUg
A0GAEEkEfyACIANBBnZBwAFyOgAAIAJBAWogA0E/cUGAAXI6AAAgAkECagUgA0H/twNNIANBgLADT3
EEfyACIANBCnQgBiAEQQFqIgT7DQBqQYC4/xprIgNBEnZB8AFyOgAAIAJBAWogA0EMdkE/cUGAAXI6
AAAgAkECaiADQQZ2QT9xQYABcjoAACACQQNqIANBP3FBgAFyOgAAIAJBBGoFIAIgA0EMdkHgAXI6AA
AgAkEBaiADQQZ2QT9xQYABcjoAACACQQJqIANBP3FBgAFyOgAAIAJBA2oLCwshAiAEQQFqIQQMAQsL
IAIgAWsLvwIBA38DQCAAIAJLBEAgAi0AACIEQYABcUUEQCABIAMgBPsOACADQQFqIQMgAkEBaiECDA
ILIARB4AFxQcABRgRAIAEgAyACQQFqLQAAQT9xIARBH3FBBnRy+w4AIANBAWohAyACQQJqIQIMAgsg
BEHwAXFB4AFGBEAgASADIAJBAmotAABBP3EgBEEPcUEMdCACQQFqLQAAQT9xQQZ0cnL7DgAgA0EBai
EDIAJBA2ohAgwCCyAEQfgBcUHwAUYEQCABIAMgAkEDai0AAEE/cSAEQQdxQRJ0IAJBAWotAABBP3FB
DHRyIAJBAmotAABBP3FBBnRyckGAgARrIgRBCnZBgLADcvsOACABIANBAWoiAyAEQf8HcUGAuANy+w
4AIANBAWohAyACQQRqIQIMAgUgAkEBaiECDAILAAsLIAMLBwAgAPsHAAsKACAAIAEgAhADCw==
`;
170 changes: 170 additions & 0 deletions src/utils/utf8-wasm.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
/**
* WebAssembly-based UTF-8 string processing using js-string-builtins with GC arrays.
*
* Environment variables:
* - MSGPACK_WASM=force: Force wasm mode, throw error if wasm fails to load
* - MSGPACK_WASM=never: Disable wasm, always use pure JS
*
* This implementation uses WASM GC arrays with intoCharCodeArray/fromCharCodeArray
* for efficient bulk string operations instead of character-by-character processing.
*/

import { wasmBinary } from "./utf8-wasm-binary.ts";

// Check environment variable for wasm mode
declare const process: { env?: Record<string, string | undefined> } | undefined;

/**
 * Reads the `MSGPACK_WASM` environment variable and maps it to a wasm mode.
 *
 * The value is matched case-insensitively: "force" and "never" are returned
 * as-is; anything else (including an unset or empty variable) yields "auto".
 * Any error raised while reading the environment also yields "auto".
 */
function getWasmMode(): "force" | "never" | "auto" {
  try {
    const requested = process?.env?.["MSGPACK_WASM"];
    if (!requested) {
      return "auto";
    }

    const normalized = requested.toLowerCase();
    if (normalized === "force" || normalized === "never") {
      return normalized;
    }
    return "auto";
  } catch {
    // `process` may not be defined (e.g. in a browser); fall back to auto.
    return "auto";
  }
}

// Wasm mode resolved once, when this module is first evaluated.
const WASM_MODE = getWasmMode();

// GC array type (opaque reference)
// JS code never inspects it; it is only passed back into the wasm exports below.
type I16Array = object;

/**
 * Shape of the exports this module expects from the instantiated wasm binary.
 */
interface WasmExports extends WebAssembly.Exports {
// Linear memory used to exchange UTF-8 bytes with the wasm functions.
memory: WebAssembly.Memory;
// Returns the UTF-8 byte length of `str`.
utf8Count(str: string): number;
// Encodes `str` into linear memory; callers here pass offset 0 and treat the
// return value as the number of bytes written.
utf8Encode(str: string, offset: number): number;
// Decodes `length` bytes of linear memory into `arr`; callers here treat the
// return value as the number of UTF-16 code units produced.
utf8DecodeToArray(length: number, arr: I16Array): number;
// Allocates a GC array with room for `size` elements.
allocArray(size: number): I16Array;
// Builds a string from the elements of `arr` in the range start..end.
arrayToString(arr: I16Array, start: number, end: number): string;
}

// Set by tryInitializeWasmInstance(): the instance exports on success...
let wasmInstance: WasmExports | null = null;
// ...or the reason initialization was skipped or failed.
let wasmInitError: Error | null = null;

/**
 * Decodes a base64 string into its raw bytes.
 *
 * Uses the global `atob` when present and falls back to Node.js `Buffer`
 * otherwise.
 */
function base64ToBytes(base64: string): Uint8Array {
  if (typeof atob !== "function") {
    // Node.js fallback
    return new Uint8Array(Buffer.from(base64, "base64"));
  }

  // atob yields a "binary string": one character (code <= 0xFF) per decoded byte.
  const decoded = atob(base64);
  return Uint8Array.from(decoded, (ch) => ch.charCodeAt(0));
}

/**
 * Compiles and instantiates the embedded wasm module, recording the outcome in
 * module state: `wasmInstance` on success, `wasmInitError` otherwise.
 *
 * - `WASM_MODE === "never"`: nothing is compiled; `wasmInitError` explains why.
 * - `WASM_MODE === "force"`: a load failure is recorded and then rethrown.
 * - otherwise: a load failure is recorded and swallowed so callers can fall
 *   back to another implementation.
 *
 * @throws Error when `WASM_MODE` is "force" and the module cannot be loaded;
 *         the original failure is attached as `cause`.
 */
function tryInitializeWasmInstance(): void {
  // The second "compile options" argument of WebAssembly.Module is not assumed
  // to be present in the lib typings, so describe the constructor shape
  // locally instead of casting it to `any`.
  type ModuleConstructorWithBuiltins = new (
    bytes: BufferSource,
    options: { builtins: string[] },
  ) => WebAssembly.Module;

  if (WASM_MODE === "never") {
    wasmInitError = new Error("MSGPACK_WASM=never: wasm disabled");
    return;
  }

  try {
    if (typeof WebAssembly === "undefined") {
      throw new Error("WebAssembly not supported");
    }

    const bytes = base64ToBytes(wasmBinary);

    // Requires js-string builtins support (Node.js 24+ / Chrome 130+ / Firefox 134+)
    const ModuleWithBuiltins: ModuleConstructorWithBuiltins = WebAssembly.Module;
    const module = new ModuleWithBuiltins(bytes, { builtins: ["js-string"] });

    // No import object is passed; the module is instantiated as-is.
    const instance = new WebAssembly.Instance(module);
    wasmInstance = instance.exports as WasmExports;
  } catch (e) {
    wasmInitError = e instanceof Error ? e : new Error(String(e));

    if (WASM_MODE === "force") {
      throw new Error(`MSGPACK_WASM=force but wasm failed to load: ${wasmInitError.message}`, { cause: wasmInitError });
    }
  }
}

// Initialize eagerly when this module is evaluated, so that the exported
// WASM_AVAILABLE constant below reflects the final outcome.
tryInitializeWasmInstance();

/**
 * Whether wasm is available and initialized.
 *
 * Computed once, right after the initialization attempt above; `false` means
 * getWasmError() holds the reason.
 */
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
export const WASM_AVAILABLE = wasmInstance !== null;

/**
 * Returns the error recorded when wasm initialization was skipped or failed,
 * or `null` if no such error was recorded.
 */
export function getWasmError(): Error | null {
return wasmInitError;
}

/**
 * Returns the raw exports of the instantiated wasm module, or `null` if the
 * module was not instantiated.
 */
export function getWasmExports(): WasmExports | null {
return wasmInstance;
}

/**
 * Count UTF-8 byte length of a string.
 *
 * Delegates directly to the wasm `utf8Count` export. The wasm instance is
 * dereferenced without a null check, so this must only be called when
 * `WASM_AVAILABLE` is true.
 *
 * @param str - the string to measure
 * @returns the value reported by the wasm `utf8Count` export
 */
export function utf8CountWasm(str: string): number {
return wasmInstance!.utf8Count(str);
}

/**
 * UTF-8 encodes `str` through the wasm module and copies the encoded bytes
 * into `output`, starting at `outputOffset`.
 *
 * The wasm instance is dereferenced without a null check, so this must only be
 * called when `WASM_AVAILABLE` is true.
 *
 * @param str - the string to encode
 * @param output - destination buffer; must have room for the encoded bytes
 * @param outputOffset - index in `output` at which the first byte is stored
 * @returns the number of bytes written
 */
export function utf8EncodeWasm(str: string, output: Uint8Array, outputOffset: number): number {
  const PAGE_SIZE = 65536;
  const wasm = wasmInstance!;

  // Upper bound on the encoded size, obtained without scanning the string:
  // a BMP code unit needs at most 3 bytes, and a surrogate pair (2 code units)
  // needs 4, so 3 bytes per UTF-16 code unit always suffices.
  const worstCaseBytes = 3 * str.length;

  // Grow linear memory if the worst case would not fit.
  const pagesNeeded = Math.ceil(worstCaseBytes / PAGE_SIZE);
  const pagesAllocated = wasm.memory.buffer.byteLength / PAGE_SIZE;
  const shortfall = pagesNeeded - pagesAllocated;
  if (shortfall > 0) {
    wasm.memory.grow(shortfall);
  }

  // Encode into wasm memory starting at offset 0.
  const written = wasm.utf8Encode(str, 0);

  // `memory.buffer` is read again here because growing memory replaces it.
  output.set(new Uint8Array(wasm.memory.buffer, 0, written), outputOffset);

  return written;
}

/**
 * Decode UTF-8 bytes to string.
 * Uses GC arrays with fromCharCodeArray for efficient string creation.
 *
 * The wasm instance is dereferenced without a null check, so this must only be
 * called when `WASM_AVAILABLE` is true.
 *
 * Robustness: the input is followed by a few zeroed guard bytes in wasm memory
 * and the output array gets one spare slot, so a multi-byte sequence that is
 * truncated at the very end of the input decodes deterministically.
 * NOTE(review): this is based on reading the compiled module, where the
 * decoder appears to load continuation bytes and to store the second surrogate
 * of a 4-byte sequence without checking them against `length` — confirm
 * against wasm/utf8.wat. Results for well-formed input are unaffected.
 *
 * @param bytes - buffer holding the UTF-8 data
 * @param inputOffset - index of the first byte to decode
 * @param byteLength - number of bytes to decode
 * @returns the decoded string
 */
export function utf8DecodeWasm(bytes: Uint8Array, inputOffset: number, byteLength: number): string {
  const PAGE_SIZE = 65536;
  // A lead byte can announce up to 3 continuation bytes.
  const GUARD_BYTES = 3;

  const wasm = wasmInstance!;

  // Ensure wasm memory is large enough for the UTF-8 input plus guard bytes
  const paddedLength = byteLength + GUARD_BYTES;
  const requiredPages = Math.ceil(paddedLength / PAGE_SIZE);
  const currentPages = wasm.memory.buffer.byteLength / PAGE_SIZE;

  if (requiredPages > currentPages) {
    wasm.memory.grow(requiredPages - currentPages);
  }

  // Copy UTF-8 bytes to wasm linear memory at offset 0, then clear the guard
  // bytes so no data left over from an earlier call can be picked up.
  const wasmBytes = new Uint8Array(wasm.memory.buffer, 0, paddedLength);
  wasmBytes.set(bytes.subarray(inputOffset, inputOffset + byteLength));
  wasmBytes.fill(0, byteLength);

  // Allocate GC array for UTF-16 output. Well-formed input needs at most
  // `byteLength` code units (the ASCII case); the extra slot covers a 4-byte
  // lead in the last position, which emits two code units.
  const arr = wasm.allocArray(byteLength + 1);

  // Decode UTF-8 to UTF-16 in GC array
  const codeUnits = wasm.utf8DecodeToArray(byteLength, arr);

  // Create string directly from GC array using fromCharCodeArray
  return wasm.arrayToString(arr, 0, codeUnits);
}
Loading