DevTools encoding supports multibyte characters (e.g. "🟩") (#22424)

Changes our text encoding approach to properly support multibyte characters following this algorithm. Based on benchmarking, this new approach is roughly equivalent in terms of performance (sometimes slightly faster, sometimes slightly slower). I also considered using TextEncoder/TextDecoder for this, but it was much slower (~85%).
2026-01-15 12:15:22 +00:00 · 2021-09-27 13:34:39 -04:00
parent c88fb49d37
commit 2e41568313
3 changed files with 76 additions and 22 deletions
--- a/packages/react-devtools-shared/src/tests/store-test.js
+++ b/packages/react-devtools-shared/src/tests/store-test.js
@@ -101,6 +101,19 @@ describe('Store', () => {
    `);
  });

+  it('should handle multibyte character strings', () => {
+    const Component = () => null;
+    Component.displayName = '🟩💜🔵';
+
+    const container = document.createElement('div');
+
+    act(() => legacyRender(<Component />, container));
+    expect(store).toMatchInlineSnapshot(`
+      [root]
+          <🟩💜🔵>
+    `);
+  });
+
  describe('collapseNodesByDefault:false', () => {
    beforeEach(() => {
      store.collapseNodesByDefault = false;
--- a/packages/react-devtools-shared/src/backend/renderer.js
+++ b/packages/react-devtools-shared/src/backend/renderer.js
@@ -1514,11 +1514,16 @@ export function attach(

  type OperationsArray = Array<number>;

+  type StringTableEntry = {|
+    encodedString: Array<number>,
+    id: number,
+  |};
+
  const pendingOperations: OperationsArray = [];
  const pendingRealUnmountedIDs: Array<number> = [];
  const pendingSimulatedUnmountedIDs: Array<number> = [];
  let pendingOperationsQueue: Array<OperationsArray> | null = [];
-  const pendingStringTable: Map<string, number> = new Map();
+  const pendingStringTable: Map<string, StringTableEntry> = new Map();
  let pendingStringTableLength: number = 0;
  let pendingUnmountedRootID: number | null = null;

@@ -1736,13 +1741,19 @@ export function attach(
    // Now fill in the string table.
    // [stringTableLength, str1Length, ...str1, str2Length, ...str2, ...]
    operations[i++] = pendingStringTableLength;
-    pendingStringTable.forEach((value, key) => {
-      operations[i++] = key.length;
-      const encodedKey = utfEncodeString(key);
-      for (let j = 0; j < encodedKey.length; j++) {
-        operations[i + j] = encodedKey[j];
+    pendingStringTable.forEach((entry, stringKey) => {
+      const encodedString = entry.encodedString;
+
+      // Don't use the string length.
+      // It won't work for multibyte characters (like emoji).
+      const length = encodedString.length;
+
+      operations[i++] = length;
+      for (let j = 0; j < length; j++) {
+        operations[i + j] = encodedString[j];
      }
-      i += key.length;
+
+      i += length;
    });

    if (numUnmountIDs > 0) {
@@ -1789,21 +1800,31 @@ export function attach(
    pendingStringTableLength = 0;
  }

-  function getStringID(str: string | null): number {
-    if (str === null) {
+  function getStringID(string: string | null): number {
+    if (string === null) {
      return 0;
    }
-    const existingID = pendingStringTable.get(str);
-    if (existingID !== undefined) {
-      return existingID;
+    const existingEntry = pendingStringTable.get(string);
+    if (existingEntry !== undefined) {
+      return existingEntry.id;
    }
-    const stringID = pendingStringTable.size + 1;
-    pendingStringTable.set(str, stringID);
-    // The string table total length needs to account
-    // both for the string length, and for the array item
-    // that contains the length itself. Hence + 1.
-    pendingStringTableLength += str.length + 1;
-    return stringID;
+
+    const id = pendingStringTable.size + 1;
+    const encodedString = utfEncodeString(string);
+
+    pendingStringTable.set(string, {
+      encodedString,
+      id,
+    });
+
+    // The string table total length needs to account both for the string length,
+    // and for the array item that contains the length itself.
+    //
+    // Don't use string length for this table.
+    // It won't work for multibyte characters (like emoji).
+    pendingStringTableLength += encodedString.length + 1;
+
+    return id;
  }

  function recordMount(fiber: Fiber, parentFiber: Fiber | null) {
--- a/packages/react-devtools-shared/src/utils.js
+++ b/packages/react-devtools-shared/src/utils.js
@@ -138,17 +138,37 @@ export function utfDecodeString(array: Array<number>): string {
  return string;
 }

+function surrogatePairToCodePoint(
+  charCode1: number,
+  charCode2: number,
+): number {
+  return ((charCode1 & 0x3ff) << 10) + (charCode2 & 0x3ff) + 0x10000;
+}
+
+// Credit for this encoding approach goes to Tim Down:
+// https://stackoverflow.com/questions/4877326/how-can-i-tell-if-a-string-contains-multibyte-characters-in-javascript
 export function utfEncodeString(string: string): Array<number> {
  const cached = encodedStringCache.get(string);
  if (cached !== undefined) {
    return cached;
  }

-  const encoded = new Array(string.length);
-  for (let i = 0; i < string.length; i++) {
-    encoded[i] = string.codePointAt(i);
+  const encoded = [];
+  let i = 0;
+  let charCode;
+  while (i < string.length) {
+    charCode = string.charCodeAt(i);
+    // Handle multibyte unicode characters (like emoji).
+    if ((charCode & 0xf800) === 0xd800) {
+      encoded.push(surrogatePairToCodePoint(charCode, string.charCodeAt(++i)));
+    } else {
+      encoded.push(charCode);
+    }
+    ++i;
  }
+
  encodedStringCache.set(string, encoded);
+
  return encoded;
 }