From 6e169fc65da097629588132f8fec82d8a836afdd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20Markb=C3=A5ge?=
Date: Wed, 3 Jul 2024 13:25:04 -0400
Subject: [PATCH] [Flight] Allow String Chunks to Passthrough in Node streams
 and renderToMarkup (#30131)

It can be more efficient to accept raw string chunks and pass them through a
stream than to encode them into a binary copy first. Previously our Flight
parsers didn't accept string chunks at all. That's partly because some chunks
have to be binary anyway, so string-only transport isn't enough, but some
chunks can be strings. This adds a partial ability for chunks to be received
as strings.

However, accepting strings comes with some downsides. E.g. if the strings are
split up we need to buffer them, which compromises perf for the common case.
If a chunk represents binary data, we'd need to encode it back into a typed
array, which would require a TextEncoder dependency in the parser. If a string
chunk represents a byte-length-encoded string, we don't know how many Unicode
characters to read without measuring them in terms of bytes - also requiring a
TextEncoder.

This PR is mainly intended for pass-through within the same memory. We can
simplify the implementation by assuming that any string chunk arrives as the
original chunk. This requires that the server stream config doesn't
arbitrarily concatenate strings (e.g. large strings should not be
concatenated, which is probably a good heuristic anyway). It also means this
is not suitable for, say, receiving string chunks on the client by passing
them through SSR hydration data - unless that encoding is only used for
chunks that were already emitted as strings by Flight. Web streams mostly
work on binary data anyway, so they can't use this.

In Node.js streams we concatenate precomputed and small strings into larger
buffers. It might make sense to do that using string ropes instead. In the
meantime we can at least pass large strings that exceed our buffer view size
as raw strings; there's no benefit to eagerly encoding those.

Also, let the Node client accept string chunks as long as they follow our
expected constraints. This lets us test the mixed protocol using
pass-throughs. It can also be useful when the RSC server is in the same
environment as the SSR server, since chunks don't have to go from strings to
typed arrays and back to strings.

Now we can also use this in the pass-through used in renderToMarkup, which
lets us avoid the dependency on TextDecoder/TextEncoder in that package.
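As a rough illustration of the intended same-memory pass-through, modeled on
the updated ReactFlightDOMNode test below. `App`, `webpackMap`, `moduleMap`
and `moduleLoading` are placeholder names for an app component and its
manifests, and the entry points are assumed to be the regular
react-server-dom-webpack builds:

    const React = require('react');
    const Stream = require('stream');
    const ReactServerDOMServer = require('react-server-dom-webpack/server');
    const ReactServerDOMClient = require('react-server-dom-webpack/client');

    // objectMode keeps string chunks as strings instead of concatenating them
    // into Buffers, so they reach the parser in their original shape.
    const readable = new Stream.PassThrough({objectMode: true});

    // `App` is a server component defined elsewhere (placeholder).
    ReactServerDOMServer.renderToPipeableStream(
      React.createElement(App),
      webpackMap,
    ).pipe(readable);

    // The Node client now accepts both Buffer and string chunks from the stream.
    const response = ReactServerDOMClient.createFromNodeStream(readable, {
      moduleMap,
      moduleLoading,
    });
    // `response` resolves to the root model (e.g. `await response` or `use(response)`).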
--- .../react-client/src/ReactFlightClient.js | 158 +++++++++++++++++- .../src/ReactHTMLLegacyClientStreamConfig.js | 14 +- packages/react-html/src/ReactHTMLServer.js | 6 +- .../src/ReactFlightDOMClientNode.js | 7 +- .../src/__tests__/ReactFlightDOMNode-test.js | 19 ++- .../src/ReactServerStreamConfigNode.js | 3 +- scripts/error-codes/codes.json | 4 +- 7 files changed, 186 insertions(+), 25 deletions(-) diff --git a/packages/react-client/src/ReactFlightClient.js b/packages/react-client/src/ReactFlightClient.js index 7d421f0422..3cb918d6c8 100644 --- a/packages/react-client/src/ReactFlightClient.js +++ b/packages/react-client/src/ReactFlightClient.js @@ -2121,7 +2121,7 @@ function resolveTypedArray( resolveBuffer(response, id, view); } -function processFullRow( +function processFullBinaryRow( response: Response, id: number, tag: number, @@ -2183,6 +2183,15 @@ function processFullRow( row += readPartialStringChunk(stringDecoder, buffer[i]); } row += readFinalStringChunk(stringDecoder, chunk); + processFullStringRow(response, id, tag, row); +} + +function processFullStringRow( + response: Response, + id: number, + tag: number, + row: string, +): void { switch (tag) { case 73 /* "I" */: { resolveModule(response, id, row); @@ -2385,7 +2394,7 @@ export function processBinaryChunk( // We found the last chunk of the row const length = lastIdx - i; const lastChunk = new Uint8Array(chunk.buffer, offset, length); - processFullRow(response, rowID, rowTag, buffer, lastChunk); + processFullBinaryRow(response, rowID, rowTag, buffer, lastChunk); // Reset state machine for a new row i = lastIdx; if (rowState === ROW_CHUNK_BY_NEWLINE) { @@ -2415,6 +2424,151 @@ export function processBinaryChunk( response._rowLength = rowLength; } +export function processStringChunk(response: Response, chunk: string): void { + // This is a fork of processBinaryChunk that takes a string as input. + // This can't be just any binary chunk converted to a string. It needs to be + // in the same offsets given from the Flight Server. E.g. if it's shifted by + // one byte then it won't line up to the UCS-2 encoding. It also needs to + // be valid Unicode. Also binary chunks cannot use this even if they're + // valid Unicode. Large strings are encoded as binary and cannot be passed + // here. Basically, only if Flight Server gave you this string as a chunk, + // you can use it here. + let i = 0; + let rowState = response._rowState; + let rowID = response._rowID; + let rowTag = response._rowTag; + let rowLength = response._rowLength; + const buffer = response._buffer; + const chunkLength = chunk.length; + while (i < chunkLength) { + let lastIdx = -1; + switch (rowState) { + case ROW_ID: { + const byte = chunk.charCodeAt(i++); + if (byte === 58 /* ":" */) { + // Finished the rowID, next we'll parse the tag. + rowState = ROW_TAG; + } else { + rowID = (rowID << 4) | (byte > 96 ? 
byte - 87 : byte - 48); + } + continue; + } + case ROW_TAG: { + const resolvedRowTag = chunk.charCodeAt(i); + if ( + resolvedRowTag === 84 /* "T" */ || + (enableBinaryFlight && + (resolvedRowTag === 65 /* "A" */ || + resolvedRowTag === 79 /* "O" */ || + resolvedRowTag === 111 /* "o" */ || + resolvedRowTag === 85 /* "U" */ || + resolvedRowTag === 83 /* "S" */ || + resolvedRowTag === 115 /* "s" */ || + resolvedRowTag === 76 /* "L" */ || + resolvedRowTag === 108 /* "l" */ || + resolvedRowTag === 71 /* "G" */ || + resolvedRowTag === 103 /* "g" */ || + resolvedRowTag === 77 /* "M" */ || + resolvedRowTag === 109 /* "m" */ || + resolvedRowTag === 86)) /* "V" */ + ) { + rowTag = resolvedRowTag; + rowState = ROW_LENGTH; + i++; + } else if ( + (resolvedRowTag > 64 && resolvedRowTag < 91) /* "A"-"Z" */ || + resolvedRowTag === 114 /* "r" */ || + resolvedRowTag === 120 /* "x" */ + ) { + rowTag = resolvedRowTag; + rowState = ROW_CHUNK_BY_NEWLINE; + i++; + } else { + rowTag = 0; + rowState = ROW_CHUNK_BY_NEWLINE; + // This was an unknown tag so it was probably part of the data. + } + continue; + } + case ROW_LENGTH: { + const byte = chunk.charCodeAt(i++); + if (byte === 44 /* "," */) { + // Finished the rowLength, next we'll buffer up to that length. + rowState = ROW_CHUNK_BY_LENGTH; + } else { + rowLength = (rowLength << 4) | (byte > 96 ? byte - 87 : byte - 48); + } + continue; + } + case ROW_CHUNK_BY_NEWLINE: { + // We're looking for a newline + lastIdx = chunk.indexOf('\n', i); + break; + } + case ROW_CHUNK_BY_LENGTH: { + if (rowTag !== 84) { + throw new Error( + 'Binary RSC chunks cannot be encoded as strings. ' + + 'This is a bug in the wiring of the React streams.', + ); + } + // For a large string by length, we don't know how many unicode characters + // we are looking for but we can assume that the raw string will be its own + // chunk. We add extra validation that the length is at least within the + // possible byte range it could possibly be to catch mistakes. + if (rowLength < chunk.length || chunk.length > rowLength * 3) { + throw new Error( + 'String chunks need to be passed in their original shape. ' + + 'Not split into smaller string chunks. ' + + 'This is a bug in the wiring of the React streams.', + ); + } + lastIdx = chunk.length; + break; + } + } + if (lastIdx > -1) { + // We found the last chunk of the row + if (buffer.length > 0) { + // If we had a buffer already, it means that this chunk was split up into + // binary chunks preceding it. + throw new Error( + 'String chunks need to be passed in their original shape. ' + + 'Not split into smaller string chunks. ' + + 'This is a bug in the wiring of the React streams.', + ); + } + const lastChunk = chunk.slice(i, lastIdx); + processFullStringRow(response, rowID, rowTag, lastChunk); + // Reset state machine for a new row + i = lastIdx; + if (rowState === ROW_CHUNK_BY_NEWLINE) { + // If we're trailing by a newline we need to skip it. + i++; + } + rowState = ROW_ID; + rowTag = 0; + rowID = 0; + rowLength = 0; + buffer.length = 0; + } else if (chunk.length !== i) { + // The rest of this row is in a future chunk. We only support passing the + // string from chunks in their entirety. Not split up into smaller string chunks. + // We could support this by buffering them but we shouldn't need to for + // this use case. + throw new Error( + 'String chunks need to be passed in their original shape. ' + + 'Not split into smaller string chunks. 
' + + 'This is a bug in the wiring of the React streams.', + ); + } + } + response._rowState = rowState; + response._rowID = rowID; + response._rowTag = rowTag; + response._rowLength = rowLength; +} + function parseModel(response: Response, json: UninitializedModel): T { return JSON.parse(json, response._fromJSON); } diff --git a/packages/react-html/src/ReactHTMLLegacyClientStreamConfig.js b/packages/react-html/src/ReactHTMLLegacyClientStreamConfig.js index 74b0503590..eaee8d4593 100644 --- a/packages/react-html/src/ReactHTMLLegacyClientStreamConfig.js +++ b/packages/react-html/src/ReactHTMLLegacyClientStreamConfig.js @@ -7,26 +7,22 @@ * @flow */ -// TODO: The legacy one should not use binary. +export type StringDecoder = null; -export type StringDecoder = TextDecoder; - -export function createStringDecoder(): StringDecoder { - return new TextDecoder(); +export function createStringDecoder(): null { + return null; } -const decoderOptions = {stream: true}; - export function readPartialStringChunk( decoder: StringDecoder, buffer: Uint8Array, ): string { - return decoder.decode(buffer, decoderOptions); + throw new Error('Not implemented.'); } export function readFinalStringChunk( decoder: StringDecoder, buffer: Uint8Array, ): string { - return decoder.decode(buffer); + throw new Error('Not implemented.'); } diff --git a/packages/react-html/src/ReactHTMLServer.js b/packages/react-html/src/ReactHTMLServer.js index 8937001633..6bea571338 100644 --- a/packages/react-html/src/ReactHTMLServer.js +++ b/packages/react-html/src/ReactHTMLServer.js @@ -26,7 +26,7 @@ import { import { createResponse as createFlightResponse, getRoot as getFlightRoot, - processBinaryChunk as processFlightBinaryChunk, + processStringChunk as processFlightStringChunk, close as closeFlight, } from 'react-client/src/ReactFlightClient'; @@ -80,12 +80,10 @@ export function renderToMarkup( options?: MarkupOptions, ): Promise { return new Promise((resolve, reject) => { - const textEncoder = new TextEncoder(); const flightDestination = { push(chunk: string | null): boolean { if (chunk !== null) { - // TODO: Legacy should not use binary streams. 
- processFlightBinaryChunk(flightResponse, textEncoder.encode(chunk)); + processFlightStringChunk(flightResponse, chunk); } else { closeFlight(flightResponse); } diff --git a/packages/react-server-dom-webpack/src/ReactFlightDOMClientNode.js b/packages/react-server-dom-webpack/src/ReactFlightDOMClientNode.js index d0fb59c51e..e7beb9586a 100644 --- a/packages/react-server-dom-webpack/src/ReactFlightDOMClientNode.js +++ b/packages/react-server-dom-webpack/src/ReactFlightDOMClientNode.js @@ -30,6 +30,7 @@ import { createResponse, getRoot, reportGlobalError, + processStringChunk, processBinaryChunk, close, } from 'react-client/src/ReactFlightClient'; @@ -79,7 +80,11 @@ function createFromNodeStream( : undefined, ); stream.on('data', chunk => { - processBinaryChunk(response, chunk); + if (typeof chunk === 'string') { + processStringChunk(response, chunk); + } else { + processBinaryChunk(response, chunk); + } }); stream.on('error', error => { reportGlobalError(response, error); diff --git a/packages/react-server-dom-webpack/src/__tests__/ReactFlightDOMNode-test.js b/packages/react-server-dom-webpack/src/__tests__/ReactFlightDOMNode-test.js index 6f6a825e5e..2de34cc1c4 100644 --- a/packages/react-server-dom-webpack/src/__tests__/ReactFlightDOMNode-test.js +++ b/packages/react-server-dom-webpack/src/__tests__/ReactFlightDOMNode-test.js @@ -27,6 +27,11 @@ let use; let ReactServerScheduler; let reactServerAct; +// We test pass-through without encoding strings but it should work without it too. +const streamOptions = { + objectMode: true, +}; + describe('ReactFlightDOMNode', () => { beforeEach(() => { jest.resetModules(); @@ -76,7 +81,7 @@ describe('ReactFlightDOMNode', () => { function readResult(stream) { return new Promise((resolve, reject) => { let buffer = ''; - const writable = new Stream.PassThrough(); + const writable = new Stream.PassThrough(streamOptions); writable.setEncoding('utf8'); writable.on('data', chunk => { buffer += chunk; @@ -128,7 +133,7 @@ describe('ReactFlightDOMNode', () => { const stream = await serverAct(() => ReactServerDOMServer.renderToPipeableStream(, webpackMap), ); - const readable = new Stream.PassThrough(); + const readable = new Stream.PassThrough(streamOptions); let response; stream.pipe(readable); @@ -160,7 +165,7 @@ describe('ReactFlightDOMNode', () => { }), ); - const readable = new Stream.PassThrough(); + const readable = new Stream.PassThrough(streamOptions); const stringResult = readResult(readable); const parsedResult = ReactServerDOMClient.createFromNodeStream(readable, { @@ -206,7 +211,7 @@ describe('ReactFlightDOMNode', () => { const stream = await serverAct(() => ReactServerDOMServer.renderToPipeableStream(buffers), ); - const readable = new Stream.PassThrough(); + const readable = new Stream.PassThrough(streamOptions); const promise = ReactServerDOMClient.createFromNodeStream(readable, { moduleMap: {}, moduleLoading: webpackModuleLoading, @@ -253,7 +258,7 @@ describe('ReactFlightDOMNode', () => { const stream = await serverAct(() => ReactServerDOMServer.renderToPipeableStream(, webpackMap), ); - const readable = new Stream.PassThrough(); + const readable = new Stream.PassThrough(streamOptions); let response; stream.pipe(readable); @@ -304,7 +309,7 @@ describe('ReactFlightDOMNode', () => { ), ); - const writable = new Stream.PassThrough(); + const writable = new Stream.PassThrough(streamOptions); rscStream.pipe(writable); controller.enqueue('hi'); @@ -349,7 +354,7 @@ describe('ReactFlightDOMNode', () => { ), ); - const readable = new 
Stream.PassThrough(); + const readable = new Stream.PassThrough(streamOptions); rscStream.pipe(readable); const result = await ReactServerDOMClient.createFromNodeStream(readable, { diff --git a/packages/react-server/src/ReactServerStreamConfigNode.js b/packages/react-server/src/ReactServerStreamConfigNode.js index 773c998610..fe03332618 100644 --- a/packages/react-server/src/ReactServerStreamConfigNode.js +++ b/packages/react-server/src/ReactServerStreamConfigNode.js @@ -63,7 +63,8 @@ function writeStringChunk(destination: Destination, stringChunk: string) { currentView = new Uint8Array(VIEW_SIZE); writtenBytes = 0; } - writeToDestination(destination, textEncoder.encode(stringChunk)); + // Write the raw string chunk and let the consumer handle the encoding. + writeToDestination(destination, stringChunk); return; } diff --git a/scripts/error-codes/codes.json b/scripts/error-codes/codes.json index 46600256b0..088bd5b33b 100644 --- a/scripts/error-codes/codes.json +++ b/scripts/error-codes/codes.json @@ -523,5 +523,7 @@ "535": "renderToMarkup should not have emitted Server References. This is a bug in React.", "536": "Cannot pass ref in renderToMarkup because they will never be hydrated.", "537": "Cannot pass event handlers (%s) in renderToMarkup because the HTML will never be hydrated so they can never get called.", - "538": "Cannot use state or effect Hooks in renderToMarkup because this component will never be hydrated." + "538": "Cannot use state or effect Hooks in renderToMarkup because this component will never be hydrated.", + "539": "Binary RSC chunks cannot be encoded as strings. This is a bug in the wiring of the React streams.", + "540": "String chunks need to be passed in their original shape. Not split into smaller string chunks. This is a bug in the wiring of the React streams." }
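For reference, the consuming side that these new invariants (error codes 539
and 540) protect mirrors the ReactFlightDOMClientNode.js change above: the
Node client dispatches per chunk type, so raw string rows can interleave with
binary rows on the same stream. A condensed sketch; the process*/report/close
identifiers are the react-client exports used in this patch, and `response`
and `stream` are assumed to already be in scope:

    stream.on('data', chunk => {
      if (typeof chunk === 'string') {
        // String chunks must arrive exactly as Flight emitted them: whole rows,
        // never re-split or re-concatenated, and never stringified binary rows.
        processStringChunk(response, chunk);
      } else {
        // Binary chunks may be split arbitrarily; the parser buffers them.
        processBinaryChunk(response, chunk);
      }
    });
    stream.on('error', error => {
      reportGlobalError(response, error);
    });
    stream.on('end', () => close(response));

Anything that needs to re-chunk or re-encode data should stay on the binary
path, which is why processStringChunk validates that a length-prefixed row
spans the whole string chunk it arrives in.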