mirror of
https://github.com/zebrajr/node.git
synced 2026-01-15 12:15:26 +00:00
string_decoder: rewrite implementation
This commit provides a rewrite of StringDecoder that improves both performance (for non-single-byte encodings) and understandability. Additionally, StringDecoder instantiation performance has increased considerably due to inlinability and more efficient encoding name checking.

PR-URL: https://github.com/nodejs/node/pull/6777
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
This commit is contained in:
22
benchmark/string_decoder/string-decoder-create.js
Normal file
22
benchmark/string_decoder/string-decoder-create.js
Normal file
@@ -0,0 +1,22 @@
|
||||
'use strict';
|
||||
const common = require('../common.js');
|
||||
const StringDecoder = require('string_decoder').StringDecoder;
|
||||
|
||||
// Benchmark definition: registers `main` with the benchmark harness together
// with the parameter lists below.
// NOTE(review): presumably the harness runs `main` once per combination of
// the listed values — confirm against ../common.js.
const bench = common.createBenchmark(main, {
|
||||
// Encoding names passed to the StringDecoder constructor. The list mixes
// spellings and letter cases of the same encodings ('utf8', 'utf-8', 'UTF-8',
// 'AscII', ...).
encoding: [
|
||||
'ascii', 'utf8', 'utf-8', 'base64', 'ucs2', 'UTF-8', 'AscII', 'UTF-16LE'
|
||||
],
|
||||
// Number of StringDecoder instances constructed per run (25 million).
n: [25e6]
|
||||
});
|
||||
|
||||
/**
 * Benchmark body: constructs `n` StringDecoder instances for the configured
 * encoding between `bench.start()` and `bench.end(n)`.
 *
 * @param {object} conf - Benchmark configuration.
 * @param {string} conf.encoding - Encoding name handed to `new StringDecoder()`.
 * @param {number} conf.n - Iteration count; coerced to a 32-bit integer below.
 */
function main(conf) {
|
||||
const encoding = conf.encoding;
|
||||
// `| 0` coerces the configured count to a 32-bit integer.
const n = conf.n | 0;
|
||||
|
||||
bench.start();
|
||||
for (var i = 0; i < n; ++i) {
|
||||
const sd = new StringDecoder(encoding);
|
||||
// Reads `sd.encoding` and discards the result.
// NOTE(review): presumably this keeps the instance "used" so the
// construction is not optimized away — confirm.
!!sd.encoding;
|
||||
}
|
||||
// Reports `n` as the number of operations performed.
bench.end(n);
|
||||
}
|
||||
@@ -1,51 +1,79 @@
|
||||
'use strict';
|
||||
var common = require('../common.js');
|
||||
var StringDecoder = require('string_decoder').StringDecoder;
|
||||
const common = require('../common.js');
|
||||
const StringDecoder = require('string_decoder').StringDecoder;
|
||||
|
||||
var bench = common.createBenchmark(main, {
|
||||
encoding: ['ascii', 'utf8', 'base64-utf8', 'base64-ascii'],
|
||||
inlen: [32, 128, 1024],
|
||||
const bench = common.createBenchmark(main, {
|
||||
encoding: ['ascii', 'utf8', 'base64-utf8', 'base64-ascii', 'utf16le'],
|
||||
inlen: [32, 128, 1024, 4096],
|
||||
chunk: [16, 64, 256, 1024],
|
||||
n: [25e4]
|
||||
n: [25e5]
|
||||
});
|
||||
|
||||
var UTF_ALPHA = 'Blåbærsyltetøy';
|
||||
var ASC_ALPHA = 'Blueberry jam';
|
||||
const UTF8_ALPHA = 'Blåbærsyltetøy';
|
||||
const ASC_ALPHA = 'Blueberry jam';
|
||||
const UTF16_BUF = Buffer.from('Blåbærsyltetøy', 'utf16le');
|
||||
|
||||
function main(conf) {
|
||||
var encoding = conf.encoding;
|
||||
var inLen = conf.inlen | 0;
|
||||
var chunkLen = conf.chunk | 0;
|
||||
var n = conf.n | 0;
|
||||
const encoding = conf.encoding;
|
||||
const inLen = conf.inlen | 0;
|
||||
const chunkLen = conf.chunk | 0;
|
||||
const n = conf.n | 0;
|
||||
|
||||
var alpha;
|
||||
var chunks = [];
|
||||
var buf;
|
||||
const chunks = [];
|
||||
var str = '';
|
||||
var isBase64 = (encoding === 'base64-ascii' || encoding === 'base64-utf8');
|
||||
const isBase64 = (encoding === 'base64-ascii' || encoding === 'base64-utf8');
|
||||
var i;
|
||||
|
||||
if (encoding === 'ascii' || encoding === 'base64-ascii')
|
||||
alpha = ASC_ALPHA;
|
||||
else if (encoding === 'utf8' || encoding === 'base64-utf8')
|
||||
alpha = UTF_ALPHA;
|
||||
else
|
||||
alpha = UTF8_ALPHA;
|
||||
else if (encoding === 'utf16le') {
|
||||
buf = UTF16_BUF;
|
||||
str = Buffer.alloc(0);
|
||||
} else
|
||||
throw new Error('Bad encoding');
|
||||
|
||||
var sd = new StringDecoder(isBase64 ? 'base64' : encoding);
|
||||
const sd = new StringDecoder(isBase64 ? 'base64' : encoding);
|
||||
|
||||
for (i = 0; i < inLen; ++i) {
|
||||
if (i > 0 && (i % chunkLen) === 0 && !isBase64) {
|
||||
chunks.push(Buffer.from(str, encoding));
|
||||
str = '';
|
||||
if (alpha) {
|
||||
chunks.push(Buffer.from(str, encoding));
|
||||
str = '';
|
||||
} else {
|
||||
chunks.push(str);
|
||||
str = Buffer.alloc(0);
|
||||
}
|
||||
}
|
||||
if (alpha)
|
||||
str += alpha[i % alpha.length];
|
||||
else {
|
||||
var start = i;
|
||||
var end = i + 2;
|
||||
if (i % 2 !== 0) {
|
||||
++start;
|
||||
++end;
|
||||
}
|
||||
str = Buffer.concat([
|
||||
str,
|
||||
buf.slice(start % buf.length, end % buf.length)
|
||||
]);
|
||||
}
|
||||
str += alpha[i % alpha.length];
|
||||
}
|
||||
if (str.length > 0 && !isBase64)
|
||||
|
||||
if (!alpha) {
|
||||
if (str.length > 0)
|
||||
chunks.push(str);
|
||||
} else if (str.length > 0 && !isBase64)
|
||||
chunks.push(Buffer.from(str, encoding));
|
||||
|
||||
if (isBase64) {
|
||||
str = Buffer.from(str, 'utf8').toString('base64');
|
||||
while (str.length > 0) {
|
||||
var len = Math.min(chunkLen, str.length);
|
||||
const len = Math.min(chunkLen, str.length);
|
||||
chunks.push(Buffer.from(str.substring(0, len), 'utf8'));
|
||||
str = str.substring(len);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user