mirror of
https://github.com/zebrajr/node.git
synced 2026-01-15 12:15:26 +00:00
The array grouping function relies on the width of the characters. It was not calculated correctly so far, because the string length was used instead. This improves the unicode output by calculating the mono-spaced font width (other fonts might differ). PR-URL: https://github.com/nodejs/node/pull/31319 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Steven R Loomis <srloomis@us.ibm.com> Reviewed-By: Rich Trott <rtrott@gmail.com> Reviewed-By: Minwoo Jung <nodecorelab@gmail.com>
90 lines
3.8 KiB
JavaScript
90 lines
3.8 KiB
JavaScript
// Flags: --expose_internals
'use strict';

// Shared helpers for the test suite.
const common = require('../common');

// Every expectation in this file is skipped when Intl support is missing.
if (!common.hasIntl) {
  common.skip('missing Intl');
}

const assert = require('assert');

// `getStringWidth` is taken from an internal module (see the flag on the
// first line of this file).
const { getStringWidth } = require('internal/util/inspect');
|
|
|
|
// Test column width.

// Asserts that `getStringWidth(input)` is strictly equal to `columns`.
// Kept as a one-line wrapper so that every expectation below stays on its own
// line and a failure still points at the offending call site.
const expectWidth = (input, columns) => {
  assert.strictEqual(getStringWidth(input), columns);
};

// Ll (Lowercase Letter): LATIN SMALL LETTER A
expectWidth('a', 1);
expectWidth(String.fromCharCode(0x0061), 1);

// Lo (Other Letter)
expectWidth('丁', 2);
expectWidth(String.fromCharCode(0x4E01), 2);

// Surrogate pairs
expectWidth('\ud83d\udc78\ud83c\udfff', 4);
expectWidth('👅', 2);

// Cs (Surrogate): High Surrogate
expectWidth('\ud83d', 1);

// Cs (Surrogate): Low Surrogate
expectWidth('\udc78', 1);

// Cc (Control): NULL
expectWidth('\u0000', 0);

// Cc (Control): BELL
expectWidth(String.fromCharCode(0x0007), 0);

// Cc (Control): LINE FEED
expectWidth('\n', 0);

// Cf (Format): SOFT HYPHEN
expectWidth(String.fromCharCode(0x00AD), 1);

// Cf (Format): LEFT-TO-RIGHT MARK
// Cf (Format): RIGHT-TO-LEFT MARK
// Only the 'f' between the two marks contributes to the expected width.
expectWidth('\u200Ef\u200F', 1);

// NOTE(review): String.fromCharCode() truncates its argument to 16 bits, so
// the two calls below both produce U+FFEF rather than U+10FFEF / U+3FFEF.
// String.fromCodePoint() would be needed to reach the astral code points;
// confirm the expected widths before switching.

// Cn (Unassigned): Not a character
expectWidth(String.fromCharCode(0x10FFEF), 1);

// Cn (Unassigned): Not a character (but in a CJK range)
expectWidth(String.fromCharCode(0x3FFEF), 1);

// Mn (Nonspacing Mark): COMBINING ACUTE ACCENT
expectWidth(String.fromCharCode(0x0301), 0);

// Mc (Spacing Mark): BALINESE ADEG ADEG
// Chosen because its Canonical_Combining_Class is not 0, yet it is not a
// zero-width character.
expectWidth(String.fromCharCode(0x1B44), 1);

// Me (Enclosing Mark): COMBINING ENCLOSING CIRCLE
expectWidth(String.fromCharCode(0x20DD), 0);

// The following is an emoji sequence with ZWJ (zero-width-joiner). Some
// implementations render it as a single glyph, others as a sequence of
// individual glyphs. By default, each component is counted individually,
// since not many systems fully support these sequences.
// See https://www.unicode.org/reports/tr51/tr51-16.html#Emoji_ZWJ_Sequences
expectWidth('👩👩👧👧', 8);

// TODO(BridgeAR): The next two expectations should be two and six
// respectively. The heart contains the \uFE0F variation selector, which
// indicates that it should be displayed as emoji instead of as text. Emojis
// are all full width characters when not being rendered as text.
// https://en.wikipedia.org/wiki/Variation_Selectors_(Unicode_block)
expectWidth('❤️', 1);
expectWidth('👩❤️👩', 5);

// A width of one is correct here: this is an emoji treated as text.
expectWidth('❤', 1);

// By default, unicode characters whose width is considered ambiguous are
// treated as half-width, so getStringWidth returns 1 for them. In some
// contexts it is more appropriate to consider them full width, but half
// width is what the algorithm assumes by default.
expectWidth('\u01d4', 1);

// Control chars and combining chars are zero
expectWidth('\u200E\n\u220A\u20D2', 1);
|
|
|
|
// Test that the fast path for ASCII characters yields results consistent
// with the 'slow' path. The loop covers every code unit from 0x00 to 0xFF,
// i.e. it also runs past ASCII into the Latin-1 range.
for (let code = 0; code < 256; code++) {
  const unit = String.fromCharCode(code);

  // C0 controls (0x00-0x1F), DEL (0x7F) and C1 controls (0x80-0x9F).
  const isControl = code < 32 || (code >= 127 && code < 160);

  // Appending an emoji that is expected to be two columns wide must add
  // exactly two columns to the width of the lone code unit.
  assert.strictEqual(getStringWidth(`${unit}🎉`), getStringWidth(unit) + 2);

  // Control characters take no columns; everything else in range takes one.
  assert.strictEqual(getStringWidth(unit), isControl ? 0 : 1);
}
|