diff --git a/lib/fs.js b/lib/fs.js index 3ccc2b31c8..415d92d368 100644 --- a/lib/fs.js +++ b/lib/fs.js @@ -637,17 +637,8 @@ sys.inherits(ReadStream, events.EventEmitter); fs.FileReadStream = fs.ReadStream; // support the legacy name ReadStream.prototype.setEncoding = function (encoding) { - var Utf8Decoder = require("utf8decoder").Utf8Decoder; // lazy load - var self = this; - this._encoding = encoding.toLowerCase(); - if (this._encoding == 'utf-8' || this._encoding == 'utf8') { - this._decoder = new Utf8Decoder(); - this._decoder.onString = function(str) { - self.emit('data', str); - }; - } else if (this._decoder) { - delete this._decoder; - } + var StringDecoder = require("string_decoder").StringDecoder; // lazy load + this._decoder = new StringDecoder(encoding); }; @@ -707,13 +698,11 @@ ReadStream.prototype._read = function () { ReadStream.prototype._emitData = function (d) { - if (!this._encoding) { - this.emit('data', d); - } else if (this._decoder) { - this._decoder.write(d); + if (this._decoder) { + var string = this._decoder.write(d); + if (string.length) this.emit('data', string); } else { - var string = d.toString(this._encoding, 0, d.length); - this.emit('data', string); + this.emit('data', d); } }; diff --git a/lib/http.js b/lib/http.js index 1cf9ecaf1e..b5a9a1bdcb 100644 --- a/lib/http.js +++ b/lib/http.js @@ -9,7 +9,6 @@ if (debugLevel & 0x4) { } var net = require('net'); -var Utf8Decoder = require('utf8decoder').Utf8Decoder; var events = require('events'); var Buffer = require('buffer').Buffer; @@ -93,14 +92,12 @@ var parsers = new FreeList('parsers', 1000, function () { parser.onBody = function (b, start, len) { // TODO body encoding? - var enc = parser.incoming._encoding; - if (!enc) { - parser.incoming.emit('data', b.slice(start, start+len)); - } else if (this._decoder) { - this._decoder.write(pool.slice(start, end)); + var slice = b.slice(start, start+len); + if (parser.incoming._decoder) { + var string = parser.incoming._decoder.write(slice); + if (string.length) parser.incoming.emit('data', string); } else { - var string = b.toString(enc, start, start+len); - parser.incoming.emit('data', string); + parser.incoming.emit('data', slice); } }; @@ -217,18 +214,9 @@ IncomingMessage.prototype.setBodyEncoding = function (enc) { this.setEncoding(enc); }; -IncomingMessage.prototype.setEncoding = function (enc) { - // TODO check values, error out on bad, and deprecation message? - this._encoding = enc.toLowerCase(); - if (this._encoding == 'utf-8' || this._encoding == 'utf8') { - this._decoder = new Utf8Decoder(); - this._decoder.onString = function(str) { - this.emit('data', str); - }; - } else if (this._decoder) { - delete this._decoder; - } - +IncomingMessage.prototype.setEncoding = function (encoding) { + var StringDecoder = require("string_decoder").StringDecoder; // lazy load + this._decoder = new StringDecoder(encoding); }; IncomingMessage.prototype.pause = function () { diff --git a/lib/net.js b/lib/net.js index ac5f56825c..716326a57b 100644 --- a/lib/net.js +++ b/lib/net.js @@ -1,6 +1,5 @@ var sys = require("sys"); var fs = require("fs"); -var Utf8Decoder = require("utf8decoder").Utf8Decoder; var events = require("events"); var dns = require('dns'); @@ -500,20 +499,20 @@ function initStream (self) { var end = pool.used + bytesRead; pool.used += bytesRead; - if (!self._encoding) { + if (self._decoder) { + // emit String + var string = self._decoder.write(pool.slice(start, end)); + if (string.length) self.emit('data', string); + } else { + // emit buffer if (self._events && self._events['data']) { // emit a slice self.emit('data', pool.slice(start, end)); } - - // Optimization: emit the original buffer with end points - if (self.ondata) self.ondata(pool, start, end); - } else if (this._decoder) { - this._decoder.write(pool.slice(start, end)); - } else { - var string = pool.toString(self._encoding, start, end); - self.emit('data', string); } + + // Optimization: emit the original buffer with end points + if (self.ondata) self.ondata(pool, start, end); } }; self.readable = false; @@ -828,18 +827,9 @@ Stream.prototype._writeQueueLast = function () { }; -Stream.prototype.setEncoding = function (enc) { - var self = this; - // TODO check values, error out on bad, and deprecation message? - this._encoding = enc.toLowerCase(); - if (this._encoding == 'utf-8' || this._encoding == 'utf8') { - this._decoder = new Utf8Decoder(); - this._decoder.onString = function(str) { - self.emit('data', str); - }; - } else if (this._decoder) { - delete this._decoder; - } +Stream.prototype.setEncoding = function (encoding) { + var StringDecoder = require("string_decoder").StringDecoder; // lazy load + this._decoder = new StringDecoder(encoding); }; diff --git a/lib/utf8decoder.js b/lib/string_decoder.js similarity index 67% rename from lib/utf8decoder.js rename to lib/string_decoder.js index cd2bb9672e..2c7ace0ae5 100644 --- a/lib/utf8decoder.js +++ b/lib/string_decoder.js @@ -1,12 +1,21 @@ var Buffer = require('buffer').Buffer; -var Utf8Decoder = exports.Utf8Decoder = function() { - this.charBuffer = new Buffer(4); - this.charReceived = 0; - this.charLength = 0; +var StringDecoder = exports.StringDecoder = function (encoding) { + this.encoding = (encoding || 'utf8').toLowerCase().replace(/[-_]/,''); + if (this.encoding === 'utf8') { + this.charBuffer = new Buffer(4); + this.charReceived = 0; + this.charLength = 0; + } }; -Utf8Decoder.prototype.write = function(buffer) { + +StringDecoder.prototype.write = function (buffer) { + // If not utf8... + if (this.encoding !== 'utf8') { + return buffer.toString(this.encoding); + } + var charStr = ''; // if our last write ended with an incomplete multibyte character if (this.charLength) { @@ -21,28 +30,23 @@ Utf8Decoder.prototype.write = function(buffer) { if (this.charReceived < this.charLength) { // still not enough chars in this buffer? wait for more ... - return; + return ''; } // get the character that was split charStr = this.charBuffer.slice(0, this.charLength).toString(); this.charReceived = this.charLength = 0; - if (i == buffer.length) { - // if there are no more bytes in this buffer, just emit our char - this.onString(charStr) - return; - } + // if there are no more bytes in this buffer, just emit our char + if (i == buffer.length) return charStr; - // otherwise cut of the characters end from the beginning of this buffer + // otherwise cut off the characters end from the beginning of this buffer buffer = buffer.slice(i, buffer.length); } // determine how many bytes we have to check at the end of this buffer - var i = (buffer.length >= 3) - ? 3 - : buffer.length; + var i = (buffer.length >= 3) ? 3 : buffer.length; // figure out if one of the last i bytes of our buffer announces an incomplete char for (; i > 0; i--) { @@ -71,8 +75,7 @@ Utf8Decoder.prototype.write = function(buffer) { if (!this.charLength) { // no incomplete char at the end of this buffer, emit the whole thing - this.onString(charStr+buffer.toString()); - return; + return charStr + buffer.toString(); } // buffer the incomplete character bytes we got @@ -81,9 +84,9 @@ Utf8Decoder.prototype.write = function(buffer) { if (buffer.length - i > 0) { // buffer had more bytes before the incomplete char, emit them - this.onString(charStr+buffer.slice(0, buffer.length - i).toString()); - } else if (charStr) { - // or just emit the charStr if any - this.onString(charStr); + return charStr + buffer.toString('utf8', 0, buffer.length - i); } + + // or just emit the charStr + return charStr; }; diff --git a/src/node.cc b/src/node.cc index a83cd7b911..48ca941f48 100644 --- a/src/node.cc +++ b/src/node.cc @@ -1866,7 +1866,7 @@ static Handle Binding(const Arguments& args) { exports->Set(String::New("utils"), String::New(native_utils)); exports->Set(String::New("path"), String::New(native_path)); exports->Set(String::New("module"), String::New(native_module)); - exports->Set(String::New("utf8decoder"), String::New(native_utf8decoder)); + exports->Set(String::New("string_decoder"), String::New(native_string_decoder)); binding_cache->Set(module, exports); } diff --git a/test/simple/test-http-upgrade.js b/test/simple/test-http-upgrade.js index 8afe331d42..a1021e95d5 100644 --- a/test/simple/test-http-upgrade.js +++ b/test/simple/test-http-upgrade.js @@ -50,12 +50,6 @@ function testServer(){ sys.inherits(testServer, http.Server); -function testClient(){ - var conn = net.createConnection(PORT); - conn.setEncoding("utf8"); - return conn; -} - function writeReq(socket, data, encoding){ requests_sent++; socket.write(data); @@ -66,7 +60,8 @@ function writeReq(socket, data, encoding){ connection: Upgrade with listener -----------------------------------------------*/ function test_upgrade_with_listener(_server){ - var conn = new testClient(); + var conn = net.createConnection(PORT); + conn.setEncoding("utf8"); var state = 0; conn.addListener("connect", function () { @@ -79,10 +74,12 @@ function test_upgrade_with_listener(_server){ ); }); - conn.addListener("data", function(data){ + conn.addListener("data", function (data) { state++; - if(state == 1){ + assert.equal('string', typeof data); + + if(state == 1) { assert.equal("HTTP/1.1 101", data.substr(0, 12)); assert.equal("WjN}|M(6", request_upgradeHead.toString("utf8")); conn.write("test", "utf8"); @@ -106,7 +103,8 @@ function test_upgrade_with_listener(_server){ var test_upgrade_no_listener_ended = false; function test_upgrade_no_listener(){ - var conn = new testClient(); + var conn = net.createConnection(PORT); + conn.setEncoding("utf8"); conn.addListener("connect", function () { writeReq(conn, "GET / HTTP/1.1\r\nUpgrade: WebSocket\r\nConnection: Upgrade\r\n\r\n"); @@ -126,12 +124,15 @@ function test_upgrade_no_listener(){ connection: normal -----------------------------------------------*/ function test_standard_http(){ - var conn = new testClient(); + var conn = net.createConnection(PORT); + conn.setEncoding("utf8"); + conn.addListener("connect", function () { writeReq(conn, "GET / HTTP/1.1\r\n\r\n"); }); conn.addListener("data", function(data){ + assert.equal("string", typeof data); assert.equal("HTTP/1.1 200", data.substr(0, 12)); conn.end(); }); @@ -144,7 +145,7 @@ function test_standard_http(){ var server = createTestServer(); -server.listen(PORT, function(){ +server.listen(PORT, function () { // All tests get chained after this: test_upgrade_with_listener(server); }); diff --git a/test/simple/test-utf8-decoder.js b/test/simple/test-string-decoder.js similarity index 51% rename from test/simple/test-utf8-decoder.js rename to test/simple/test-string-decoder.js index fc15f224ed..fd5267537e 100644 --- a/test/simple/test-utf8-decoder.js +++ b/test/simple/test-string-decoder.js @@ -1,36 +1,30 @@ require('../common'); -var Utf8Decoder = require('utf8decoder').Utf8Decoder, - Buffer = require('buffer').Buffer, - decoder = new Utf8Decoder(), - buffer, - onStringCalled = 0; -decoder.onString = function(str) { - onStringCalled++; - assert.deepEqual(str, buffer.toString()); -}; +Buffer = require('buffer').Buffer; +StringDecoder = require('string_decoder').StringDecoder; +decoder = new StringDecoder('utf8'); + + buffer = new Buffer('$'); -decoder.write(buffer); -assert.equal(onStringCalled, 1); +assert.deepEqual('$', decoder.write(buffer)); buffer = new Buffer('¢'); -decoder.write(buffer.slice(0, 1)); -decoder.write(buffer.slice(1, 2)); -assert.equal(onStringCalled, 2); +assert.deepEqual('', decoder.write(buffer.slice(0, 1))); +assert.deepEqual('¢', decoder.write(buffer.slice(1, 2))); buffer = new Buffer('€'); -decoder.write(buffer.slice(0, 1)); -decoder.write(buffer.slice(1, 2)); -decoder.write(buffer.slice(2, 3)); -assert.equal(onStringCalled, 3); +assert.deepEqual('', decoder.write(buffer.slice(0, 1))); +assert.deepEqual('', decoder.write(buffer.slice(1, 2))); +assert.deepEqual('€', decoder.write(buffer.slice(2, 3))); buffer = new Buffer([0xF0, 0xA4, 0xAD, 0xA2]); -decoder.write(buffer.slice(0, 1)); -decoder.write(buffer.slice(1, 2)); -decoder.write(buffer.slice(2, 3)); -decoder.write(buffer.slice(3, 4)); -assert.equal(onStringCalled, 4); +s = ''; +s += decoder.write(buffer.slice(0, 1)); +s += decoder.write(buffer.slice(1, 2)); +s += decoder.write(buffer.slice(2, 3)); +s += decoder.write(buffer.slice(3, 4)); +assert.ok(s.length > 0); // A mixed ascii and non-ascii string // Test stolen from deps/v8/test/cctest/test-strings.cc @@ -51,18 +45,16 @@ charLengths = [0, 0, 1, 2, 2, 2, 3, 4, 4, 4, 5, 5]; print('scanning '); for (var j = 2; j < buffer.length; j++) { for (var i = 1; i < j; i++) { - var decoder = new Utf8Decoder(); - var sum = ""; - decoder.onString = function (s) { sum += s; }; + var decoder = new StringDecoder('utf8'); - decoder.write(buffer.slice(0, i)); + var sum = decoder.write(buffer.slice(0, i)); // just check that we've received the right amount // after the first write assert.equal(charLengths[i], sum.length); - decoder.write(buffer.slice(i, j)); - decoder.write(buffer.slice(j, buffer.length)); + sum += decoder.write(buffer.slice(i, j)); + sum += decoder.write(buffer.slice(j, buffer.length)); assert.equal(expected, sum); print("."); }