diff options
author | 2023-09-21 07:10:07 -0700 | |
---|---|---|
committer | 2023-09-21 07:10:07 -0700 | |
commit | abfc10afeb73f9447e47929359d37f2b488c3c81 (patch) | |
tree | 6f17b77422194eb4aa829bd1e85b62ca1df333c7 | |
parent | a18ef053a42779138ab1b8bfbcba362ff65e04eb (diff) | |
download | bun-abfc10afeb73f9447e47929359d37f2b488c3c81.tar.gz bun-abfc10afeb73f9447e47929359d37f2b488c3c81.tar.zst bun-abfc10afeb73f9447e47929359d37f2b488c3c81.zip |
Revert "feat(encoding): support BOM detection (#5550)"
This reverts commit 5f66b4e729105286863a13955b1ed8897b45210e.
This caused test failures in text-encoder. cc @WingLim
-rw-r--r-- | src/bun.js/bindings/ZigGeneratedClasses.cpp | 18 | ||||
-rw-r--r-- | src/bun.js/bindings/generated_classes.zig | 4 | ||||
-rw-r--r-- | src/bun.js/webcore/encoding.classes.ts | 3 | ||||
-rw-r--r-- | src/bun.js/webcore/encoding.zig | 29 | ||||
-rw-r--r-- | test/js/web/encoding/text-decoder.test.js | 24 |
5 files changed, 8 insertions, 70 deletions
diff --git a/src/bun.js/bindings/ZigGeneratedClasses.cpp b/src/bun.js/bindings/ZigGeneratedClasses.cpp index b3451a058..b84c1cd16 100644 --- a/src/bun.js/bindings/ZigGeneratedClasses.cpp +++ b/src/bun.js/bindings/ZigGeneratedClasses.cpp @@ -26577,16 +26577,12 @@ JSC_DECLARE_CUSTOM_GETTER(TextDecoderPrototype__encodingGetterWrap); extern "C" JSC::EncodedJSValue TextDecoderPrototype__getFatal(void* ptr, JSC::JSGlobalObject* lexicalGlobalObject); JSC_DECLARE_CUSTOM_GETTER(TextDecoderPrototype__fatalGetterWrap); -extern "C" JSC::EncodedJSValue TextDecoderPrototype__getIgnoreBOM(void* ptr, JSC::JSGlobalObject* lexicalGlobalObject); -JSC_DECLARE_CUSTOM_GETTER(TextDecoderPrototype__ignoreBOMGetterWrap); - STATIC_ASSERT_ISO_SUBSPACE_SHARABLE(JSTextDecoderPrototype, JSTextDecoderPrototype::Base); static const HashTableValue JSTextDecoderPrototypeTableValues[] = { { "decode"_s, static_cast<unsigned>(JSC::PropertyAttribute::Function | JSC::PropertyAttribute::DOMJITFunction | PropertyAttribute::DontDelete), NoIntrinsic, { HashTableValue::DOMJITFunctionType, TextDecoderPrototype__decodeCallback, &DOMJITSignatureForTextDecoderPrototype__decode } }, { "encoding"_s, static_cast<unsigned>(JSC::PropertyAttribute::ReadOnly | JSC::PropertyAttribute::CustomAccessor | JSC::PropertyAttribute::DOMAttribute | PropertyAttribute::DontDelete), NoIntrinsic, { HashTableValue::GetterSetterType, TextDecoderPrototype__encodingGetterWrap, 0 } }, - { "fatal"_s, static_cast<unsigned>(JSC::PropertyAttribute::ReadOnly | JSC::PropertyAttribute::CustomAccessor | JSC::PropertyAttribute::DOMAttribute | PropertyAttribute::DontDelete), NoIntrinsic, { HashTableValue::GetterSetterType, TextDecoderPrototype__fatalGetterWrap, 0 } }, - { "ignoreBOM"_s, static_cast<unsigned>(JSC::PropertyAttribute::ReadOnly | JSC::PropertyAttribute::CustomAccessor | JSC::PropertyAttribute::DOMAttribute | PropertyAttribute::DontDelete), NoIntrinsic, { HashTableValue::GetterSetterType, TextDecoderPrototype__ignoreBOMGetterWrap, 0 } } + { "fatal"_s, static_cast<unsigned>(JSC::PropertyAttribute::ReadOnly | JSC::PropertyAttribute::CustomAccessor | JSC::PropertyAttribute::DOMAttribute | PropertyAttribute::DontDelete), NoIntrinsic, { HashTableValue::GetterSetterType, TextDecoderPrototype__fatalGetterWrap, 0 } } }; const ClassInfo JSTextDecoderPrototype::s_info = { "TextDecoder"_s, &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(JSTextDecoderPrototype) }; @@ -26674,18 +26670,6 @@ JSC_DEFINE_CUSTOM_GETTER(TextDecoderPrototype__fatalGetterWrap, (JSGlobalObject RELEASE_AND_RETURN(throwScope, result); } -JSC_DEFINE_CUSTOM_GETTER(TextDecoderPrototype__ignoreBOMGetterWrap, (JSGlobalObject * lexicalGlobalObject, EncodedJSValue thisValue, PropertyName attributeName)) -{ - auto& vm = lexicalGlobalObject->vm(); - Zig::GlobalObject* globalObject = reinterpret_cast<Zig::GlobalObject*>(lexicalGlobalObject); - auto throwScope = DECLARE_THROW_SCOPE(vm); - JSTextDecoder* thisObject = jsCast<JSTextDecoder*>(JSValue::decode(thisValue)); - JSC::EnsureStillAliveScope thisArg = JSC::EnsureStillAliveScope(thisObject); - JSC::EncodedJSValue result = TextDecoderPrototype__getIgnoreBOM(thisObject->wrapped(), globalObject); - RETURN_IF_EXCEPTION(throwScope, {}); - RELEASE_AND_RETURN(throwScope, result); -} - void JSTextDecoderPrototype::finishCreation(JSC::VM& vm, JSC::JSGlobalObject* globalObject) { Base::finishCreation(vm); diff --git a/src/bun.js/bindings/generated_classes.zig b/src/bun.js/bindings/generated_classes.zig index 50170f998..581d4a5f3 100644 --- a/src/bun.js/bindings/generated_classes.zig +++ b/src/bun.js/bindings/generated_classes.zig @@ -6862,9 +6862,6 @@ pub const JSTextDecoder = struct { if (@TypeOf(TextDecoder.getFatal) != GetterType) @compileLog("Expected TextDecoder.getFatal to be a getter"); - if (@TypeOf(TextDecoder.getIgnoreBOM) != GetterType) - @compileLog("Expected TextDecoder.getIgnoreBOM to be a getter"); - if (!JSC.is_bindgen) { @export(TextDecoder.constructor, .{ .name = "TextDecoderClass__construct" }); @export(TextDecoder.decode, .{ .name = "TextDecoderPrototype__decode" }); @@ -6872,7 +6869,6 @@ pub const JSTextDecoder = struct { @export(TextDecoder.finalize, .{ .name = "TextDecoderClass__finalize" }); @export(TextDecoder.getEncoding, .{ .name = "TextDecoderPrototype__getEncoding" }); @export(TextDecoder.getFatal, .{ .name = "TextDecoderPrototype__getFatal" }); - @export(TextDecoder.getIgnoreBOM, .{ .name = "TextDecoderPrototype__getIgnoreBOM" }); } } }; diff --git a/src/bun.js/webcore/encoding.classes.ts b/src/bun.js/webcore/encoding.classes.ts index 7114f210e..118dfd09e 100644 --- a/src/bun.js/webcore/encoding.classes.ts +++ b/src/bun.js/webcore/encoding.classes.ts @@ -16,9 +16,6 @@ export default [ fatal: { getter: "getFatal", }, - ignoreBOM: { - getter: "getIgnoreBOM", - }, decode: { fn: "decode", diff --git a/src/bun.js/webcore/encoding.zig b/src/bun.js/webcore/encoding.zig index ca0f44e6a..8ffbd3fd0 100644 --- a/src/bun.js/webcore/encoding.zig +++ b/src/bun.js/webcore/encoding.zig @@ -559,13 +559,6 @@ pub const TextDecoder = struct { remainder = remainder[1..]; continue; }, - // BOM handling - 0xFEFF => { - buffer.ensureTotalCapacity(allocator, 1) catch unreachable; - buffer.items.ptr[buffer.items.len] = remainder[0]; - buffer.items.len += 1; - remainder = remainder[1..]; - }, // Is this an unpaired low surrogate or four-digit hex escape? else => { @@ -636,13 +629,8 @@ pub const TextDecoder = struct { }, EncodingLabel.@"UTF-8" => { const toUTF16 = if (stream) strings.toUTF16Alloc else strings.toUTF16AllocNoTrim; - const moved_buffer_slice_8 = if (!this.ignore_bom and buffer_slice.len > 3 and std.mem.eql(u8, &[_]u8{ '\xEF', '\xBB', '\xBF' }, buffer_slice[0..3])) - buffer_slice[3..] - else - buffer_slice; - if (this.fatal) { - if (toUTF16(default_allocator, moved_buffer_slice_8, true)) |result_| { + if (toUTF16(default_allocator, buffer_slice, true)) |result_| { if (result_) |result| { return ZigString.toExternalU16(result.ptr, result.len, globalThis); } @@ -661,7 +649,7 @@ pub const TextDecoder = struct { } } } else { - if (toUTF16(default_allocator, moved_buffer_slice_8, false)) |result_| { + if (toUTF16(default_allocator, buffer_slice, false)) |result_| { if (result_) |result| { return ZigString.toExternalU16(result.ptr, result.len, globalThis); } @@ -676,20 +664,15 @@ pub const TextDecoder = struct { } // Experiment: using mimalloc directly is slightly slower - return ZigString.init(moved_buffer_slice_8).toValueGC(globalThis); + return ZigString.init(buffer_slice).toValueGC(globalThis); }, EncodingLabel.@"UTF-16LE" => { - const moved_buffer_slice_16 = if (!this.ignore_bom and buffer_slice.len > 2 and std.mem.eql(u8, &[_]u8{ '\xFF', '\xFE' }, buffer_slice[0..2])) - buffer_slice[2..] - else - buffer_slice; - - if (std.mem.isAligned(@intFromPtr(moved_buffer_slice_16.ptr), @alignOf([*]const u16))) { - return this.decodeUTF16WithAlignment([]align(2) const u16, @as([]align(2) const u16, @alignCast(std.mem.bytesAsSlice(u16, moved_buffer_slice_16))), globalThis); + if (std.mem.isAligned(@intFromPtr(buffer_slice.ptr), @alignOf([*]const u16))) { + return this.decodeUTF16WithAlignment([]align(2) const u16, @as([]align(2) const u16, @alignCast(std.mem.bytesAsSlice(u16, buffer_slice))), globalThis); } - return this.decodeUTF16WithAlignment([]align(1) const u16, std.mem.bytesAsSlice(u16, moved_buffer_slice_16), globalThis); + return this.decodeUTF16WithAlignment([]align(1) const u16, std.mem.bytesAsSlice(u16, buffer_slice), globalThis); }, else => { globalThis.throwInvalidArguments("TextDecoder.decode set to unsupported encoding", .{}); diff --git a/test/js/web/encoding/text-decoder.test.js b/test/js/web/encoding/text-decoder.test.js index 3685a5f6d..dabdb0936 100644 --- a/test/js/web/encoding/text-decoder.test.js +++ b/test/js/web/encoding/text-decoder.test.js @@ -250,7 +250,7 @@ describe("TextDecoder", () => { it("constructor should set values", () => { const decoder = new TextDecoder("utf-8", { fatal: true, ignoreBOM: false }); expect(decoder.fatal).toBe(true); - expect(decoder.ignoreBOM).toBe(false); + // expect(decoder.ignoreBOM).toBe(false); // currently the getter for ignoreBOM doesn't work and always returns undefined }); it("should throw on invalid input", () => { @@ -265,28 +265,6 @@ describe("TextDecoder", () => { }); }); -describe("TextDecoder ignoreBOM", () => { - it.each([ - { - encoding: "utf-8", - bytes: [0xef, 0xbb, 0xbf, 0x61, 0x62, 0x63], - }, - { - encoding: "utf-16le", - bytes: [0xff, 0xfe, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00], - }, - ])("should ignoreBOM for: %o", ({ encoding, bytes }) => { - const BOM = "\uFEFF"; - const array = new Uint8Array(bytes); - - const decoder_ignore_bom = new TextDecoder(encoding, { ignoreBOM: true }); - expect(decoder_ignore_bom.decode(array)).toStrictEqual(`${BOM}abc`); - - const decoder_not_ignore_bom = new TextDecoder(encoding, { ignoreBOM: false }); - expect(decoder_not_ignore_bom.decode(array)).toStrictEqual("abc"); - }); -}); - it("truncated sequences", () => { const assert_equals = (a, b) => expect(a).toBe(b); |