diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc index f7bd93a98d05b5..8a445088b54aa8 100644 --- a/src/encoding_binding.cc +++ b/src/encoding_binding.cc @@ -340,53 +340,44 @@ void BindingData::EncodeUtf8String(const FunctionCallbackInfo& args) { size_t length = source->Length(); size_t utf8_length = 0; - bool is_one_byte = source->IsOneByte(); - - if (is_one_byte) { - // One-byte string (Latin1) - copy to buffer first, then process - MaybeStackBuffer latin1_buffer(length); - source->WriteOneByteV2(isolate, 0, length, latin1_buffer.out()); - - auto data = reinterpret_cast(latin1_buffer.out()); - - // Check if it's pure ASCII - if so, we can just copy - simdutf::result result = simdutf::validate_ascii_with_errors(data, length); - if (result.error == simdutf::SUCCESS) { - // Pure ASCII - direct copy - std::unique_ptr bs = ArrayBuffer::NewBackingStore( - isolate, length, BackingStoreInitializationMode::kUninitialized); - CHECK(bs); - memcpy(bs->Data(), data, length); - Local ab = ArrayBuffer::New(isolate, std::move(bs)); - args.GetReturnValue().Set(Uint8Array::New(ab, 0, length)); - return; - } - // Latin1 with non-ASCII characters - need conversion - utf8_length = simdutf::utf8_length_from_latin1(data, length); - std::unique_ptr bs = ArrayBuffer::NewBackingStore( - isolate, utf8_length, BackingStoreInitializationMode::kUninitialized); - CHECK(bs); - [[maybe_unused]] size_t written = simdutf::convert_latin1_to_utf8( - data, length, static_cast(bs->Data())); - DCHECK_EQ(written, utf8_length); - Local ab = ArrayBuffer::New(isolate, std::move(bs)); - args.GetReturnValue().Set(Uint8Array::New(ab, 0, utf8_length)); - return; + // Inspect the string's flat content directly to determine the encoding and + // the exact UTF-8 output size, without copying it out of the V8 heap. + // + // v8::String::ValueView holds a DisallowGarbageCollection scope, so it must + // be released before allocating the backing store below. Flattening is cached + // on the string, so re-acquiring the view for the conversion pass is cheap. + bool is_one_byte; + bool is_ascii = false; + bool is_well_formed = true; + { + v8::String::ValueView view(isolate, source); + is_one_byte = view.is_one_byte(); + if (is_one_byte) { + auto data = reinterpret_cast(view.data8()); + is_ascii = simdutf::validate_ascii_with_errors(data, length).error == + simdutf::SUCCESS; + utf8_length = + is_ascii ? length : simdutf::utf8_length_from_latin1(data, length); + } else { + auto data = reinterpret_cast(view.data16()); + is_well_formed = + simdutf::validate_utf16_with_errors(data, length).error == + simdutf::SUCCESS; + if (is_well_formed) { + utf8_length = simdutf::utf8_length_from_utf16(data, length); + } + } } - // Two-byte string (UTF-16) - copy to buffer first - MaybeStackBuffer utf16_buffer(length); - source->WriteV2(isolate, 0, length, utf16_buffer.out()); - - auto data = reinterpret_cast(utf16_buffer.out()); - - // Check for unpaired surrogates - simdutf::result validation_result = - simdutf::validate_utf16_with_errors(data, length); + // Rare path: two-byte string with unpaired surrogates. Copy into a mutable + // buffer, make it well-formed, then encode. + if (!is_well_formed) { + MaybeStackBuffer utf16_buffer(length); + source->WriteV2(isolate, 0, length, utf16_buffer.out()); + auto data = reinterpret_cast(utf16_buffer.out()); + simdutf::to_well_formed_utf16(data, length, data); - if (validation_result.error == simdutf::SUCCESS) { - // Valid UTF-16 - use the fast path utf8_length = simdutf::utf8_length_from_utf16(data, length); std::unique_ptr bs = ArrayBuffer::NewBackingStore( isolate, utf8_length, BackingStoreInitializationMode::kUninitialized); @@ -399,16 +390,30 @@ void BindingData::EncodeUtf8String(const FunctionCallbackInfo& args) { return; } - // Invalid UTF-16 with unpaired surrogates - convert to well-formed in place - simdutf::to_well_formed_utf16(data, length, data); - - utf8_length = simdutf::utf8_length_from_utf16(data, length); + // Common path: allocate the exact-size output, then re-acquire the flat + // content and encode directly into the backing store. std::unique_ptr bs = ArrayBuffer::NewBackingStore( isolate, utf8_length, BackingStoreInitializationMode::kUninitialized); CHECK(bs); - [[maybe_unused]] size_t written = simdutf::convert_utf16_to_utf8( - data, length, static_cast(bs->Data())); - DCHECK_EQ(written, utf8_length); + char* out = static_cast(bs->Data()); + { + v8::String::ValueView view(isolate, source); + if (is_one_byte) { + auto data = reinterpret_cast(view.data8()); + if (is_ascii) { + memcpy(out, data, length); + } else { + [[maybe_unused]] size_t written = + simdutf::convert_latin1_to_utf8(data, length, out); + DCHECK_EQ(written, utf8_length); + } + } else { + auto data = reinterpret_cast(view.data16()); + [[maybe_unused]] size_t written = + simdutf::convert_utf16_to_utf8(data, length, out); + DCHECK_EQ(written, utf8_length); + } + } Local ab = ArrayBuffer::New(isolate, std::move(bs)); args.GetReturnValue().Set(Uint8Array::New(ab, 0, utf8_length)); }