1 files changed, 402 insertions, 109 deletions
diff --git a/src/bun.js/bindings/simdutf.h b/src/bun.js/bindings/simdutf.h
index 3514f117d..0a57a69f7 100644
--- a/src/bun.js/bindings/simdutf.h
+++ b/src/bun.js/bindings/simdutf.h
@@ -1,4 +1,4 @@
-/* auto-generated on 2023-01-02 15:43:33 -0500. Do not edit! */
+/* auto-generated on 2023-02-10 14:42:58 -0500. Do not edit! */
 // dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf.h
 /* begin file include/simdutf.h */
 #ifndef SIMDUTF_H
@@ -64,6 +64,48 @@
 #include <strings.h>
 #endif
 
+/**
+ * We want to check that it is actually a little endian system at
+ * compile-time.
+ */
+
+#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
+#define SIMDUTF_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#elif defined(_WIN32)
+#define SIMDUTF_IS_BIG_ENDIAN 0
+#else
+#if defined(__APPLE__) || defined(__FreeBSD__) // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__
+#include <machine/endian.h>
+#elif defined(sun) || defined(__sun) // defined(__APPLE__) || defined(__FreeBSD__)
+#include <sys/byteorder.h>
+#else  // defined(__APPLE__) || defined(__FreeBSD__)
+
+#ifdef __has_include
+#if __has_include(<endian.h>)
+#include <endian.h>
+#endif //__has_include(<endian.h>)
+#endif //__has_include
+
+#endif // defined(__APPLE__) || defined(__FreeBSD__)
+
+
+#ifndef !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__)
+#define SIMDUTF_IS_BIG_ENDIAN 0
+#endif
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define SIMDUTF_IS_BIG_ENDIAN 0
+#else // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define SIMDUTF_IS_BIG_ENDIAN 1
+#endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+
+#endif // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__
+
+
+/**
+ * At this point in time, SIMDUTF_IS_BIG_ENDIAN is defined.
+ */
+
 #ifdef _MSC_VER
 #define SIMDUTF_VISUAL_STUDIO 1
 /**
@@ -98,8 +140,10 @@
 #define SIMDUTF_IS_ARM64 1
 #elif defined(__PPC64__) || defined(_M_PPC64)
 //#define SIMDUTF_IS_PPC64 1
-#pragma message("The simdutf library does yet support SIMD acceleration under\
-POWER processors. Please see https://github.com/lemire/simdutf/issues/51")
+// The simdutf library does yet support SIMD acceleration under
+// POWER processors. Please see https://github.com/lemire/simdutf/issues/51
+#elif defined(__s390__)
+// s390 IBM system. Big endian.
 #else
 // The simdutf library is designed
 // for 64-bit processors and it seems that you are not
@@ -456,6 +500,8 @@ enum endianness {
         BIG
 };
 
+bool match_system(endianness e);
+
 std::string to_string(encoding_type bom);
 
 // Note that BOM for UTF8 is discouraged.
@@ -492,7 +538,7 @@ enum error_code {
   HEADER_BITS,  // Any byte must have fewer than 5 header bits.
   TOO_SHORT,    // The leading byte must be followed by N-1 continuation bytes, where N is the UTF-8 character length
                 // This is also the error when the input is truncated.
-  TOO_LONG,     // The leading byte must not be a continuation byte.
+  TOO_LONG,     // We either have too many consecutive continuation bytes or the string starts with a continuation byte.
   OVERLONG,     // The decoded character must be above U+7F for two-byte characters, U+7FF for three-byte characters,
                 // and U+FFFF for four-byte characters.
   TOO_LARGE,    // The decoded character must be less than or equal to U+10FFFF OR less than or equal than U+7F for ASCII.
@@ -526,18 +572,18 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS
 #define SIMDUTF_SIMDUTF_VERSION_H
 
 /** The version of simdutf being used (major.minor.revision) */
-#define SIMDUTF_VERSION "2.1.0"
+#define SIMDUTF_VERSION "3.2.0"
 
 namespace simdutf {
 enum {
   /**
    * The major version (MAJOR.minor.revision) of simdutf being used.
    */
-  SIMDUTF_VERSION_MAJOR = 2,
+  SIMDUTF_VERSION_MAJOR = 3,
   /**
    * The minor version (major.MINOR.revision) of simdutf being used.
    */
-  SIMDUTF_VERSION_MINOR = 1,
+  SIMDUTF_VERSION_MINOR = 2,
   /**
    * The revision (major.minor.REVISION) of simdutf being used.
    */
@@ -873,6 +919,21 @@ simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept;
 simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) noexcept;
 
 /**
+ * Using native endianness; Validate the UTF-16 string.
+ * This function may be best when you expect the input to be almost always valid.
+ * Otherwise, consider using validate_utf16_with_errors.
+ *
+ * Overridden by each implementation.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param buf the UTF-16 string to validate.
+ * @param len the length of the string in number of 2-byte words (char16_t).
+ * @return true if and only if the string is valid UTF-16.
+ */
+simdutf_warn_unused bool validate_utf16(const char16_t *buf, size_t len) noexcept;
+
+/**
  * Validate the UTF-16LE string. This function may be best when you expect
  * the input to be almost always valid. Otherwise, consider using
  * validate_utf16le_with_errors.
@@ -903,6 +964,20 @@ simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) noexc
 simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) noexcept;
 
 /**
+ * Using native endianness; Validate the UTF-16 string and stop on error.
+ * It might be faster than validate_utf16 when an error is expected to occur early.
+ *
+ * Overridden by each implementation.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param buf the UTF-16 string to validate.
+ * @param len the length of the string in number of 2-byte words (char16_t).
+ * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+ */
+simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf, size_t len) noexcept;
+
+/**
  * Validate the UTF-16LE string and stop on error. It might be faster than
  * validate_utf16le when an error is expected to occur early.
  *
@@ -931,7 +1006,7 @@ simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, siz
 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) noexcept;
 
 /**
- * Validate the UTF-32LE string. This function may be best when you expect
+ * Validate the UTF-32 string. This function may be best when you expect
  * the input to be almost always valid. Otherwise, consider using
  * validate_utf32_with_errors.
  *
@@ -939,27 +1014,40 @@ simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, siz
  *
  * This function is not BOM-aware.
  *
- * @param buf the UTF-32LE string to validate.
+ * @param buf the UTF-32 string to validate.
  * @param len the length of the string in number of 4-byte words (char32_t).
- * @return true if and only if the string is valid UTF-32LE.
+ * @return true if and only if the string is valid UTF-32.
  */
 simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) noexcept;
 
 /**
- * Validate the UTF-32LE string and stop on error. It might be faster than
+ * Validate the UTF-32 string and stop on error. It might be faster than
  * validate_utf32 when an error is expected to occur early.
  *
  * Overridden by each implementation.
  *
  * This function is not BOM-aware.
  *
- * @param buf the UTF-32LE string to validate.
+ * @param buf the UTF-32 string to validate.
  * @param len the length of the string in number of 4-byte words (char32_t).
  * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
  */
 simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) noexcept;
 
 /**
+ * Using native endianness; Convert possibly broken UTF-8 string into UTF-16 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input         the UTF-8 string to convert
+ * @param length        the length of the string in bytes
+ * @param utf16_buffer  the pointer to buffer that can hold conversion result
+ * @return the number of written char16_t; 0 if the input was not valid UTF-8 string
+ */
+simdutf_warn_unused size_t convert_utf8_to_utf16(const char * input, size_t length, char16_t* utf16_output) noexcept;
+
+/**
  * Convert possibly broken UTF-8 string into UTF-16LE string.
  *
  * During the conversion also validation of the input string is done.
@@ -986,6 +1074,20 @@ simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * input, size_t le
 simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_output) noexcept;
 
 /**
+ * Using native endianness; Convert possibly broken UTF-8 string into UTF-16
+ * string and stop on error.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input         the UTF-8 string to convert
+ * @param length        the length of the string in bytes
+ * @param utf16_buffer  the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
+ */
+simdutf_warn_unused result convert_utf8_to_utf16_with_errors(const char * input, size_t length, char16_t* utf16_output) noexcept;
+
+/**
  * Convert possibly broken UTF-8 string into UTF-16LE string and stop on error.
  *
  * During the conversion also validation of the input string is done.
@@ -1012,7 +1114,7 @@ simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * inpu
 simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * input, size_t length, char16_t* utf16_output) noexcept;
 
 /**
- * Convert possibly broken UTF-8 string into UTF-32LE string.
+ * Convert possibly broken UTF-8 string into UTF-32 string.
  *
  * During the conversion also validation of the input string is done.
  * This function is suitable to work with inputs from untrusted sources.
@@ -1025,7 +1127,7 @@ simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * inpu
 simdutf_warn_unused size_t convert_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_output) noexcept;
 
 /**
- * Convert possibly broken UTF-8 string into UTF-32LE string and stop on error.
+ * Convert possibly broken UTF-8 string into UTF-32 string and stop on error.
  *
  * During the conversion also validation of the input string is done.
  * This function is suitable to work with inputs from untrusted sources.
@@ -1038,6 +1140,18 @@ simdutf_warn_unused size_t convert_utf8_to_utf32(const char * input, size_t leng
 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * input, size_t length, char32_t* utf32_output) noexcept;
 
 /**
+ * Using native endianness; Convert valid UTF-8 string into UTF-16 string.
+ *
+ * This function assumes that the input string is valid UTF-8.
+ *
+ * @param input         the UTF-8 string to convert
+ * @param length        the length of the string in bytes
+ * @param utf16_buffer  the pointer to buffer that can hold conversion result
+ * @return the number of written char16_t
+ */
+simdutf_warn_unused size_t convert_valid_utf8_to_utf16(const char * input, size_t length, char16_t* utf16_buffer) noexcept;
+
+/**
  * Convert valid UTF-8 string into UTF-16LE string.
  *
  * This function assumes that the input string is valid UTF-8.
@@ -1062,7 +1176,7 @@ simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * input, siz
 simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_buffer) noexcept;
 
 /**
- * Convert valid UTF-8 string into UTF-32LE string.
+ * Convert valid UTF-8 string into UTF-32 string.
  *
  * This function assumes that the input string is valid UTF-8.
  *
@@ -1087,7 +1201,7 @@ simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * input, size_
 simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t length) noexcept;
 
 /**
- * Compute the number of 4-byte words that this UTF-8 string would require in UTF-32LE format.
+ * Compute the number of 4-byte words that this UTF-8 string would require in UTF-32 format.
  *
  * This function is equivalent to count_utf8
  *
@@ -1097,11 +1211,26 @@ simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t len
  *
  * @param input         the UTF-8 string to process
  * @param length        the length of the string in bytes
- * @return the number of char32_t words required to encode the UTF-8 string as UTF-32LE
+ * @return the number of char32_t words required to encode the UTF-8 string as UTF-32
  */
 simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) noexcept;
 
 /**
+ * Using native endianness; Convert possibly broken UTF-16 string into UTF-8 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-16 string to convert
+ * @param length        the length of the string in 2-byte words (char16_t)
+ * @param utf8_buffer   the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if input is not a valid UTF-16LE string
+ */
+simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
+
+/**
  * Convert possibly broken UTF-16LE string into UTF-8 string.
  *
  * During the conversion also validation of the input string is done.
@@ -1132,6 +1261,21 @@ simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * input, size_
 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
 
 /**
+ * Using native endianness; Convert possibly broken UTF-16 string into UTF-8 string and stop on error.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-16 string to convert
+ * @param length        the length of the string in 2-byte words (char16_t)
+ * @param utf8_buffer   the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+ */
+simdutf_warn_unused result convert_utf16_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
+
+/**
  * Convert possibly broken UTF-16LE string into UTF-8 string and stop on error.
  *
  * During the conversion also validation of the input string is done.
@@ -1162,6 +1306,20 @@ simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t *
 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
 
 /**
+ * Using native endianness; Convert valid UTF-16 string into UTF-8 string.
+ *
+ * This function assumes that the input string is valid UTF-16LE.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-16 string to convert
+ * @param length        the length of the string in 2-byte words (char16_t)
+ * @param utf8_buffer   the pointer to buffer that can hold the conversion result
+ * @return number of written words; 0 if conversion is not possible
+ */
+simdutf_warn_unused size_t convert_valid_utf16_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
+
+/**
  * Convert valid UTF-16LE string into UTF-8 string.
  *
  * This function assumes that the input string is valid UTF-16LE.
@@ -1190,7 +1348,22 @@ simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * input,
 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
 
 /**
- * Convert possibly broken UTF-16LE string into UTF-32LE string.
+ * Using native endianness; Convert possibly broken UTF-16 string into UTF-32 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-16 string to convert
+ * @param length        the length of the string in 2-byte words (char16_t)
+ * @param utf32_buffer   the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if input is not a valid UTF-16LE string
+ */
+simdutf_warn_unused size_t convert_utf16_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
+
+/**
+ * Convert possibly broken UTF-16LE string into UTF-32 string.
  *
  * During the conversion also validation of the input string is done.
  * This function is suitable to work with inputs from untrusted sources.
@@ -1205,7 +1378,7 @@ simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * input,
 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
 
 /**
- * Convert possibly broken UTF-16BE string into UTF-32LE string.
+ * Convert possibly broken UTF-16BE string into UTF-32 string.
  *
  * During the conversion also validation of the input string is done.
  * This function is suitable to work with inputs from untrusted sources.
@@ -1220,7 +1393,23 @@ simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * input, size
 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
 
 /**
- * Convert possibly broken UTF-16LE string into UTF-32LE string and stop on error.
+ * Using native endianness; Convert possibly broken UTF-16 string into
+ * UTF-32 string and stop on error.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-16 string to convert
+ * @param length        the length of the string in 2-byte words (char16_t)
+ * @param utf32_buffer   the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful.
+ */
+simdutf_warn_unused result convert_utf16_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
+
+/**
+ * Convert possibly broken UTF-16LE string into UTF-32 string and stop on error.
  *
  * During the conversion also validation of the input string is done.
  * This function is suitable to work with inputs from untrusted sources.
@@ -1235,7 +1424,7 @@ simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * input, size
 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
 
 /**
- * Convert possibly broken UTF-16BE string into UTF-32LE string and stop on error.
+ * Convert possibly broken UTF-16BE string into UTF-32 string and stop on error.
  *
  * During the conversion also validation of the input string is done.
  * This function is suitable to work with inputs from untrusted sources.
@@ -1250,7 +1439,21 @@ simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t *
 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
 
 /**
- * Convert valid UTF-16LE string into UTF-32LE string.
+ * Using native endianness; Convert valid UTF-16 string into UTF-32 string.
+ *
+ * This function assumes that the input string is valid UTF-16 (native endianness).
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-16 string to convert
+ * @param length        the length of the string in 2-byte words (char16_t)
+ * @param utf32_buffer   the pointer to buffer that can hold the conversion result
+ * @return number of written words; 0 if conversion is not possible
+ */
+simdutf_warn_unused size_t convert_valid_utf16_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
+
+/**
+ * Convert valid UTF-16LE string into UTF-32 string.
  *
  * This function assumes that the input string is valid UTF-16LE.
  *
@@ -1264,7 +1467,7 @@ simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t *
 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
 
 /**
- * Convert valid UTF-16BE string into UTF-32LE string.
+ * Convert valid UTF-16BE string into UTF-32 string.
  *
  * This function assumes that the input string is valid UTF-16LE.
  *
@@ -1278,6 +1481,18 @@ simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * input
 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
 
 /**
+ * Using native endianness; Compute the number of bytes that this UTF-16
+ * string would require in UTF-8 format.
+ *
+ * This function does not validate the input.
+ *
+ * @param input         the UTF-16 string to convert
+ * @param length        the length of the string in 2-byte words (char16_t)
+ * @return the number of bytes required to encode the UTF-16LE string as UTF-8
+ */
+simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t * input, size_t length) noexcept;
+
+/**
  * Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
  *
  * This function does not validate the input.
@@ -1300,29 +1515,29 @@ simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size
 simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t * input, size_t length) noexcept;
 
 /**
- * Convert possibly broken UTF-32LE string into UTF-8 string.
+ * Convert possibly broken UTF-32 string into UTF-8 string.
  *
  * During the conversion also validation of the input string is done.
  * This function is suitable to work with inputs from untrusted sources.
  *
  * This function is not BOM-aware.
  *
- * @param input         the UTF-32LE string to convert
+ * @param input         the UTF-32 string to convert
  * @param length        the length of the string in 4-byte words (char32_t)
  * @param utf8_buffer   the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-32LE string
+ * @return number of written words; 0 if input is not a valid UTF-32 string
  */
 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) noexcept;
 
 /**
- * Convert possibly broken UTF-32LE string into UTF-8 string and stop on error.
+ * Convert possibly broken UTF-32 string into UTF-8 string and stop on error.
  *
  * During the conversion also validation of the input string is done.
  * This function is suitable to work with inputs from untrusted sources.
  *
  * This function is not BOM-aware.
  *
- * @param input         the UTF-32LE string to convert
+ * @param input         the UTF-32 string to convert
  * @param length        the length of the string in 4-byte words (char32_t)
  * @param utf8_buffer   the pointer to buffer that can hold conversion result
  * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
@@ -1330,13 +1545,13 @@ simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * input, size_t
 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * input, size_t length, char* utf8_buffer) noexcept;
 
 /**
- * Convert valid UTF-32LE string into UTF-8 string.
+ * Convert valid UTF-32 string into UTF-8 string.
  *
- * This function assumes that the input string is valid UTF-32LE.
+ * This function assumes that the input string is valid UTF-32.
  *
  * This function is not BOM-aware.
  *
- * @param input         the UTF-32LE string to convert
+ * @param input         the UTF-32 string to convert
  * @param length        the length of the string in 4-byte words (char32_t)
  * @param utf8_buffer   the pointer to buffer that can hold the conversion result
  * @return number of written words; 0 if conversion is not possible
@@ -1344,44 +1559,75 @@ simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * in
 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) noexcept;
 
 /**
- * Convert possibly broken UTF-32LE string into UTF-16LE string.
+ * Using native endianness; Convert possibly broken UTF-32 string into UTF-16 string.
  *
  * During the conversion also validation of the input string is done.
  * This function is suitable to work with inputs from untrusted sources.
  *
  * This function is not BOM-aware.
  *
- * @param input         the UTF-32LE string to convert
+ * @param input         the UTF-32 string to convert
  * @param length        the length of the string in 4-byte words (char32_t)
  * @param utf16_buffer   the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-32LE string
+ * @return number of written words; 0 if input is not a valid UTF-32 string
+ */
+simdutf_warn_unused size_t convert_utf32_to_utf16(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
+
+/**
+ * Convert possibly broken UTF-32 string into UTF-16LE string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-32 string to convert
+ * @param length        the length of the string in 4-byte words (char32_t)
+ * @param utf16_buffer   the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if input is not a valid UTF-32 string
  */
 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
 
 /**
- * Convert possibly broken UTF-32LE string into UTF-16BE string.
+ * Convert possibly broken UTF-32 string into UTF-16BE string.
  *
  * During the conversion also validation of the input string is done.
  * This function is suitable to work with inputs from untrusted sources.
  *
  * This function is not BOM-aware.
  *
- * @param input         the UTF-32LE string to convert
+ * @param input         the UTF-32 string to convert
  * @param length        the length of the string in 4-byte words (char32_t)
  * @param utf16_buffer   the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-32LE string
+ * @return number of written words; 0 if input is not a valid UTF-32 string
  */
 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
 
 /**
- * Convert possibly broken UTF-32LE string into UTF-16LE string and stop on error.
+ * Using native endianness; Convert possibly broken UTF-32 string into UTF-16
+ * string and stop on error.
  *
  * During the conversion also validation of the input string is done.
  * This function is suitable to work with inputs from untrusted sources.
  *
  * This function is not BOM-aware.
  *
- * @param input         the UTF-32LE string to convert
+ * @param input         the UTF-32 string to convert
+ * @param length        the length of the string in 4-byte words (char32_t)
+ * @param utf16_buffer   the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
+ */
+simdutf_warn_unused result convert_utf32_to_utf16_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
+
+/**
+ * Convert possibly broken UTF-32 string into UTF-16LE string and stop on error.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-32 string to convert
  * @param length        the length of the string in 4-byte words (char32_t)
  * @param utf16_buffer   the pointer to buffer that can hold conversion result
  * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
@@ -1389,14 +1635,14 @@ simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * input, size
 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
 
 /**
- * Convert possibly broken UTF-32LE string into UTF-16BE string and stop on error.
+ * Convert possibly broken UTF-32 string into UTF-16BE string and stop on error.
  *
  * During the conversion also validation of the input string is done.
  * This function is suitable to work with inputs from untrusted sources.
  *
  * This function is not BOM-aware.
  *
- * @param input         the UTF-32LE string to convert
+ * @param input         the UTF-32 string to convert
  * @param length        the length of the string in 4-byte words (char32_t)
  * @param utf16_buffer   the pointer to buffer that can hold conversion result
  * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
@@ -1404,13 +1650,27 @@ simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t *
 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
 
 /**
- * Convert valid UTF-32LE string into UTF-16LE string.
+ * Using native endianness; Convert valid UTF-32 string into UTF-16 string.
+ *
+ * This function assumes that the input string is valid UTF-32.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-32 string to convert
+ * @param length        the length of the string in 4-byte words (char32_t)
+ * @param utf16_buffer   the pointer to buffer that can hold the conversion result
+ * @return number of written words; 0 if conversion is not possible
+ */
+simdutf_warn_unused size_t convert_valid_utf32_to_utf16(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
+
+/**
+ * Convert valid UTF-32 string into UTF-16LE string.
  *
- * This function assumes that the input string is valid UTF-32LE.
+ * This function assumes that the input string is valid UTF-32.
  *
  * This function is not BOM-aware.
  *
- * @param input         the UTF-32LE string to convert
+ * @param input         the UTF-32 string to convert
  * @param length        the length of the string in 4-byte words (char32_t)
  * @param utf16_buffer   the pointer to buffer that can hold the conversion result
  * @return number of written words; 0 if conversion is not possible
@@ -1418,13 +1678,13 @@ simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t *
 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
 
 /**
- * Convert valid UTF-32LE string into UTF-16BE string.
+ * Convert valid UTF-32 string into UTF-16BE string.
  *
- * This function assumes that the input string is valid UTF-32LE.
+ * This function assumes that the input string is valid UTF-32.
  *
  * This function is not BOM-aware.
  *
- * @param input         the UTF-32LE string to convert
+ * @param input         the UTF-32 string to convert
  * @param length        the length of the string in 4-byte words (char32_t)
  * @param utf16_buffer   the pointer to buffer that can hold the conversion result
  * @return number of written words; 0 if conversion is not possible
@@ -1446,29 +1706,45 @@ simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * input
 void change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) noexcept;
 
 /**
- * Compute the number of bytes that this UTF-32LE string would require in UTF-8 format.
+ * Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
  *
  * This function does not validate the input.
  *
- * @param input         the UTF-32LE string to convert
+ * @param input         the UTF-32 string to convert
  * @param length        the length of the string in 4-byte words (char32_t)
- * @return the number of bytes required to encode the UTF-32LE string as UTF-8
+ * @return the number of bytes required to encode the UTF-32 string as UTF-8
  */
 simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) noexcept;
 
 /**
- * Compute the number of two-byte words that this UTF-32LE string would require in UTF-16 format.
+ * Compute the number of two-byte words that this UTF-32 string would require in UTF-16 format.
  *
  * This function does not validate the input.
  *
- * @param input         the UTF-32LE string to convert
+ * @param input         the UTF-32 string to convert
  * @param length        the length of the string in 4-byte words (char32_t)
- * @return the number of bytes required to encode the UTF-32LE string as UTF-16
+ * @return the number of bytes required to encode the UTF-32 string as UTF-16
  */
 simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) noexcept;
 
 /**
- * Compute the number of bytes that this UTF-16LE string would require in UTF-32LE format.
+ * Using native endianness; Compute the number of bytes that this UTF-16
+ * string would require in UTF-32 format.
+ *
+ * This function is equivalent to count_utf16.
+ *
+ * This function does not validate the input.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-16 string to convert
+ * @param length        the length of the string in 2-byte words (char16_t)
+ * @return the number of bytes required to encode the UTF-16LE string as UTF-32
+ */
+simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t * input, size_t length) noexcept;
+
+/**
+ * Compute the number of bytes that this UTF-16LE string would require in UTF-32 format.
  *
  * This function is equivalent to count_utf16le.
  *
@@ -1478,12 +1754,12 @@ simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_
  *
  * @param input         the UTF-16LE string to convert
  * @param length        the length of the string in 2-byte words (char16_t)
- * @return the number of bytes required to encode the UTF-16LE string as UTF-32LE
+ * @return the number of bytes required to encode the UTF-16LE string as UTF-32
  */
 simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, size_t length) noexcept;
 
 /**
- * Compute the number of bytes that this UTF-16BE string would require in UTF-32LE format.
+ * Compute the number of bytes that this UTF-16BE string would require in UTF-32 format.
  *
  * This function is equivalent to count_utf16be.
  *
@@ -1493,7 +1769,7 @@ simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, siz
  *
  * @param input         the UTF-16BE string to convert
  * @param length        the length of the string in 2-byte words (char16_t)
- * @return the number of bytes required to encode the UTF-16BE string as UTF-32LE
+ * @return the number of bytes required to encode the UTF-16BE string as UTF-32
  */
 simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, size_t length) noexcept;
 
@@ -1501,6 +1777,20 @@ simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, siz
  * Count the number of code points (characters) in the string assuming that
  * it is valid.
  *
+ * This function assumes that the input string is valid UTF-16 (native endianness).
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-16 string to process
+ * @param length        the length of the string in 2-byte words (char16_t)
+ * @return number of code points
+ */
+simdutf_warn_unused size_t count_utf16(const char16_t * input, size_t length) noexcept;
+
+/**
+ * Count the number of code points (characters) in the string assuming that
+ * it is valid.
+ *
  * This function assumes that the input string is valid UTF-16LE.
  *
  * This function is not BOM-aware.
@@ -1599,7 +1889,7 @@ public:
    *
    * @return a mask of all required `internal::instruction_set::` values
    */
-  virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; };
+  virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; }
 
 
   /**
@@ -1705,26 +1995,26 @@ public:
   simdutf_warn_unused virtual result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept = 0;
 
   /**
-   * Validate the UTF-32LE string.
+   * Validate the UTF-32 string.
    *
    * Overridden by each implementation.
    *
    * This function is not BOM-aware.
    *
-   * @param buf the UTF-32LE string to validate.
+   * @param buf the UTF-32 string to validate.
    * @param len the length of the string in number of 4-byte words (char32_t).
-   * @return true if and only if the string is valid UTF-32LE.
+   * @return true if and only if the string is valid UTF-32.
    */
   simdutf_warn_unused virtual bool validate_utf32(const char32_t *buf, size_t len) const noexcept = 0;
 
   /**
-   * Validate the UTF-32LE string and stop on error.
+   * Validate the UTF-32 string and stop on error.
    *
    * Overridden by each implementation.
    *
    * This function is not BOM-aware.
    *
-   * @param buf the UTF-32LE string to validate.
+   * @param buf the UTF-32 string to validate.
    * @param len the length of the string in number of 4-byte words (char32_t).
    * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
    */
@@ -1783,7 +2073,7 @@ public:
   simdutf_warn_unused virtual result convert_utf8_to_utf16be_with_errors(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
 
   /**
-   * Convert possibly broken UTF-8 string into UTF-32LE string.
+   * Convert possibly broken UTF-8 string into UTF-32 string.
    *
    * During the conversion also validation of the input string is done.
    * This function is suitable to work with inputs from untrusted sources.
@@ -1796,7 +2086,7 @@ public:
   simdutf_warn_unused virtual size_t convert_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_output) const noexcept = 0;
 
   /**
-   * Convert possibly broken UTF-8 string into UTF-32LE string and stop on error.
+   * Convert possibly broken UTF-8 string into UTF-32 string and stop on error.
    *
    * During the conversion also validation of the input string is done.
    * This function is suitable to work with inputs from untrusted sources.
@@ -1833,7 +2123,7 @@ public:
   simdutf_warn_unused virtual size_t convert_valid_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
 
   /**
-   * Convert valid UTF-8 string into UTF-32LE string.
+   * Convert valid UTF-8 string into UTF-32 string.
    *
    * This function assumes that the input string is valid UTF-8.
    *
@@ -1856,7 +2146,7 @@ public:
   simdutf_warn_unused virtual size_t utf16_length_from_utf8(const char * input, size_t length) const noexcept = 0;
 
    /**
-   * Compute the number of 4-byte words that this UTF-8 string would require in UTF-32LE format.
+   * Compute the number of 4-byte words that this UTF-8 string would require in UTF-32 format.
    *
    * This function is equivalent to count_utf8.
    *
@@ -1864,7 +2154,7 @@ public:
    *
    * @param input         the UTF-8 string to process
    * @param length        the length of the string in bytes
-   * @return the number of char32_t words required to encode the UTF-8 string as UTF-32LE
+   * @return the number of char32_t words required to encode the UTF-8 string as UTF-32
    */
   simdutf_warn_unused virtual size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept = 0;
 
@@ -1957,7 +2247,7 @@ public:
   simdutf_warn_unused virtual size_t convert_valid_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
 
   /**
-   * Convert possibly broken UTF-16LE string into UTF-32LE string.
+   * Convert possibly broken UTF-16LE string into UTF-32 string.
    *
    * During the conversion also validation of the input string is done.
    * This function is suitable to work with inputs from untrusted sources.
@@ -1972,7 +2262,7 @@ public:
   simdutf_warn_unused virtual size_t convert_utf16le_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
 
   /**
-   * Convert possibly broken UTF-16BE string into UTF-32LE string.
+   * Convert possibly broken UTF-16BE string into UTF-32 string.
    *
    * During the conversion also validation of the input string is done.
    * This function is suitable to work with inputs from untrusted sources.
@@ -1987,7 +2277,7 @@ public:
   simdutf_warn_unused virtual size_t convert_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
 
   /**
-   * Convert possibly broken UTF-16LE string into UTF-32LE string and stop on error.
+   * Convert possibly broken UTF-16LE string into UTF-32 string and stop on error.
    *
    * During the conversion also validation of the input string is done.
    * This function is suitable to work with inputs from untrusted sources.
@@ -2002,7 +2292,7 @@ public:
   simdutf_warn_unused virtual result convert_utf16le_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
 
   /**
-   * Convert possibly broken UTF-16BE string into UTF-32LE string and stop on error.
+   * Convert possibly broken UTF-16BE string into UTF-32 string and stop on error.
    *
    * During the conversion also validation of the input string is done.
    * This function is suitable to work with inputs from untrusted sources.
@@ -2017,7 +2307,7 @@ public:
   simdutf_warn_unused virtual result convert_utf16be_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
 
   /**
-   * Convert valid UTF-16LE string into UTF-32LE string.
+   * Convert valid UTF-16LE string into UTF-32 string.
    *
    * This function assumes that the input string is valid UTF-16LE.
    *
@@ -2071,29 +2361,29 @@ public:
   simdutf_warn_unused virtual size_t utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept = 0;
 
   /**
-   * Convert possibly broken UTF-32LE string into UTF-8 string.
+   * Convert possibly broken UTF-32 string into UTF-8 string.
    *
    * During the conversion also validation of the input string is done.
    * This function is suitable to work with inputs from untrusted sources.
    *
    * This function is not BOM-aware.
    *
-   * @param input         the UTF-32LE string to convert
+   * @param input         the UTF-32 string to convert
    * @param length        the length of the string in 4-byte words (char32_t)
    * @param utf8_buffer   the pointer to buffer that can hold conversion result
-   * @return number of written words; 0 if input is not a valid UTF-32LE string
+   * @return number of written words; 0 if input is not a valid UTF-32 string
    */
   simdutf_warn_unused virtual size_t convert_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
 
   /**
-   * Convert possibly broken UTF-32LE string into UTF-8 string and stop on error.
+   * Convert possibly broken UTF-32 string into UTF-8 string and stop on error.
    *
    * During the conversion also validation of the input string is done.
    * This function is suitable to work with inputs from untrusted sources.
    *
    * This function is not BOM-aware.
    *
-   * @param input         the UTF-32LE string to convert
+   * @param input         the UTF-32 string to convert
    * @param length        the length of the string in 4-byte words (char32_t)
    * @param utf8_buffer   the pointer to buffer that can hold conversion result
    * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
@@ -2101,13 +2391,13 @@ public:
   simdutf_warn_unused virtual result convert_utf32_to_utf8_with_errors(const char32_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
 
   /**
-   * Convert valid UTF-32LE string into UTF-8 string.
+   * Convert valid UTF-32 string into UTF-8 string.
    *
-   * This function assumes that the input string is valid UTF-32LE.
+   * This function assumes that the input string is valid UTF-32.
    *
    * This function is not BOM-aware.
    *
-   * @param input         the UTF-32LE string to convert
+   * @param input         the UTF-32 string to convert
    * @param length        the length of the string in 4-byte words (char32_t)
    * @param utf8_buffer   the pointer to buffer that can hold the conversion result
    * @return number of written words; 0 if conversion is not possible
@@ -2115,44 +2405,44 @@ public:
   simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
 
   /**
-   * Convert possibly broken UTF-32LE string into UTF-16LE string.
+   * Convert possibly broken UTF-32 string into UTF-16LE string.
    *
    * During the conversion also validation of the input string is done.
    * This function is suitable to work with inputs from untrusted sources.
    *
    * This function is not BOM-aware.
    *
-   * @param input         the UTF-32LE string to convert
+   * @param input         the UTF-32 string to convert
    * @param length        the length of the string in 4-byte words (char32_t)
    * @param utf16_buffer   the pointer to buffer that can hold conversion result
-   * @return number of written words; 0 if input is not a valid UTF-32LE string
+   * @return number of written words; 0 if input is not a valid UTF-32 string
    */
   simdutf_warn_unused virtual size_t convert_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
 
   /**
-   * Convert possibly broken UTF-32LE string into UTF-16BE string.
+   * Convert possibly broken UTF-32 string into UTF-16BE string.
    *
    * During the conversion also validation of the input string is done.
    * This function is suitable to work with inputs from untrusted sources.
    *
    * This function is not BOM-aware.
    *
-   * @param input         the UTF-32LE string to convert
+   * @param input         the UTF-32 string to convert
    * @param length        the length of the string in 4-byte words (char32_t)
    * @param utf16_buffer   the pointer to buffer that can hold conversion result
-   * @return number of written words; 0 if input is not a valid UTF-32LE string
+   * @return number of written words; 0 if input is not a valid UTF-32 string
    */
   simdutf_warn_unused virtual size_t convert_utf32_to_utf16be(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
 
   /**
-   * Convert possibly broken UTF-32LE string into UTF-16LE string and stop on error.
+   * Convert possibly broken UTF-32 string into UTF-16LE string and stop on error.
    *
    * During the conversion also validation of the input string is done.
    * This function is suitable to work with inputs from untrusted sources.
    *
    * This function is not BOM-aware.
    *
-   * @param input         the UTF-32LE string to convert
+   * @param input         the UTF-32 string to convert
    * @param length        the length of the string in 4-byte words (char32_t)
    * @param utf16_buffer   the pointer to buffer that can hold conversion result
    * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
@@ -2160,14 +2450,14 @@ public:
   simdutf_warn_unused virtual result convert_utf32_to_utf16le_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
 
   /**
-   * Convert possibly broken UTF-32LE string into UTF-16BE string and stop on error.
+   * Convert possibly broken UTF-32 string into UTF-16BE string and stop on error.
    *
    * During the conversion also validation of the input string is done.
    * This function is suitable to work with inputs from untrusted sources.
    *
    * This function is not BOM-aware.
    *
-   * @param input         the UTF-32LE string to convert
+   * @param input         the UTF-32 string to convert
    * @param length        the length of the string in 4-byte words (char32_t)
    * @param utf16_buffer   the pointer to buffer that can hold conversion result
    * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
@@ -2175,13 +2465,13 @@ public:
   simdutf_warn_unused virtual result convert_utf32_to_utf16be_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
 
   /**
-   * Convert valid UTF-32LE string into UTF-16LE string.
+   * Convert valid UTF-32 string into UTF-16LE string.
    *
-   * This function assumes that the input string is valid UTF-32LE.
+   * This function assumes that the input string is valid UTF-32.
    *
    * This function is not BOM-aware.
    *
-   * @param input         the UTF-32LE string to convert
+   * @param input         the UTF-32 string to convert
    * @param length        the length of the string in 4-byte words (char32_t)
    * @param utf16_buffer   the pointer to buffer that can hold the conversion result
    * @return number of written words; 0 if conversion is not possible
@@ -2189,13 +2479,13 @@ public:
   simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
 
   /**
-   * Convert valid UTF-32LE string into UTF-16BE string.
+   * Convert valid UTF-32 string into UTF-16BE string.
    *
-   * This function assumes that the input string is valid UTF-32LE.
+   * This function assumes that the input string is valid UTF-32.
    *
    * This function is not BOM-aware.
    *
-   * @param input         the UTF-32LE string to convert
+   * @param input         the UTF-32 string to convert
    * @param length        the length of the string in 4-byte words (char32_t)
    * @param utf16_buffer   the pointer to buffer that can hold the conversion result
    * @return number of written words; 0 if conversion is not possible
@@ -2217,29 +2507,29 @@ public:
   virtual void change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) const noexcept = 0;
 
   /**
-   * Compute the number of bytes that this UTF-32LE string would require in UTF-8 format.
+   * Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
    *
    * This function does not validate the input.
    *
-   * @param input         the UTF-32LE string to convert
+   * @param input         the UTF-32 string to convert
    * @param length        the length of the string in 4-byte words (char32_t)
-   * @return the number of bytes required to encode the UTF-32LE string as UTF-8
+   * @return the number of bytes required to encode the UTF-32 string as UTF-8
    */
   simdutf_warn_unused virtual size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept = 0;
 
   /**
-   * Compute the number of two-byte words that this UTF-32LE string would require in UTF-16 format.
+   * Compute the number of two-byte words that this UTF-32 string would require in UTF-16 format.
    *
    * This function does not validate the input.
    *
-   * @param input         the UTF-32LE string to convert
+   * @param input         the UTF-32 string to convert
    * @param length        the length of the string in 4-byte words (char32_t)
-   * @return the number of bytes required to encode the UTF-32LE string as UTF-16
+   * @return the number of bytes required to encode the UTF-32 string as UTF-16
    */
   simdutf_warn_unused virtual size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept = 0;
 
   /*
-   * Compute the number of bytes that this UTF-16LE string would require in UTF-32LE format.
+   * Compute the number of bytes that this UTF-16LE string would require in UTF-32 format.
    *
    * This function is equivalent to count_utf16le.
    *
@@ -2249,12 +2539,12 @@ public:
    *
    * @param input         the UTF-16LE string to convert
    * @param length        the length of the string in 2-byte words (char16_t)
-   * @return the number of bytes required to encode the UTF-16LE string as UTF-32LE
+   * @return the number of bytes required to encode the UTF-16LE string as UTF-32
    */
   simdutf_warn_unused virtual size_t utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept = 0;
 
   /*
-   * Compute the number of bytes that this UTF-16BE string would require in UTF-32LE format.
+   * Compute the number of bytes that this UTF-16BE string would require in UTF-32 format.
    *
    * This function is equivalent to count_utf16be.
    *
@@ -2264,7 +2554,7 @@ public:
    *
    * @param input         the UTF-16BE string to convert
    * @param length        the length of the string in 2-byte words (char16_t)
-   * @return the number of bytes required to encode the UTF-16BE string as UTF-32LE
+   * @return the number of bytes required to encode the UTF-16BE string as UTF-32
    */
   simdutf_warn_unused virtual size_t utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept = 0;
 
@@ -2429,19 +2719,22 @@ private:
 #endif
 };
 
+class detect_best_supported_implementation_on_first_use;
+
 } // namespace internal
 
 /**
  * The list of available implementations compiled into simdutf.
  */
-extern SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list available_implementations;
+extern SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations();
 
 /**
   * The active implementation.
   *
   * Automatically initialized on first use to the most advanced implementation supported by this hardware.
   */
-extern SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr<const implementation> active_implementation;
+extern SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr<const implementation>& get_active_implementation();
+
 
 } // namespace simdutf