diff options
author | 2023-08-09 09:14:51 -0700 | |
---|---|---|
committer | 2023-08-09 09:14:51 -0700 | |
commit | b3019270c9640a60f7a30f172cea10e310baf3b6 (patch) | |
tree | 8b6252ac910863f513a27444526b461c39be8e76 /src/bun.js/bindings/simdutf.h | |
parent | 5d7c77aab0761e16ef163dcf9792e8947bdab214 (diff) | |
download | bun-b3019270c9640a60f7a30f172cea10e310baf3b6.tar.gz bun-b3019270c9640a60f7a30f172cea10e310baf3b6.tar.zst bun-b3019270c9640a60f7a30f172cea10e310baf3b6.zip |
Update SIMDUTF (#4078)
Diffstat (limited to 'src/bun.js/bindings/simdutf.h')
-rw-r--r-- | src/bun.js/bindings/simdutf.h | 3610 |
1 files changed, 1803 insertions, 1807 deletions
diff --git a/src/bun.js/bindings/simdutf.h b/src/bun.js/bindings/simdutf.h index 4c04ae467..7ad28528d 100644 --- a/src/bun.js/bindings/simdutf.h +++ b/src/bun.js/bindings/simdutf.h @@ -1,4 +1,4 @@ -/* auto-generated on 2023-06-21 08:09:45 -0400. Do not edit! */ +/* auto-generated on 2023-08-08 16:23:39 -0400. Do not edit! */ // dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf.h /* begin file include/simdutf.h */ #ifndef SIMDUTF_H @@ -78,7 +78,7 @@ #include <machine/endian.h> #elif defined(sun) || defined(__sun) // defined(__APPLE__) || defined(__FreeBSD__) #include <sys/byteorder.h> -#else // defined(__APPLE__) || defined(__FreeBSD__) +#else // defined(__APPLE__) || defined(__FreeBSD__) #ifdef __has_include #if __has_include(<endian.h>) @@ -88,6 +88,7 @@ #endif // defined(__APPLE__) || defined(__FreeBSD__) + #ifndef !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) #define SIMDUTF_IS_BIG_ENDIAN 0 #endif @@ -100,6 +101,7 @@ #endif // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__ + /** * At this point in time, SIMDUTF_IS_BIG_ENDIAN is defined. */ @@ -137,9 +139,9 @@ #elif defined(__aarch64__) || defined(_M_ARM64) #define SIMDUTF_IS_ARM64 1 #elif defined(__PPC64__) || defined(_M_PPC64) -// #define SIMDUTF_IS_PPC64 1 -// The simdutf library does yet support SIMD acceleration under -// POWER processors. Please see https://github.com/lemire/simdutf/issues/51 +//#define SIMDUTF_IS_PPC64 1 +// The simdutf library does yet support SIMD acceleration under +// POWER processors. Please see https://github.com/lemire/simdutf/issues/51 #elif defined(__s390__) // s390 IBM system. Big endian. #elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64 @@ -198,14 +200,14 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.") // warning: clang attribute push can't be used within a namespace in clang up // til 8.0 so SIMDUTF_TARGET_REGION and SIMDUTF_UNTARGET_REGION must be *outside* of a // namespace. 
-#define SIMDUTF_TARGET_REGION(T) \ - _Pragma(SIMDUTF_STRINGIFY( \ - clang attribute push(__attribute__((target(T))), apply_to = function))) +#define SIMDUTF_TARGET_REGION(T) \ + _Pragma(SIMDUTF_STRINGIFY( \ + clang attribute push(__attribute__((target(T))), apply_to = function))) #define SIMDUTF_UNTARGET_REGION _Pragma("clang attribute pop") #elif defined(__GNUC__) // GCC is easier -#define SIMDUTF_TARGET_REGION(T) \ - _Pragma("GCC push_options") _Pragma(SIMDUTF_STRINGIFY(GCC target(T))) +#define SIMDUTF_TARGET_REGION(T) \ + _Pragma("GCC push_options") _Pragma(SIMDUTF_STRINGIFY(GCC target(T))) #define SIMDUTF_UNTARGET_REGION _Pragma("GCC pop_options") #endif // clang then gcc @@ -258,11 +260,7 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.") #define SIMDUTF_ASSUME(COND) __assume(COND) #else #define SIMDUTF_UNREACHABLE() __builtin_unreachable(); -#define SIMDUTF_ASSUME(COND) \ - do { \ - if (!(COND)) \ - __builtin_unreachable(); \ - } while (0) +#define SIMDUTF_ASSUME(COND) do { if (!(COND)) __builtin_unreachable(); } while (0) #endif #else // NDEBUG @@ -272,12 +270,14 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.") #endif + #if defined(__GNUC__) && !defined(__clang__) #if __GNUC__ >= 11 #define SIMDUTF_GCC11ORMORE 1 #endif // __GNUC__ >= 11 #endif // defined(__GNUC__) && !defined(__clang__) + #endif // SIMDUTF_PORTABILITY_H /* end file include/simdutf/portability.h */ // dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf/avx512.h @@ -295,86 +295,84 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.") */ #ifndef SIMDUTF_HAS_AVX512F -#if defined(__AVX512F__) && __AVX512F__ == 1 -#define SIMDUTF_HAS_AVX512F 1 -#endif +# if defined(__AVX512F__) && __AVX512F__ == 1 +# define SIMDUTF_HAS_AVX512F 1 +# endif #endif #ifndef SIMDUTF_HAS_AVX512DQ -#if defined(__AVX512DQ__) && __AVX512DQ__ == 1 -#define SIMDUTF_HAS_AVX512DQ 1 -#endif +# if defined(__AVX512DQ__) && __AVX512DQ__ == 1 +# define 
SIMDUTF_HAS_AVX512DQ 1 +# endif #endif #ifndef SIMDUTF_HAS_AVX512IFMA -#if defined(__AVX512IFMA__) && __AVX512IFMA__ == 1 -#define SIMDUTF_HAS_AVX512IFMA 1 -#endif +# if defined(__AVX512IFMA__) && __AVX512IFMA__ == 1 +# define SIMDUTF_HAS_AVX512IFMA 1 +# endif #endif #ifndef SIMDUTF_HAS_AVX512CD -#if defined(__AVX512CD__) && __AVX512CD__ == 1 -#define SIMDUTF_HAS_AVX512CD 1 -#endif +# if defined(__AVX512CD__) && __AVX512CD__ == 1 +# define SIMDUTF_HAS_AVX512CD 1 +# endif #endif #ifndef SIMDUTF_HAS_AVX512BW -#if defined(__AVX512BW__) && __AVX512BW__ == 1 -#define SIMDUTF_HAS_AVX512BW 1 -#endif +# if defined(__AVX512BW__) && __AVX512BW__ == 1 +# define SIMDUTF_HAS_AVX512BW 1 +# endif #endif #ifndef SIMDUTF_HAS_AVX512VL -#if defined(__AVX512VL__) && __AVX512VL__ == 1 -#define SIMDUTF_HAS_AVX512VL 1 -#endif +# if defined(__AVX512VL__) && __AVX512VL__ == 1 +# define SIMDUTF_HAS_AVX512VL 1 +# endif #endif #ifndef SIMDUTF_HAS_AVX512VBMI -#if defined(__AVX512VBMI__) && __AVX512VBMI__ == 1 -#define SIMDUTF_HAS_AVX512VBMI 1 -#endif +# if defined(__AVX512VBMI__) && __AVX512VBMI__ == 1 +# define SIMDUTF_HAS_AVX512VBMI 1 +# endif #endif #ifndef SIMDUTF_HAS_AVX512VBMI2 -#if defined(__AVX512VBMI2__) && __AVX512VBMI2__ == 1 -#define SIMDUTF_HAS_AVX512VBMI2 1 -#endif +# if defined(__AVX512VBMI2__) && __AVX512VBMI2__ == 1 +# define SIMDUTF_HAS_AVX512VBMI2 1 +# endif #endif #ifndef SIMDUTF_HAS_AVX512VNNI -#if defined(__AVX512VNNI__) && __AVX512VNNI__ == 1 -#define SIMDUTF_HAS_AVX512VNNI 1 -#endif +# if defined(__AVX512VNNI__) && __AVX512VNNI__ == 1 +# define SIMDUTF_HAS_AVX512VNNI 1 +# endif #endif #ifndef SIMDUTF_HAS_AVX512BITALG -#if defined(__AVX512BITALG__) && __AVX512BITALG__ == 1 -#define SIMDUTF_HAS_AVX512BITALG 1 -#endif +# if defined(__AVX512BITALG__) && __AVX512BITALG__ == 1 +# define SIMDUTF_HAS_AVX512BITALG 1 +# endif #endif #ifndef SIMDUTF_HAS_AVX512VPOPCNTDQ -#if defined(__AVX512VPOPCNTDQ__) && __AVX512VPOPCNTDQ__ == 1 -#define SIMDUTF_HAS_AVX512VPOPCNTDQ 1 -#endif +# 
if defined(__AVX512VPOPCNTDQ__) && __AVX512VPOPCNTDQ__ == 1 +# define SIMDUTF_HAS_AVX512VPOPCNTDQ 1 +# endif #endif #endif // SIMDUTF_AVX512_H_ /* end file include/simdutf/avx512.h */ + #if defined(__GNUC__) -// Marks a block with a name so that MCA analysis can see it. -#define SIMDUTF_BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name); -#define SIMDUTF_END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name); -#define SIMDUTF_DEBUG_BLOCK(name, block) \ - BEGIN_DEBUG_BLOCK(name); \ - block; \ - END_DEBUG_BLOCK(name); + // Marks a block with a name so that MCA analysis can see it. + #define SIMDUTF_BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name); + #define SIMDUTF_END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name); + #define SIMDUTF_DEBUG_BLOCK(name, block) BEGIN_DEBUG_BLOCK(name); block; END_DEBUG_BLOCK(name); #else -#define SIMDUTF_BEGIN_DEBUG_BLOCK(name) -#define SIMDUTF_END_DEBUG_BLOCK(name) -#define SIMDUTF_DEBUG_BLOCK(name, block) + #define SIMDUTF_BEGIN_DEBUG_BLOCK(name) + #define SIMDUTF_END_DEBUG_BLOCK(name) + #define SIMDUTF_DEBUG_BLOCK(name, block) #endif // Align to N-byte boundary @@ -385,108 +383,103 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.") #if defined(SIMDUTF_REGULAR_VISUAL_STUDIO) -#define simdutf_really_inline __forceinline -#define simdutf_never_inline __declspec(noinline) - -#define simdutf_unused -#define simdutf_warn_unused - -#ifndef simdutf_likely -#define simdutf_likely(x) x -#endif -#ifndef simdutf_unlikely -#define simdutf_unlikely(x) x -#endif - -#define SIMDUTF_PUSH_DISABLE_WARNINGS __pragma(warning(push)) -#define SIMDUTF_PUSH_DISABLE_ALL_WARNINGS __pragma(warning(push, 0)) -#define SIMDUTF_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning(disable \ - : WARNING_NUMBER)) -// Get rid of Intellisense-only warnings (Code Analysis) -// Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910). 
-#ifdef __has_include -#if __has_include(<CppCoreCheck\Warnings.h>) -#include <CppCoreCheck\Warnings.h> -#define SIMDUTF_DISABLE_UNDESIRED_WARNINGS SIMDUTF_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS) -#endif -#endif - -#ifndef SIMDUTF_DISABLE_UNDESIRED_WARNINGS -#define SIMDUTF_DISABLE_UNDESIRED_WARNINGS -#endif - -#define SIMDUTF_DISABLE_DEPRECATED_WARNING SIMDUTF_DISABLE_VS_WARNING(4996) -#define SIMDUTF_DISABLE_STRICT_OVERFLOW_WARNING -#define SIMDUTF_POP_DISABLE_WARNINGS __pragma(warning(pop)) + #define simdutf_really_inline __forceinline + #define simdutf_never_inline __declspec(noinline) + + #define simdutf_unused + #define simdutf_warn_unused + + #ifndef simdutf_likely + #define simdutf_likely(x) x + #endif + #ifndef simdutf_unlikely + #define simdutf_unlikely(x) x + #endif + + #define SIMDUTF_PUSH_DISABLE_WARNINGS __pragma(warning( push )) + #define SIMDUTF_PUSH_DISABLE_ALL_WARNINGS __pragma(warning( push, 0 )) + #define SIMDUTF_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER )) + // Get rid of Intellisense-only warnings (Code Analysis) + // Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910). 
+ #ifdef __has_include + #if __has_include(<CppCoreCheck\Warnings.h>) + #include <CppCoreCheck\Warnings.h> + #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS SIMDUTF_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS) + #endif + #endif + + #ifndef SIMDUTF_DISABLE_UNDESIRED_WARNINGS + #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS + #endif + + #define SIMDUTF_DISABLE_DEPRECATED_WARNING SIMDUTF_DISABLE_VS_WARNING(4996) + #define SIMDUTF_DISABLE_STRICT_OVERFLOW_WARNING + #define SIMDUTF_POP_DISABLE_WARNINGS __pragma(warning( pop )) #else // SIMDUTF_REGULAR_VISUAL_STUDIO -#define simdutf_really_inline inline __attribute__((always_inline)) -#define simdutf_never_inline inline __attribute__((noinline)) + #define simdutf_really_inline inline __attribute__((always_inline)) + #define simdutf_never_inline inline __attribute__((noinline)) + + #define simdutf_unused __attribute__((unused)) + #define simdutf_warn_unused __attribute__((warn_unused_result)) + + #ifndef simdutf_likely + #define simdutf_likely(x) __builtin_expect(!!(x), 1) + #endif + #ifndef simdutf_unlikely + #define simdutf_unlikely(x) __builtin_expect(!!(x), 0) + #endif + + #define SIMDUTF_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push") + // gcc doesn't seem to disable all warnings with all and extra, add warnings here as necessary + #define SIMDUTF_PUSH_DISABLE_ALL_WARNINGS SIMDUTF_PUSH_DISABLE_WARNINGS \ + SIMDUTF_DISABLE_GCC_WARNING(-Weffc++) \ + SIMDUTF_DISABLE_GCC_WARNING(-Wall) \ + SIMDUTF_DISABLE_GCC_WARNING(-Wconversion) \ + SIMDUTF_DISABLE_GCC_WARNING(-Wextra) \ + SIMDUTF_DISABLE_GCC_WARNING(-Wattributes) \ + SIMDUTF_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \ + SIMDUTF_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \ + SIMDUTF_DISABLE_GCC_WARNING(-Wreturn-type) \ + SIMDUTF_DISABLE_GCC_WARNING(-Wshadow) \ + SIMDUTF_DISABLE_GCC_WARNING(-Wunused-parameter) \ + SIMDUTF_DISABLE_GCC_WARNING(-Wunused-variable) + #define SIMDUTF_PRAGMA(P) _Pragma(#P) + #define SIMDUTF_DISABLE_GCC_WARNING(WARNING) SIMDUTF_PRAGMA(GCC diagnostic 
ignored #WARNING) + #if defined(SIMDUTF_CLANG_VISUAL_STUDIO) + #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS SIMDUTF_DISABLE_GCC_WARNING(-Wmicrosoft-include) + #else + #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS + #endif + #define SIMDUTF_DISABLE_DEPRECATED_WARNING SIMDUTF_DISABLE_GCC_WARNING(-Wdeprecated-declarations) + #define SIMDUTF_DISABLE_STRICT_OVERFLOW_WARNING SIMDUTF_DISABLE_GCC_WARNING(-Wstrict-overflow) + #define SIMDUTF_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop") -#define simdutf_unused __attribute__((unused)) -#define simdutf_warn_unused __attribute__((warn_unused_result)) -#ifndef simdutf_likely -#define simdutf_likely(x) __builtin_expect(!!(x), 1) -#endif -#ifndef simdutf_unlikely -#define simdutf_unlikely(x) __builtin_expect(!!(x), 0) -#endif - -#define SIMDUTF_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push") -// gcc doesn't seem to disable all warnings with all and extra, add warnings here as necessary -#define SIMDUTF_PUSH_DISABLE_ALL_WARNINGS \ - SIMDUTF_PUSH_DISABLE_WARNINGS \ - SIMDUTF_DISABLE_GCC_WARNING(-Weffc++) \ - SIMDUTF_DISABLE_GCC_WARNING(-Wall) \ - SIMDUTF_DISABLE_GCC_WARNING(-Wconversion) \ - SIMDUTF_DISABLE_GCC_WARNING(-Wextra) \ - SIMDUTF_DISABLE_GCC_WARNING(-Wattributes) \ - SIMDUTF_DISABLE_GCC_WARNING(-Wimplicit - fallthrough) \ - SIMDUTF_DISABLE_GCC_WARNING(-Wnon - virtual - dtor) \ - SIMDUTF_DISABLE_GCC_WARNING(-Wreturn - type) \ - SIMDUTF_DISABLE_GCC_WARNING(-Wshadow) \ - SIMDUTF_DISABLE_GCC_WARNING(-Wunused - parameter) \ - SIMDUTF_DISABLE_GCC_WARNING(-Wunused - variable) -#define SIMDUTF_PRAGMA(P) _Pragma(#P) -#define SIMDUTF_DISABLE_GCC_WARNING(WARNING) SIMDUTF_PRAGMA(GCC diagnostic ignored #WARNING) -#if defined(SIMDUTF_CLANG_VISUAL_STUDIO) -#define SIMDUTF_DISABLE_UNDESIRED_WARNINGS SIMDUTF_DISABLE_GCC_WARNING(-Wmicrosoft - include) -#else -#define SIMDUTF_DISABLE_UNDESIRED_WARNINGS -#endif -#define SIMDUTF_DISABLE_DEPRECATED_WARNING SIMDUTF_DISABLE_GCC_WARNING(-Wdeprecated - declarations) -#define 
SIMDUTF_DISABLE_STRICT_OVERFLOW_WARNING SIMDUTF_DISABLE_GCC_WARNING(-Wstrict - overflow) -#define SIMDUTF_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop") #endif // MSC_VER #ifndef SIMDUTF_DLLIMPORTEXPORT -#if defined(SIMDUTF_VISUAL_STUDIO) -/** - * It does not matter here whether you are using - * the regular visual studio or clang under visual - * studio. - */ -#if SIMDUTF_USING_LIBRARY -#define SIMDUTF_DLLIMPORTEXPORT __declspec(dllimport) -#else -#define SIMDUTF_DLLIMPORTEXPORT __declspec(dllexport) -#endif -#else -#define SIMDUTF_DLLIMPORTEXPORT -#endif + #if defined(SIMDUTF_VISUAL_STUDIO) + /** + * It does not matter here whether you are using + * the regular visual studio or clang under visual + * studio. + */ + #if SIMDUTF_USING_LIBRARY + #define SIMDUTF_DLLIMPORTEXPORT __declspec(dllimport) + #else + #define SIMDUTF_DLLIMPORTEXPORT __declspec(dllexport) + #endif + #else + #define SIMDUTF_DLLIMPORTEXPORT + #endif #endif /// If EXPR is an error, returns it. -#define SIMDUTF_TRY(EXPR) \ - { \ - auto _err = (EXPR); \ - if (_err) { \ - return _err; \ - } \ - } +#define SIMDUTF_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } } + #endif // SIMDUTF_COMMON_DEFS_H /* end file include/simdutf/common_defs.h */ @@ -497,19 +490,19 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.") namespace simdutf { enum encoding_type { - UTF8 = 1, // BOM 0xef 0xbb 0xbf - UTF16_LE = 2, // BOM 0xff 0xfe - UTF16_BE = 4, // BOM 0xfe 0xff - UTF32_LE = 8, // BOM 0xff 0xfe 0x00 0x00 - UTF32_BE = 16, // BOM 0x00 0x00 0xfe 0xff - Latin1 = 32, - - unspecified = 0 + UTF8 = 1, // BOM 0xef 0xbb 0xbf + UTF16_LE = 2, // BOM 0xff 0xfe + UTF16_BE = 4, // BOM 0xfe 0xff + UTF32_LE = 8, // BOM 0xff 0xfe 0x00 0x00 + UTF32_BE = 16, // BOM 0x00 0x00 0xfe 0xff + Latin1 = 32, + + unspecified = 0 }; enum endianness { - LITTLE, - BIG + LITTLE, + BIG }; bool match_system(endianness e); @@ -546,27 +539,27 @@ size_t bom_byte_size(encoding_type bom); namespace simdutf { enum error_code { - 
SUCCESS = 0, - HEADER_BITS, // Any byte must have fewer than 5 header bits. - TOO_SHORT, // The leading byte must be followed by N-1 continuation bytes, where N is the UTF-8 character length - // This is also the error when the input is truncated. - TOO_LONG, // We either have too many consecutive continuation bytes or the string starts with a continuation byte. - OVERLONG, // The decoded character must be above U+7F for two-byte characters, U+7FF for three-byte characters, - // and U+FFFF for four-byte characters. - TOO_LARGE, // The decoded character must be less than or equal to U+10FFFF,less than or equal than U+7F for ASCII OR less than equal than U+FF for Latin1 - SURROGATE, // The decoded character must be not be in U+D800...DFFF (UTF-8 or UTF-32) OR - // a high surrogate must be followed by a low surrogate and a low surrogate must be preceded by a high surrogate (UTF-16) OR - // there must be no surrogate at all (Latin1) - OTHER // Not related to validation/transcoding. + SUCCESS = 0, + HEADER_BITS, // Any byte must have fewer than 5 header bits. + TOO_SHORT, // The leading byte must be followed by N-1 continuation bytes, where N is the UTF-8 character length + // This is also the error when the input is truncated. + TOO_LONG, // We either have too many consecutive continuation bytes or the string starts with a continuation byte. + OVERLONG, // The decoded character must be above U+7F for two-byte characters, U+7FF for three-byte characters, + // and U+FFFF for four-byte characters. + TOO_LARGE, // The decoded character must be less than or equal to U+10FFFF,less than or equal than U+7F for ASCII OR less than equal than U+FF for Latin1 + SURROGATE, // The decoded character must be not be in U+D800...DFFF (UTF-8 or UTF-32) OR + // a high surrogate must be followed by a low surrogate and a low surrogate must be preceded by a high surrogate (UTF-16) OR + // there must be no surrogate at all (Latin1) + OTHER // Not related to validation/transcoding. 
}; struct result { - error_code error; - size_t count; // In case of error, indicates the position of the error. In case of success, indicates the number of words validated/written. + error_code error; + size_t count; // In case of error, indicates the position of the error. In case of success, indicates the number of words validated/written. - simdutf_really_inline result(); + simdutf_really_inline result(); - simdutf_really_inline result(error_code, size_t); + simdutf_really_inline result(error_code, size_t); }; } @@ -589,18 +582,18 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS namespace simdutf { enum { - /** - * The major version (MAJOR.minor.revision) of simdutf being used. - */ - SIMDUTF_VERSION_MAJOR = 3, - /** - * The minor version (major.MINOR.revision) of simdutf being used. - */ - SIMDUTF_VERSION_MINOR = 2, - /** - * The revision (major.minor.REVISION) of simdutf being used. - */ - SIMDUTF_VERSION_REVISION = 14 + /** + * The major version (MAJOR.minor.revision) of simdutf being used. + */ + SIMDUTF_VERSION_MAJOR = 3, + /** + * The minor version (major.MINOR.revision) of simdutf being used. + */ + SIMDUTF_VERSION_MINOR = 2, + /** + * The revision (major.minor.REVISION) of simdutf being used. 
+ */ + SIMDUTF_VERSION_REVISION = 14 }; } // namespace simdutf @@ -678,192 +671,191 @@ namespace simdutf { namespace internal { enum instruction_set { - DEFAULT = 0x0, - NEON = 0x1, - AVX2 = 0x4, - SSE42 = 0x8, - PCLMULQDQ = 0x10, - BMI1 = 0x20, - BMI2 = 0x40, - ALTIVEC = 0x80, - AVX512F = 0x100, - AVX512DQ = 0x200, - AVX512IFMA = 0x400, - AVX512PF = 0x800, - AVX512ER = 0x1000, - AVX512CD = 0x2000, - AVX512BW = 0x4000, - AVX512VL = 0x8000, - AVX512VBMI2 = 0x10000 + DEFAULT = 0x0, + NEON = 0x1, + AVX2 = 0x4, + SSE42 = 0x8, + PCLMULQDQ = 0x10, + BMI1 = 0x20, + BMI2 = 0x40, + ALTIVEC = 0x80, + AVX512F = 0x100, + AVX512DQ = 0x200, + AVX512IFMA = 0x400, + AVX512PF = 0x800, + AVX512ER = 0x1000, + AVX512CD = 0x2000, + AVX512BW = 0x4000, + AVX512VL = 0x8000, + AVX512VBMI2 = 0x10000, + AVX512VPOPCNTDQ = 0x2000 }; #if defined(__PPC64__) -static inline uint32_t detect_supported_architectures() -{ - return instruction_set::ALTIVEC; +static inline uint32_t detect_supported_architectures() { + return instruction_set::ALTIVEC; } #elif defined(__aarch64__) || defined(_M_ARM64) -static inline uint32_t detect_supported_architectures() -{ - return instruction_set::NEON; +static inline uint32_t detect_supported_architectures() { + return instruction_set::NEON; } #elif defined(__x86_64__) || defined(_M_AMD64) // x64 + namespace { namespace cpuid_bit { -// Can be found on Intel ISA Reference for CPUID - -// EAX = 0x01 -constexpr uint32_t pclmulqdq = uint32_t(1) << 1; ///< @private bit 1 of ECX for EAX=0x1 -constexpr uint32_t sse42 = uint32_t(1) << 20; ///< @private bit 20 of ECX for EAX=0x1 -constexpr uint32_t osxsave = (uint32_t(1) << 26) | (uint32_t(1) << 27); ///< @private bits 26+27 of ECX for EAX=0x1 - -// EAX = 0x7f (Structured Extended Feature Flags), ECX = 0x00 (Sub-leaf) -// See: "Table 3-8. 
Information Returned by CPUID Instruction" -namespace ebx { -constexpr uint32_t bmi1 = uint32_t(1) << 3; -constexpr uint32_t avx2 = uint32_t(1) << 5; -constexpr uint32_t bmi2 = uint32_t(1) << 8; -constexpr uint32_t avx512f = uint32_t(1) << 16; -constexpr uint32_t avx512dq = uint32_t(1) << 17; -constexpr uint32_t avx512ifma = uint32_t(1) << 21; -constexpr uint32_t avx512cd = uint32_t(1) << 28; -constexpr uint32_t avx512bw = uint32_t(1) << 30; -constexpr uint32_t avx512vl = uint32_t(1) << 31; -} + // Can be found on Intel ISA Reference for CPUID + + // EAX = 0x01 + constexpr uint32_t pclmulqdq = uint32_t(1) << 1; ///< @private bit 1 of ECX for EAX=0x1 + constexpr uint32_t sse42 = uint32_t(1) << 20; ///< @private bit 20 of ECX for EAX=0x1 + constexpr uint32_t osxsave = (uint32_t(1) << 26) | (uint32_t(1) << 27); ///< @private bits 26+27 of ECX for EAX=0x1 + + // EAX = 0x7f (Structured Extended Feature Flags), ECX = 0x00 (Sub-leaf) + // See: "Table 3-8. Information Returned by CPUID Instruction" + namespace ebx { + constexpr uint32_t bmi1 = uint32_t(1) << 3; + constexpr uint32_t avx2 = uint32_t(1) << 5; + constexpr uint32_t bmi2 = uint32_t(1) << 8; + constexpr uint32_t avx512f = uint32_t(1) << 16; + constexpr uint32_t avx512dq = uint32_t(1) << 17; + constexpr uint32_t avx512ifma = uint32_t(1) << 21; + constexpr uint32_t avx512cd = uint32_t(1) << 28; + constexpr uint32_t avx512bw = uint32_t(1) << 30; + constexpr uint32_t avx512vl = uint32_t(1) << 31; + } -namespace ecx { -constexpr uint32_t avx512vbmi = uint32_t(1) << 1; -constexpr uint32_t avx512vbmi2 = uint32_t(1) << 6; -constexpr uint32_t avx512vnni = uint32_t(1) << 11; -constexpr uint32_t avx512bitalg = uint32_t(1) << 12; -constexpr uint32_t avx512vpopcnt = uint32_t(1) << 14; -} -namespace edx { -constexpr uint32_t avx512vp2intersect = uint32_t(1) << 8; -} -namespace xcr0_bit { -constexpr uint64_t avx256_saved = uint64_t(1) << 2; ///< @private bit 2 = AVX -constexpr uint64_t avx512_saved = uint64_t(7) << 5; ///< 
@private bits 5,6,7 = opmask, ZMM_hi256, hi16_ZMM -} -} + namespace ecx { + constexpr uint32_t avx512vbmi = uint32_t(1) << 1; + constexpr uint32_t avx512vbmi2 = uint32_t(1) << 6; + constexpr uint32_t avx512vnni = uint32_t(1) << 11; + constexpr uint32_t avx512bitalg = uint32_t(1) << 12; + constexpr uint32_t avx512vpopcnt = uint32_t(1) << 14; + } + namespace edx { + constexpr uint32_t avx512vp2intersect = uint32_t(1) << 8; + } + namespace xcr0_bit { + constexpr uint64_t avx256_saved = uint64_t(1) << 2; ///< @private bit 2 = AVX + constexpr uint64_t avx512_saved = uint64_t(7) << 5; ///< @private bits 5,6,7 = opmask, ZMM_hi256, hi16_ZMM + } + } } -static inline void cpuid(uint32_t* eax, uint32_t* ebx, uint32_t* ecx, - uint32_t* edx) -{ -#if defined(_MSC_VER) - int cpu_info[4]; - __cpuidex(cpu_info, *eax, *ecx); - *eax = cpu_info[0]; - *ebx = cpu_info[1]; - *ecx = cpu_info[2]; - *edx = cpu_info[3]; -#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) - uint32_t level = *eax; - __get_cpuid(level, eax, ebx, ecx, edx); -#else - uint32_t a = *eax, b, c = *ecx, d; - asm volatile("cpuid\n\t" - : "+a"(a), "=b"(b), "+c"(c), "=d"(d)); - *eax = a; - *ebx = b; - *ecx = c; - *edx = d; -#endif -} -static inline uint64_t xgetbv() -{ + +static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, + uint32_t *edx) { #if defined(_MSC_VER) - return _xgetbv(0); + int cpu_info[4]; + __cpuidex(cpu_info, *eax, *ecx); + *eax = cpu_info[0]; + *ebx = cpu_info[1]; + *ecx = cpu_info[2]; + *edx = cpu_info[3]; +#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) + uint32_t level = *eax; + __get_cpuid(level, eax, ebx, ecx, edx); #else - uint32_t xcr0_lo, xcr0_hi; - asm volatile("xgetbv\n\t" - : "=a"(xcr0_lo), "=d"(xcr0_hi) - : "c"(0)); - return xcr0_lo | ((uint64_t)xcr0_hi << 32); + uint32_t a = *eax, b, c = *ecx, d; + asm volatile("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d)); + *eax = a; + *ebx = b; + *ecx = c; + *edx = d; #endif } -static inline uint32_t 
detect_supported_architectures() -{ - uint32_t eax; - uint32_t ebx = 0; - uint32_t ecx = 0; - uint32_t edx = 0; - uint32_t host_isa = 0x0; - - // EBX for EAX=0x1 - eax = 0x1; - cpuid(&eax, &ebx, &ecx, &edx); - - if (ecx & cpuid_bit::sse42) { - host_isa |= instruction_set::SSE42; - } - - if (ecx & cpuid_bit::pclmulqdq) { - host_isa |= instruction_set::PCLMULQDQ; - } - - if ((ecx & cpuid_bit::osxsave) != cpuid_bit::osxsave) { - return host_isa; - } +static inline uint64_t xgetbv() { + #if defined(_MSC_VER) + return _xgetbv(0); + #else + uint32_t xcr0_lo, xcr0_hi; + asm volatile("xgetbv\n\t" : "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0)); + return xcr0_lo | ((uint64_t)xcr0_hi << 32); + #endif + } + +static inline uint32_t detect_supported_architectures() { + uint32_t eax; + uint32_t ebx = 0; + uint32_t ecx = 0; + uint32_t edx = 0; + uint32_t host_isa = 0x0; + + // EBX for EAX=0x1 + eax = 0x1; + cpuid(&eax, &ebx, &ecx, &edx); + + if (ecx & cpuid_bit::sse42) { + host_isa |= instruction_set::SSE42; + } + + if (ecx & cpuid_bit::pclmulqdq) { + host_isa |= instruction_set::PCLMULQDQ; + } + + if ((ecx & cpuid_bit::osxsave) != cpuid_bit::osxsave) { + return host_isa; + } - // xgetbv for checking if the OS saves registers - uint64_t xcr0 = xgetbv(); + // xgetbv for checking if the OS saves registers + uint64_t xcr0 = xgetbv(); - if ((xcr0 & cpuid_bit::xcr0_bit::avx256_saved) == 0) { - return host_isa; - } - // ECX for EAX=0x7 - eax = 0x7; - ecx = 0x0; // Sub-leaf = 0 - cpuid(&eax, &ebx, &ecx, &edx); - if (ebx & cpuid_bit::ebx::avx2) { - host_isa |= instruction_set::AVX2; - } - if (ebx & cpuid_bit::ebx::bmi1) { - host_isa |= instruction_set::BMI1; - } - if (ebx & cpuid_bit::ebx::bmi2) { - host_isa |= instruction_set::BMI2; - } - if (!((xcr0 & cpuid_bit::xcr0_bit::avx512_saved) == cpuid_bit::xcr0_bit::avx512_saved)) { - return host_isa; - } - if (ebx & cpuid_bit::ebx::avx512f) { - host_isa |= instruction_set::AVX512F; - } - if (ebx & cpuid_bit::ebx::avx512bw) { - host_isa |= 
instruction_set::AVX512BW; - } - if (ebx & cpuid_bit::ebx::avx512cd) { - host_isa |= instruction_set::AVX512CD; - } - if (ebx & cpuid_bit::ebx::avx512dq) { - host_isa |= instruction_set::AVX512DQ; - } - if (ebx & cpuid_bit::ebx::avx512vl) { - host_isa |= instruction_set::AVX512VL; - } - if (ecx & cpuid_bit::ecx::avx512vbmi2) { - host_isa |= instruction_set::AVX512VBMI2; - } + if ((xcr0 & cpuid_bit::xcr0_bit::avx256_saved) == 0) { + return host_isa; + } + // ECX for EAX=0x7 + eax = 0x7; + ecx = 0x0; // Sub-leaf = 0 + cpuid(&eax, &ebx, &ecx, &edx); + if (ebx & cpuid_bit::ebx::avx2) { + host_isa |= instruction_set::AVX2; + } + if (ebx & cpuid_bit::ebx::bmi1) { + host_isa |= instruction_set::BMI1; + } + if (ebx & cpuid_bit::ebx::bmi2) { + host_isa |= instruction_set::BMI2; + } + if (!((xcr0 & cpuid_bit::xcr0_bit::avx512_saved) == cpuid_bit::xcr0_bit::avx512_saved)) { return host_isa; + } + if (ebx & cpuid_bit::ebx::avx512f) { + host_isa |= instruction_set::AVX512F; + } + if (ebx & cpuid_bit::ebx::avx512bw) { + host_isa |= instruction_set::AVX512BW; + } + if (ebx & cpuid_bit::ebx::avx512cd) { + host_isa |= instruction_set::AVX512CD; + } + if (ebx & cpuid_bit::ebx::avx512dq) { + host_isa |= instruction_set::AVX512DQ; + } + if (ebx & cpuid_bit::ebx::avx512vl) { + host_isa |= instruction_set::AVX512VL; + } + if (ecx & cpuid_bit::ecx::avx512vbmi2) { + host_isa |= instruction_set::AVX512VBMI2; + } + if (ecx & cpuid_bit::ecx::avx512vpopcnt) { + host_isa |= instruction_set::AVX512VPOPCNTDQ; + } + return host_isa; } #else // fallback // includes 32-bit ARM. 
-static inline uint32_t detect_supported_architectures() -{ - return instruction_set::DEFAULT; +static inline uint32_t detect_supported_architectures() { + return instruction_set::DEFAULT; } + #endif // end SIMD extension detection code } // namespace internal @@ -872,6 +864,7 @@ static inline uint32_t detect_supported_architectures() #endif // SIMDutf_INTERNAL_ISADETECTION_H /* end file include/simdutf/internal/isadetection.h */ + namespace simdutf { /** @@ -884,10 +877,9 @@ namespace simdutf { * @param length the length of the string in bytes. * @return the detected encoding type */ -simdutf_warn_unused simdutf::encoding_type autodetect_encoding(const char* input, size_t length) noexcept; -simdutf_really_inline simdutf_warn_unused simdutf::encoding_type autodetect_encoding(const uint8_t* input, size_t length) noexcept -{ - return autodetect_encoding(reinterpret_cast<const char*>(input), length); +simdutf_warn_unused simdutf::encoding_type autodetect_encoding(const char * input, size_t length) noexcept; +simdutf_really_inline simdutf_warn_unused simdutf::encoding_type autodetect_encoding(const uint8_t * input, size_t length) noexcept { + return autodetect_encoding(reinterpret_cast<const char *>(input), length); } /** @@ -901,10 +893,9 @@ simdutf_really_inline simdutf_warn_unused simdutf::encoding_type autodetect_enco * @param length the length of the string in bytes. 
* @return the detected encoding type */ -simdutf_warn_unused int detect_encodings(const char* input, size_t length) noexcept; -simdutf_really_inline simdutf_warn_unused int detect_encodings(const uint8_t* input, size_t length) noexcept -{ - return detect_encodings(reinterpret_cast<const char*>(input), length); +simdutf_warn_unused int detect_encodings(const char * input, size_t length) noexcept; +simdutf_really_inline simdutf_warn_unused int detect_encodings(const uint8_t * input, size_t length) noexcept { + return detect_encodings(reinterpret_cast<const char *>(input), length); } /** @@ -918,7 +909,7 @@ simdutf_really_inline simdutf_warn_unused int detect_encodings(const uint8_t* in * @param len the length of the string in bytes. * @return true if and only if the string is valid UTF-8. */ -simdutf_warn_unused bool validate_utf8(const char* buf, size_t len) noexcept; +simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept; /** * Validate the UTF-8 string and stop on error. @@ -929,7 +920,7 @@ simdutf_warn_unused bool validate_utf8(const char* buf, size_t len) noexcept; * @param len the length of the string in bytes. * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. */ -simdutf_warn_unused result validate_utf8_with_errors(const char* buf, size_t len) noexcept; +simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) noexcept; /** * Validate the ASCII string. @@ -940,7 +931,7 @@ simdutf_warn_unused result validate_utf8_with_errors(const char* buf, size_t len * @param len the length of the string in bytes. * @return true if and only if the string is valid ASCII. */ -simdutf_warn_unused bool validate_ascii(const char* buf, size_t len) noexcept; +simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept; /** * Validate the ASCII string and stop on error. 
It might be faster than @@ -952,7 +943,7 @@ simdutf_warn_unused bool validate_ascii(const char* buf, size_t len) noexcept; * @param len the length of the string in bytes. * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. */ -simdutf_warn_unused result validate_ascii_with_errors(const char* buf, size_t len) noexcept; +simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) noexcept; /** * Using native endianness; Validate the UTF-16 string. @@ -967,7 +958,7 @@ simdutf_warn_unused result validate_ascii_with_errors(const char* buf, size_t le * @param len the length of the string in number of 2-byte words (char16_t). * @return true if and only if the string is valid UTF-16. */ -simdutf_warn_unused bool validate_utf16(const char16_t* buf, size_t len) noexcept; +simdutf_warn_unused bool validate_utf16(const char16_t *buf, size_t len) noexcept; /** * Validate the UTF-16LE string. This function may be best when you expect @@ -982,7 +973,7 @@ simdutf_warn_unused bool validate_utf16(const char16_t* buf, size_t len) noexcep * @param len the length of the string in number of 2-byte words (char16_t). * @return true if and only if the string is valid UTF-16LE. */ -simdutf_warn_unused bool validate_utf16le(const char16_t* buf, size_t len) noexcept; +simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) noexcept; /** * Validate the UTF-16BE string. This function may be best when you expect @@ -997,7 +988,7 @@ simdutf_warn_unused bool validate_utf16le(const char16_t* buf, size_t len) noexc * @param len the length of the string in number of 2-byte words (char16_t). * @return true if and only if the string is valid UTF-16BE. 
*/ -simdutf_warn_unused bool validate_utf16be(const char16_t* buf, size_t len) noexcept; +simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) noexcept; /** * Using native endianness; Validate the UTF-16 string and stop on error. @@ -1011,7 +1002,7 @@ simdutf_warn_unused bool validate_utf16be(const char16_t* buf, size_t len) noexc * @param len the length of the string in number of 2-byte words (char16_t). * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. */ -simdutf_warn_unused result validate_utf16_with_errors(const char16_t* buf, size_t len) noexcept; +simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf, size_t len) noexcept; /** * Validate the UTF-16LE string and stop on error. It might be faster than @@ -1025,7 +1016,7 @@ simdutf_warn_unused result validate_utf16_with_errors(const char16_t* buf, size_ * @param len the length of the string in number of 2-byte words (char16_t). * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. */ -simdutf_warn_unused result validate_utf16le_with_errors(const char16_t* buf, size_t len) noexcept; +simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) noexcept; /** * Validate the UTF-16BE string and stop on error. It might be faster than @@ -1039,7 +1030,7 @@ simdutf_warn_unused result validate_utf16le_with_errors(const char16_t* buf, siz * @param len the length of the string in number of 2-byte words (char16_t). * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. 
*/ -simdutf_warn_unused result validate_utf16be_with_errors(const char16_t* buf, size_t len) noexcept; +simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) noexcept; /** * Validate the UTF-32 string. This function may be best when you expect @@ -1054,7 +1045,7 @@ simdutf_warn_unused result validate_utf16be_with_errors(const char16_t* buf, siz * @param len the length of the string in number of 4-byte words (char32_t). * @return true if and only if the string is valid UTF-32. */ -simdutf_warn_unused bool validate_utf32(const char32_t* buf, size_t len) noexcept; +simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) noexcept; /** * Validate the UTF-32 string and stop on error. It might be faster than @@ -1068,68 +1059,69 @@ simdutf_warn_unused bool validate_utf32(const char32_t* buf, size_t len) noexcep * @param len the length of the string in number of 4-byte words (char32_t). * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. */ -simdutf_warn_unused result validate_utf32_with_errors(const char32_t* buf, size_t len) noexcept; - -/** - * Convert Latin1 string into UTF8 string. - * - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the Latin1 string to convert - * @param length the length of the string in bytes - * @param latin1_output the pointer to buffer that can hold conversion result - * @return the number of written char; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_latin1_to_utf8(const char* input, size_t length, char* utf8_output) noexcept; - -/** - * Convert possibly Latin1 string into UTF-16LE string. - * - * This function is suitable to work with inputs from untrusted sources. 
- * - * @param input the Latin1 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_latin1_to_utf16le(const char* input, size_t length, char16_t* utf16_output) noexcept; - -/** - * Convert Latin1 string into UTF-16BE string. - * - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the Latin1 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_latin1_to_utf16be(const char* input, size_t length, char16_t* utf16_output) noexcept; - -/** - * Convert Latin1 string into UTF-32 string. - * - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the Latin1 string to convert - * @param length the length of the string in bytes - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return the number of written char32_t; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_latin1_to_utf32(const char* input, size_t length, char32_t* utf32_buffer) noexcept; - -/** - * Convert possibly broken UTF-8 string into latin1 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. 
- * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param latin1_output the pointer to buffer that can hold conversion result - * @return the number of written char; 0 if the input was not valid UTF-8 string - */ -simdutf_warn_unused size_t convert_utf8_to_latin1(const char* input, size_t length, char* latin1_output) noexcept; +simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) noexcept; + + /** + * Convert Latin1 string into UTF8 string. + * + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param latin1_output the pointer to buffer that can hold conversion result + * @return the number of written char; 0 if conversion is not possible + */ + simdutf_warn_unused size_t convert_latin1_to_utf8(const char * input, size_t length, char* utf8_output) noexcept; + + + /** + * Convert possibly Latin1 string into UTF-16LE string. + * + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return the number of written char16_t; 0 if conversion is not possible + */ + simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * input, size_t length, char16_t* utf16_output) noexcept; + + /** + * Convert Latin1 string into UTF-16BE string. + * + * This function is suitable to work with inputs from untrusted sources. 
+ * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return the number of written char16_t; 0 if conversion is not possible + */ + simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * input, size_t length, char16_t* utf16_output) noexcept; + + /** + * Convert Latin1 string into UTF-32 string. + * + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return the number of written char32_t; 0 if conversion is not possible + */ + simdutf_warn_unused size_t convert_latin1_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) noexcept; + + /** + * Convert possibly broken UTF-8 string into latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param latin1_output the pointer to buffer that can hold conversion result + * @return the number of written char; 0 if the input was not valid UTF-8 string + */ + simdutf_warn_unused size_t convert_utf8_to_latin1(const char * input, size_t length, char* latin1_output) noexcept; /** * Using native endianness; Convert possibly broken UTF-8 string into UTF-16 string. 
@@ -1142,7 +1134,7 @@ simdutf_warn_unused size_t convert_utf8_to_latin1(const char* input, size_t leng * @param utf16_buffer the pointer to buffer that can hold conversion result * @return the number of written char16_t; 0 if the input was not valid UTF-8 string */ -simdutf_warn_unused size_t convert_utf8_to_utf16(const char* input, size_t length, char16_t* utf16_output) noexcept; +simdutf_warn_unused size_t convert_utf8_to_utf16(const char * input, size_t length, char16_t* utf16_output) noexcept; /** * Convert possibly broken UTF-8 string into UTF-16LE string. @@ -1155,7 +1147,7 @@ simdutf_warn_unused size_t convert_utf8_to_utf16(const char* input, size_t lengt * @param utf16_buffer the pointer to buffer that can hold conversion result * @return the number of written char16_t; 0 if the input was not valid UTF-8 string */ -simdutf_warn_unused size_t convert_utf8_to_utf16le(const char* input, size_t length, char16_t* utf16_output) noexcept; +simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * input, size_t length, char16_t* utf16_output) noexcept; /** * Convert possibly broken UTF-8 string into UTF-16BE string. @@ -1168,20 +1160,21 @@ simdutf_warn_unused size_t convert_utf8_to_utf16le(const char* input, size_t len * @param utf16_buffer the pointer to buffer that can hold conversion result * @return the number of written char16_t; 0 if the input was not valid UTF-8 string */ -simdutf_warn_unused size_t convert_utf8_to_utf16be(const char* input, size_t length, char16_t* utf16_output) noexcept; +simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_output) noexcept; -/** - * Convert possibly broken UTF-8 string into latin1 string. with errors - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. 
- * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param latin1_output the pointer to buffer that can hold conversion result - * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. - */ -simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char* input, size_t length, char* latin1_output) noexcept; + + /** + * Convert possibly broken UTF-8 string into latin1 string. with errors + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param latin1_output the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. + */ + simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * input, size_t length, char* latin1_output) noexcept; /** * Using native endianness; Convert possibly broken UTF-8 string into UTF-16 @@ -1195,7 +1188,7 @@ simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char* input, * @param utf16_buffer the pointer to buffer that can hold conversion result * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful. */ -simdutf_warn_unused result convert_utf8_to_utf16_with_errors(const char* input, size_t length, char16_t* utf16_output) noexcept; +simdutf_warn_unused result convert_utf8_to_utf16_with_errors(const char * input, size_t length, char16_t* utf16_output) noexcept; /** * Convert possibly broken UTF-8 string into UTF-16LE string and stop on error. 
@@ -1208,7 +1201,7 @@ simdutf_warn_unused result convert_utf8_to_utf16_with_errors(const char* input, * @param utf16_buffer the pointer to buffer that can hold conversion result * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful. */ -simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char* input, size_t length, char16_t* utf16_output) noexcept; +simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * input, size_t length, char16_t* utf16_output) noexcept; /** * Convert possibly broken UTF-8 string into UTF-16BE string and stop on error. @@ -1221,7 +1214,7 @@ simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char* input * @param utf16_buffer the pointer to buffer that can hold conversion result * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful. */ -simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char* input, size_t length, char16_t* utf16_output) noexcept; +simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * input, size_t length, char16_t* utf16_output) noexcept; /** * Convert possibly broken UTF-8 string into UTF-32 string. @@ -1234,7 +1227,7 @@ simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char* input * @param utf32_buffer the pointer to buffer that can hold conversion result * @return the number of written char32_t; 0 if the input was not valid UTF-8 string */ -simdutf_warn_unused size_t convert_utf8_to_utf32(const char* input, size_t length, char32_t* utf32_output) noexcept; +simdutf_warn_unused size_t convert_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_output) noexcept; /** * Convert possibly broken UTF-8 string into UTF-32 string and stop on error. 
@@ -1247,21 +1240,22 @@ simdutf_warn_unused size_t convert_utf8_to_utf32(const char* input, size_t lengt * @param utf32_buffer the pointer to buffer that can hold conversion result * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful. */ -simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char* input, size_t length, char32_t* utf32_output) noexcept; +simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * input, size_t length, char32_t* utf32_output) noexcept; + + /** + * Convert valid UTF-8 string into latin1 string. + * + * This function assumes that the input string is valid UTF-8. + * + * This function is not BOM-aware. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param latin1_output the pointer to buffer that can hold conversion result + * @return the number of written char; 0 if the input was not valid UTF-8 string + */ + simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * input, size_t length, char* latin1_output) noexcept; -/** - * Convert valid UTF-8 string into latin1 string. - * - * This function assumes that the input string is valid UTF-8. - * - * This function is not BOM-aware. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param latin1_output the pointer to buffer that can hold conversion result - * @return the number of written char; 0 if the input was not valid UTF-8 string - */ -simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char* input, size_t length, char* latin1_output) noexcept; /** * Using native endianness; Convert valid UTF-8 string into UTF-16 string. 
@@ -1273,7 +1267,7 @@ simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char* input, size_ * @param utf16_buffer the pointer to buffer that can hold conversion result * @return the number of written char16_t */ -simdutf_warn_unused size_t convert_valid_utf8_to_utf16(const char* input, size_t length, char16_t* utf16_buffer) noexcept; +simdutf_warn_unused size_t convert_valid_utf8_to_utf16(const char * input, size_t length, char16_t* utf16_buffer) noexcept; /** * Convert valid UTF-8 string into UTF-16LE string. @@ -1285,7 +1279,7 @@ simdutf_warn_unused size_t convert_valid_utf8_to_utf16(const char* input, size_t * @param utf16_buffer the pointer to buffer that can hold conversion result * @return the number of written char16_t */ -simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char* input, size_t length, char16_t* utf16_buffer) noexcept; +simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * input, size_t length, char16_t* utf16_buffer) noexcept; /** * Convert valid UTF-8 string into UTF-16BE string. @@ -1297,7 +1291,7 @@ simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char* input, size * @param utf16_buffer the pointer to buffer that can hold conversion result * @return the number of written char16_t */ -simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char* input, size_t length, char16_t* utf16_buffer) noexcept; +simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_buffer) noexcept; /** * Convert valid UTF-8 string into UTF-32 string. 
@@ -1309,29 +1303,30 @@ simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char* input, size * @param utf32_buffer the pointer to buffer that can hold conversion result * @return the number of written char32_t */ -simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char* input, size_t length, char32_t* utf32_buffer) noexcept; +simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) noexcept; -/** - * Return the number of bytes that this Latin1 string would require in UTF-8 format. - * - * @param input the Latin1 string to convert - * @param length the length of the string bytes - * @return the number of bytes required to encode the Latin1 string as UTF-8 - */ -simdutf_warn_unused size_t utf8_length_from_latin1(const char* input, size_t length) noexcept; -/** - * Compute the number of bytes that this UTF-8 string would require in Latin1 format. - * - * This function does not validate the input. - * - * This function is not BOM-aware. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in byte - * @return the number of bytes required to encode the UTF-8 string as Latin1 - */ -simdutf_warn_unused size_t latin1_length_from_utf8(const char* input, size_t length) noexcept; + /** + * Return the number of bytes that this Latin1 string would require in UTF-8 format. + * + * @param input the Latin1 string to convert + * @param length the length of the string bytes + * @return the number of bytes required to encode the Latin1 string as UTF-8 + */ + simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t length) noexcept; + + /** + * Compute the number of bytes that this UTF-8 string would require in Latin1 format. + * + * This function does not validate the input. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-8 string to convert + * @param length the length of the string in byte + * @return the number of bytes required to encode the UTF-8 string as Latin1 + */ + simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t length) noexcept; /** * Compute the number of 2-byte words that this UTF-8 string would require in UTF-16LE format. @@ -1344,7 +1339,7 @@ simdutf_warn_unused size_t latin1_length_from_utf8(const char* input, size_t len * @param length the length of the string in bytes * @return the number of char16_t words required to encode the UTF-8 string as UTF-16LE */ -simdutf_warn_unused size_t utf16_length_from_utf8(const char* input, size_t length) noexcept; +simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t length) noexcept; /** * Compute the number of 4-byte words that this UTF-8 string would require in UTF-32 format. @@ -1359,7 +1354,7 @@ simdutf_warn_unused size_t utf16_length_from_utf8(const char* input, size_t leng * @param length the length of the string in bytes * @return the number of char32_t words required to encode the UTF-8 string as UTF-32 */ -simdutf_warn_unused size_t utf32_length_from_utf8(const char* input, size_t length) noexcept; +simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) noexcept; /** * Using native endianness; Convert possibly broken UTF-16 string into UTF-8 string. @@ -1374,37 +1369,39 @@ simdutf_warn_unused size_t utf32_length_from_utf8(const char* input, size_t leng * @param utf8_buffer the pointer to buffer that can hold conversion result * @return number of written words; 0 if input is not a valid UTF-16LE string */ -simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t* input, size_t length, char* utf8_buffer) noexcept; - -/** - * Convert possibly broken UTF-16LE string into Latin1 string. - * - * During the conversion also validation of the input string is done. 
- * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return number of written words; 0 if input is not a valid UTF-16LE string - */ -simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t* input, size_t length, char* latin1_buffer) noexcept; +simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept; + + + /** + * Convert possibly broken UTF-16LE string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if input is not a valid UTF-16LE string + */ + simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept; + + /** + * Convert possibly broken UTF-16BE string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. 
+ */ + simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept; -/** - * Convert possibly broken UTF-16BE string into Latin1 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. - */ -simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t* input, size_t length, char* latin1_buffer) noexcept; /** * Convert possibly broken UTF-16LE string into UTF-8 string. @@ -1419,7 +1416,7 @@ simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t* input, size * @param utf8_buffer the pointer to buffer that can hold conversion result * @return number of written words; 0 if input is not a valid UTF-16LE string */ -simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t* input, size_t length, char* utf8_buffer) noexcept; +simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept; /** * Convert possibly broken UTF-16BE string into UTF-8 string. 
@@ -1434,35 +1431,36 @@ simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t* input, size_t * @param utf8_buffer the pointer to buffer that can hold conversion result * @return number of written words; 0 if input is not a valid UTF-16LE string */ -simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t* input, size_t length, char* utf8_buffer) noexcept; +simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept; + + /** + * Convert possibly broken UTF-16LE string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. + */ + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) noexcept; + + /** + * Convert possibly broken UTF-16BE string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * This function is not BOM-aware. + * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. 
+ */ + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) noexcept; -/** - * Convert possibly broken UTF-16LE string into Latin1 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. - */ -simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t* input, size_t length, char* latin1_buffer) noexcept; - -/** - * Convert possibly broken UTF-16BE string into Latin1 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. - */ -simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t* input, size_t length, char* latin1_buffer) noexcept; /** * Using native endianness; Convert possibly broken UTF-16 string into UTF-8 string and stop on error. 
@@ -1477,7 +1475,7 @@ simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t* * @param utf8_buffer the pointer to buffer that can hold conversion result * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. */ -simdutf_warn_unused result convert_utf16_to_utf8_with_errors(const char16_t* input, size_t length, char* utf8_buffer) noexcept; +simdutf_warn_unused result convert_utf16_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) noexcept; /** * Convert possibly broken UTF-16LE string into UTF-8 string and stop on error. @@ -1492,7 +1490,7 @@ simdutf_warn_unused result convert_utf16_to_utf8_with_errors(const char16_t* inp * @param utf8_buffer the pointer to buffer that can hold conversion result * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. */ -simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t* input, size_t length, char* utf8_buffer) noexcept; +simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) noexcept; /** * Convert possibly broken UTF-16BE string into UTF-8 string and stop on error. @@ -1507,7 +1505,7 @@ simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t* i * @param utf8_buffer the pointer to buffer that can hold conversion result * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. */ -simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t* input, size_t length, char* utf8_buffer) noexcept; +simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) noexcept; /** * Using native endianness; Convert valid UTF-16 string into UTF-8 string. 
@@ -1521,35 +1519,37 @@ simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t* i * @param utf8_buffer the pointer to buffer that can hold the conversion result * @return number of written words; 0 if conversion is not possible */ -simdutf_warn_unused size_t convert_valid_utf16_to_utf8(const char16_t* input, size_t length, char* utf8_buffer) noexcept; +simdutf_warn_unused size_t convert_valid_utf16_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept; -/** - * Convert valid UTF-16LE string into Latin1 string. - * - * This function assumes that the input string is valid UTF-8. - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return number of written words; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t* input, size_t length, char* latin1_buffer) noexcept; + /** + * Convert valid UTF-16LE string into Latin1 string. + * + * This function assumes that the input string is valid UTF-8. + + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if conversion is not possible + */ + simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept; + + /** + * Convert valid UTF-16BE string into Latin1 string. + * + * This function assumes that the input string is valid UTF-8. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if conversion is not possible + */ + simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept; -/** - * Convert valid UTF-16BE string into Latin1 string. - * - * This function assumes that the input string is valid UTF-8. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return number of written words; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t* input, size_t length, char* latin1_buffer) noexcept; /** * Convert valid UTF-16LE string into UTF-8 string. @@ -1563,7 +1563,7 @@ simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t* input * @param utf8_buffer the pointer to buffer that can hold the conversion result * @return number of written words; 0 if conversion is not possible */ -simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t* input, size_t length, char* utf8_buffer) noexcept; +simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept; /** * Convert valid UTF-16BE string into UTF-8 string. 
@@ -1577,7 +1577,7 @@ simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t* input, * @param utf8_buffer the pointer to buffer that can hold the conversion result * @return number of written words; 0 if conversion is not possible */ -simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t* input, size_t length, char* utf8_buffer) noexcept; +simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept; /** * Using native endianness; Convert possibly broken UTF-16 string into UTF-32 string. @@ -1592,7 +1592,7 @@ simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t* input, * @param utf32_buffer the pointer to buffer that can hold conversion result * @return number of written words; 0 if input is not a valid UTF-16LE string */ -simdutf_warn_unused size_t convert_utf16_to_utf32(const char16_t* input, size_t length, char32_t* utf32_buffer) noexcept; +simdutf_warn_unused size_t convert_utf16_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept; /** * Convert possibly broken UTF-16LE string into UTF-32 string. @@ -1607,7 +1607,7 @@ simdutf_warn_unused size_t convert_utf16_to_utf32(const char16_t* input, size_t * @param utf32_buffer the pointer to buffer that can hold conversion result * @return number of written words; 0 if input is not a valid UTF-16LE string */ -simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t* input, size_t length, char32_t* utf32_buffer) noexcept; +simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept; /** * Convert possibly broken UTF-16BE string into UTF-32 string. 
@@ -1622,7 +1622,7 @@ simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t* input, size_ * @param utf32_buffer the pointer to buffer that can hold conversion result * @return number of written words; 0 if input is not a valid UTF-16LE string */ -simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t* input, size_t length, char32_t* utf32_buffer) noexcept; +simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept; /** * Using native endianness; Convert possibly broken UTF-16 string into @@ -1638,7 +1638,7 @@ simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t* input, size_ * @param utf32_buffer the pointer to buffer that can hold conversion result * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful. */ -simdutf_warn_unused result convert_utf16_to_utf32_with_errors(const char16_t* input, size_t length, char32_t* utf32_buffer) noexcept; +simdutf_warn_unused result convert_utf16_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept; /** * Convert possibly broken UTF-16LE string into UTF-32 string and stop on error. @@ -1653,7 +1653,7 @@ simdutf_warn_unused result convert_utf16_to_utf32_with_errors(const char16_t* in * @param utf32_buffer the pointer to buffer that can hold conversion result * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful. */ -simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t* input, size_t length, char32_t* utf32_buffer) noexcept; +simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept; /** * Convert possibly broken UTF-16BE string into UTF-32 string and stop on error. 
@@ -1668,7 +1668,7 @@ simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t* * @param utf32_buffer the pointer to buffer that can hold conversion result * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful. */ -simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t* input, size_t length, char32_t* utf32_buffer) noexcept; +simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept; /** * Using native endianness; Convert valid UTF-16 string into UTF-32 string. @@ -1682,7 +1682,7 @@ simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t* * @param utf32_buffer the pointer to buffer that can hold the conversion result * @return number of written words; 0 if conversion is not possible */ -simdutf_warn_unused size_t convert_valid_utf16_to_utf32(const char16_t* input, size_t length, char32_t* utf32_buffer) noexcept; +simdutf_warn_unused size_t convert_valid_utf16_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept; /** * Convert valid UTF-16LE string into UTF-32 string. @@ -1696,7 +1696,7 @@ simdutf_warn_unused size_t convert_valid_utf16_to_utf32(const char16_t* input, s * @param utf32_buffer the pointer to buffer that can hold the conversion result * @return number of written words; 0 if conversion is not possible */ -simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t* input, size_t length, char32_t* utf32_buffer) noexcept; +simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept; /** * Convert valid UTF-16BE string into UTF-32 string. 
@@ -1710,20 +1710,22 @@ simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t* input, * @param utf32_buffer the pointer to buffer that can hold the conversion result * @return number of written words; 0 if conversion is not possible */ -simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t* input, size_t length, char32_t* utf32_buffer) noexcept; +simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept; + /* - * Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format. - * - * This function does not validate the input. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @return the number of bytes required to encode the UTF-16LE string as Latin1 - */ -simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept; + * Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format. + * + * This function does not validate the input. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @return the number of bytes required to encode the UTF-16LE string as Latin1 + */ + simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept; + /** * Using native endianness; Compute the number of bytes that this UTF-16 @@ -1735,7 +1737,7 @@ simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept; * @param length the length of the string in 2-byte words (char16_t) * @return the number of bytes required to encode the UTF-16LE string as UTF-8 */ -simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t* input, size_t length) noexcept; +simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t * input, size_t length) noexcept; /** * Compute the number of bytes that this UTF-16LE string would require in UTF-8 format. @@ -1746,7 +1748,7 @@ simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t* input, size_t * @param length the length of the string in 2-byte words (char16_t) * @return the number of bytes required to encode the UTF-16LE string as UTF-8 */ -simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t* input, size_t length) noexcept; +simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size_t length) noexcept; /** * Compute the number of bytes that this UTF-16BE string would require in UTF-8 format. @@ -1757,7 +1759,7 @@ simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t* input, size_ * @param length the length of the string in 2-byte words (char16_t) * @return the number of bytes required to encode the UTF-16BE string as UTF-8 */ -simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t* input, size_t length) noexcept; +simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t * input, size_t length) noexcept; /** * Convert possibly broken UTF-32 string into UTF-8 string. 
@@ -1772,7 +1774,7 @@ simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t* input, size_ * @param utf8_buffer the pointer to buffer that can hold conversion result * @return number of written words; 0 if input is not a valid UTF-32 string */ -simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t* input, size_t length, char* utf8_buffer) noexcept; +simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) noexcept; /** * Convert possibly broken UTF-32 string into UTF-8 string and stop on error. @@ -1787,7 +1789,7 @@ simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t* input, size_t l * @param utf8_buffer the pointer to buffer that can hold conversion result * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. */ -simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t* input, size_t length, char* utf8_buffer) noexcept; +simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * input, size_t length, char* utf8_buffer) noexcept; /** * Convert valid UTF-32 string into UTF-8 string. @@ -1801,7 +1803,7 @@ simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t* inp * @param utf8_buffer the pointer to buffer that can hold the conversion result * @return number of written words; 0 if conversion is not possible */ -simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t* input, size_t length, char* utf8_buffer) noexcept; +simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) noexcept; /** * Using native endianness; Convert possibly broken UTF-32 string into UTF-16 string. 
@@ -1816,7 +1818,7 @@ simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t* input, si * @param utf16_buffer the pointer to buffer that can hold conversion result * @return number of written words; 0 if input is not a valid UTF-32 string */ -simdutf_warn_unused size_t convert_utf32_to_utf16(const char32_t* input, size_t length, char16_t* utf16_buffer) noexcept; +simdutf_warn_unused size_t convert_utf32_to_utf16(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept; /** * Convert possibly broken UTF-32 string into UTF-16LE string. @@ -1831,53 +1833,54 @@ simdutf_warn_unused size_t convert_utf32_to_utf16(const char32_t* input, size_t * @param utf16_buffer the pointer to buffer that can hold conversion result * @return number of written words; 0 if input is not a valid UTF-32 string */ -simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t* input, size_t length, char16_t* utf16_buffer) noexcept; - -/** - * Convert possibly broken UTF-32 string into Latin1 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte words (char32_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return number of written words; 0 if input is not a valid UTF-32 string - */ - -simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t* input, size_t length, char* latin1_buffer) noexcept; - -/** - * Convert possibly broken UTF-32 string into Latin1 string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. 
- * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte words (char32_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. - */ - -simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t* input, size_t length, char* latin1_buffer) noexcept; - -/** - * Convert valid UTF-32 string into Latin1 string. - * - * This function assumes that the input string is valid UTF-32. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte words (char32_t) - * @param latin1_buffer the pointer to buffer that can hold the conversion result - * @return number of written words; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t* input, size_t length, char* latin1_buffer) noexcept; +simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept; + + /** + * Convert possibly broken UTF-32 string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if input is not a valid UTF-32 string + */ + + simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) noexcept; + + + /** + * Convert possibly broken UTF-32 string into Latin1 string and stop on error. 
+ * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. + */ + + simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * input, size_t length, char* latin1_buffer) noexcept; + + /** + * Convert valid UTF-32 string into Latin1 string. + * + * This function assumes that the input string is valid UTF-32. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @param latin1_buffer the pointer to buffer that can hold the conversion result + * @return number of written words; 0 if conversion is not possible + */ + simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) noexcept; /** * Convert possibly broken UTF-32 string into UTF-16BE string. 
@@ -1892,7 +1895,7 @@ simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t* input, * @param utf16_buffer the pointer to buffer that can hold conversion result * @return number of written words; 0 if input is not a valid UTF-32 string */ -simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t* input, size_t length, char16_t* utf16_buffer) noexcept; +simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept; /** * Using native endianness; Convert possibly broken UTF-32 string into UTF-16 @@ -1908,7 +1911,7 @@ simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t* input, size_ * @param utf16_buffer the pointer to buffer that can hold conversion result * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful. */ -simdutf_warn_unused result convert_utf32_to_utf16_with_errors(const char32_t* input, size_t length, char16_t* utf16_buffer) noexcept; +simdutf_warn_unused result convert_utf32_to_utf16_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept; /** * Convert possibly broken UTF-32 string into UTF-16LE string and stop on error. @@ -1923,7 +1926,7 @@ simdutf_warn_unused result convert_utf32_to_utf16_with_errors(const char32_t* in * @param utf16_buffer the pointer to buffer that can hold conversion result * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful. */ -simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t* input, size_t length, char16_t* utf16_buffer) noexcept; +simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept; /** * Convert possibly broken UTF-32 string into UTF-16BE string and stop on error. 
@@ -1938,7 +1941,7 @@ simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t* * @param utf16_buffer the pointer to buffer that can hold conversion result * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful. */ -simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t* input, size_t length, char16_t* utf16_buffer) noexcept; +simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept; /** * Using native endianness; Convert valid UTF-32 string into UTF-16 string. @@ -1952,7 +1955,7 @@ simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t* * @param utf16_buffer the pointer to buffer that can hold the conversion result * @return number of written words; 0 if conversion is not possible */ -simdutf_warn_unused size_t convert_valid_utf32_to_utf16(const char32_t* input, size_t length, char16_t* utf16_buffer) noexcept; +simdutf_warn_unused size_t convert_valid_utf32_to_utf16(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept; /** * Convert valid UTF-32 string into UTF-16LE string. @@ -1966,7 +1969,7 @@ simdutf_warn_unused size_t convert_valid_utf32_to_utf16(const char32_t* input, s * @param utf16_buffer the pointer to buffer that can hold the conversion result * @return number of written words; 0 if conversion is not possible */ -simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t* input, size_t length, char16_t* utf16_buffer) noexcept; +simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept; /** * Convert valid UTF-32 string into UTF-16BE string. 
@@ -1980,7 +1983,7 @@ simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t* input, * @param utf16_buffer the pointer to buffer that can hold the conversion result * @return number of written words; 0 if conversion is not possible */ -simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t* input, size_t length, char16_t* utf16_buffer) noexcept; +simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept; /** * Change the endianness of the input. Can be used to go from UTF-16LE to UTF-16BE or @@ -1994,7 +1997,7 @@ simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t* input, * @param length the length of the string in 2-byte words (char16_t) * @param output the pointer to buffer that can hold the conversion result */ -void change_endianness_utf16(const char16_t* input, size_t length, char16_t* output) noexcept; +void change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) noexcept; /** * Compute the number of bytes that this UTF-32 string would require in UTF-8 format. @@ -2005,7 +2008,7 @@ void change_endianness_utf16(const char16_t* input, size_t length, char16_t* out * @param length the length of the string in 4-byte words (char32_t) * @return the number of bytes required to encode the UTF-32 string as UTF-8 */ -simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t* input, size_t length) noexcept; +simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) noexcept; /** * Compute the number of two-byte words that this UTF-32 string would require in UTF-16 format. 
@@ -2016,7 +2019,7 @@ simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t* input, size_t * @param length the length of the string in 4-byte words (char32_t) * @return the number of bytes required to encode the UTF-32 string as UTF-16 */ -simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t* input, size_t length) noexcept; +simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) noexcept; /** * Using native endianness; Compute the number of bytes that this UTF-16 @@ -2032,7 +2035,7 @@ simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t* input, size_t * @param length the length of the string in 2-byte words (char16_t) * @return the number of bytes required to encode the UTF-16LE string as UTF-32 */ -simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t* input, size_t length) noexcept; +simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t * input, size_t length) noexcept; /** * Compute the number of bytes that this UTF-16LE string would require in UTF-32 format. @@ -2047,7 +2050,7 @@ simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t* input, size_t * @param length the length of the string in 2-byte words (char16_t) * @return the number of bytes required to encode the UTF-16LE string as UTF-32 */ -simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t* input, size_t length) noexcept; +simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, size_t length) noexcept; /** * Compute the number of bytes that this UTF-16BE string would require in UTF-32 format. 
@@ -2062,7 +2065,7 @@ simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t* input, size * @param length the length of the string in 2-byte words (char16_t) * @return the number of bytes required to encode the UTF-16BE string as UTF-32 */ -simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t* input, size_t length) noexcept; +simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, size_t length) noexcept; /** * Count the number of code points (characters) in the string assuming that @@ -2076,7 +2079,7 @@ simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t* input, size * @param length the length of the string in 2-byte words (char16_t) * @return number of code points */ -simdutf_warn_unused size_t count_utf16(const char16_t* input, size_t length) noexcept; +simdutf_warn_unused size_t count_utf16(const char16_t * input, size_t length) noexcept; /** * Count the number of code points (characters) in the string assuming that @@ -2090,7 +2093,7 @@ simdutf_warn_unused size_t count_utf16(const char16_t* input, size_t length) noe * @param length the length of the string in 2-byte words (char16_t) * @return number of code points */ -simdutf_warn_unused size_t count_utf16le(const char16_t* input, size_t length) noexcept; +simdutf_warn_unused size_t count_utf16le(const char16_t * input, size_t length) noexcept; /** * Count the number of code points (characters) in the string assuming that @@ -2104,7 +2107,7 @@ simdutf_warn_unused size_t count_utf16le(const char16_t* input, size_t length) n * @param length the length of the string in 2-byte words (char16_t) * @return number of code points */ -simdutf_warn_unused size_t count_utf16be(const char16_t* input, size_t length) noexcept; +simdutf_warn_unused size_t count_utf16be(const char16_t * input, size_t length) noexcept; /** * Count the number of code points (characters) in the string assuming that @@ -2116,7 +2119,7 @@ simdutf_warn_unused size_t count_utf16be(const 
char16_t* input, size_t length) n * @param length the length of the string in bytes * @return number of code points */ -simdutf_warn_unused size_t count_utf8(const char* input, size_t length) noexcept; +simdutf_warn_unused size_t count_utf8(const char * input, size_t length) noexcept; /** * An implementation of simdutf for a particular CPU architecture. @@ -2126,1080 +2129,1090 @@ simdutf_warn_unused size_t count_utf8(const char* input, size_t length) noexcept */ class implementation { public: - /** - * The name of this implementation. - * - * const implementation *impl = simdutf::active_implementation; - * cout << "simdutf is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; - * - * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" - */ - virtual const std::string& name() const { return _name; } - - /** - * The description of this implementation. - * - * const implementation *impl = simdutf::active_implementation; - * cout << "simdutf is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; - * - * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" - */ - virtual const std::string& description() const { return _description; } - - /** - * The instruction sets this implementation is compiled against - * and the current CPU match. This function may poll the current CPU/system - * and should therefore not be called too often if performance is a concern. - * - * - * @return true if the implementation can be safely used on the current system (determined at runtime) - */ - bool supported_by_runtime_system() const; - - /** - * This function will try to detect the encoding - * @param input the string to identify - * @param length the length of the string in bytes. 
- * @return the encoding type detected - */ - virtual encoding_type autodetect_encoding(const char* input, size_t length) const noexcept; - - /** - * This function will try to detect the possible encodings in one pass - * @param input the string to identify - * @param length the length of the string in bytes. - * @return the encoding type detected - */ - virtual int detect_encodings(const char* input, size_t length) const noexcept = 0; - - /** - * @private For internal implementation use - * - * The instruction sets this implementation is compiled against. - * - * @return a mask of all required `internal::instruction_set::` values - */ - virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; } - - /** - * Validate the UTF-8 string. - * - * Overridden by each implementation. - * - * @param buf the UTF-8 string to validate. - * @param len the length of the string in bytes. - * @return true if and only if the string is valid UTF-8. - */ - simdutf_warn_unused virtual bool validate_utf8(const char* buf, size_t len) const noexcept = 0; - - /** - * Validate the UTF-8 string and stop on errors. - * - * Overridden by each implementation. - * - * @param buf the UTF-8 string to validate. - * @param len the length of the string in bytes. - * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. - */ - simdutf_warn_unused virtual result validate_utf8_with_errors(const char* buf, size_t len) const noexcept = 0; - - /** - * Validate the ASCII string. - * - * Overridden by each implementation. - * - * @param buf the ASCII string to validate. - * @param len the length of the string in bytes. - * @return true if and only if the string is valid ASCII. - */ - simdutf_warn_unused virtual bool validate_ascii(const char* buf, size_t len) const noexcept = 0; - - /** - * Validate the ASCII string and stop on error. - * - * Overridden by each implementation. 
- * - * @param buf the ASCII string to validate. - * @param len the length of the string in bytes. - * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. - */ - simdutf_warn_unused virtual result validate_ascii_with_errors(const char* buf, size_t len) const noexcept = 0; - - /** - * Validate the UTF-16LE string.This function may be best when you expect - * the input to be almost always valid. Otherwise, consider using - * validate_utf16le_with_errors. - * - * Overridden by each implementation. - * - * This function is not BOM-aware. - * - * @param buf the UTF-16LE string to validate. - * @param len the length of the string in number of 2-byte words (char16_t). - * @return true if and only if the string is valid UTF-16LE. - */ - simdutf_warn_unused virtual bool validate_utf16le(const char16_t* buf, size_t len) const noexcept = 0; - - /** - * Validate the UTF-16BE string. This function may be best when you expect - * the input to be almost always valid. Otherwise, consider using - * validate_utf16be_with_errors. - * - * Overridden by each implementation. - * - * This function is not BOM-aware. - * - * @param buf the UTF-16BE string to validate. - * @param len the length of the string in number of 2-byte words (char16_t). - * @return true if and only if the string is valid UTF-16BE. - */ - simdutf_warn_unused virtual bool validate_utf16be(const char16_t* buf, size_t len) const noexcept = 0; - - /** - * Validate the UTF-16LE string and stop on error. It might be faster than - * validate_utf16le when an error is expected to occur early. - * - * Overridden by each implementation. - * - * This function is not BOM-aware. - * - * @param buf the UTF-16LE string to validate. - * @param len the length of the string in number of 2-byte words (char16_t). 
- * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. - */ - simdutf_warn_unused virtual result validate_utf16le_with_errors(const char16_t* buf, size_t len) const noexcept = 0; - - /** - * Validate the UTF-16BE string and stop on error. It might be faster than - * validate_utf16be when an error is expected to occur early. - * - * Overridden by each implementation. - * - * This function is not BOM-aware. - * - * @param buf the UTF-16BE string to validate. - * @param len the length of the string in number of 2-byte words (char16_t). - * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. - */ - simdutf_warn_unused virtual result validate_utf16be_with_errors(const char16_t* buf, size_t len) const noexcept = 0; - - /** - * Validate the UTF-32 string. - * - * Overridden by each implementation. - * - * This function is not BOM-aware. - * - * @param buf the UTF-32 string to validate. - * @param len the length of the string in number of 4-byte words (char32_t). - * @return true if and only if the string is valid UTF-32. - */ - simdutf_warn_unused virtual bool validate_utf32(const char32_t* buf, size_t len) const noexcept = 0; - - /** - * Validate the UTF-32 string and stop on error. - * - * Overridden by each implementation. - * - * This function is not BOM-aware. - * - * @param buf the UTF-32 string to validate. - * @param len the length of the string in number of 4-byte words (char32_t). - * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. - */ - simdutf_warn_unused virtual result validate_utf32_with_errors(const char32_t* buf, size_t len) const noexcept = 0; - - /** - * Convert Latin1 string into UTF8 string. - * - * This function is suitable to work with inputs from untrusted sources. 
- * - * @param input the Latin1 string to convert - * @param length the length of the string in bytes - * @param latin1_output the pointer to buffer that can hold conversion result - * @return the number of written char; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t convert_latin1_to_utf8(const char* input, size_t length, char* utf8_output) const noexcept = 0; - - /** - * Convert possibly Latin1 string into UTF-16LE string. - * - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the Latin1 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t convert_latin1_to_utf16le(const char* input, size_t length, char16_t* utf16_output) const noexcept = 0; - - /** - * Convert Latin1 string into UTF-16BE string. - * - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the Latin1 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t convert_latin1_to_utf16be(const char* input, size_t length, char16_t* utf16_output) const noexcept = 0; - - /** - * Convert Latin1 string into UTF-32 string. - * - * This function is suitable to work with inputs from untrusted sources. 
- * - * @param input the Latin1 string to convert - * @param length the length of the string in bytes - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return the number of written char32_t; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t convert_latin1_to_utf32(const char* input, size_t length, char32_t* utf32_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-8 string into latin1 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param latin1_output the pointer to buffer that can hold conversion result - * @return the number of written char; 0 if the input was not valid UTF-8 string - */ - simdutf_warn_unused virtual size_t convert_utf8_to_latin1(const char* input, size_t length, char* latin1_output) const noexcept = 0; - - /** - * Convert possibly broken UTF-8 string into latin1 string. with errors - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param latin1_output the pointer to buffer that can hold conversion result - * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. - */ - simdutf_warn_unused virtual result convert_utf8_to_latin1_with_errors(const char* input, size_t length, char* latin1_output) const noexcept = 0; - - /** - * Convert valid UTF-8 string into latin1 string. - * - * This function assumes that the input string is valid UTF-8. - * - * This function is not BOM-aware. 
- * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param latin1_output the pointer to buffer that can hold conversion result - * @return the number of written char; 0 if the input was not valid UTF-8 string - */ - simdutf_warn_unused virtual size_t convert_valid_utf8_to_latin1(const char* input, size_t length, char* latin1_output) const noexcept = 0; - - /** - * Convert possibly broken UTF-8 string into UTF-16LE string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t; 0 if the input was not valid UTF-8 string - */ - simdutf_warn_unused virtual size_t convert_utf8_to_utf16le(const char* input, size_t length, char16_t* utf16_output) const noexcept = 0; - - /** - * Convert possibly broken UTF-8 string into UTF-16BE string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t; 0 if the input was not valid UTF-8 string - */ - simdutf_warn_unused virtual size_t convert_utf8_to_utf16be(const char* input, size_t length, char16_t* utf16_output) const noexcept = 0; - - /** - * Convert possibly broken UTF-8 string into UTF-16LE string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. 
- * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. - */ - simdutf_warn_unused virtual result convert_utf8_to_utf16le_with_errors(const char* input, size_t length, char16_t* utf16_output) const noexcept = 0; - - /** - * Convert possibly broken UTF-8 string into UTF-16BE string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. - */ - simdutf_warn_unused virtual result convert_utf8_to_utf16be_with_errors(const char* input, size_t length, char16_t* utf16_output) const noexcept = 0; - - /** - * Convert possibly broken UTF-8 string into UTF-32 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t; 0 if the input was not valid UTF-8 string - */ - simdutf_warn_unused virtual size_t convert_utf8_to_utf32(const char* input, size_t length, char32_t* utf32_output) const noexcept = 0; - - /** - * Convert possibly broken UTF-8 string into UTF-32 string and stop on error. - * - * During the conversion also validation of the input string is done. 
- * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful. - */ - simdutf_warn_unused virtual result convert_utf8_to_utf32_with_errors(const char* input, size_t length, char32_t* utf32_output) const noexcept = 0; - - /** - * Convert valid UTF-8 string into UTF-16LE string. - * - * This function assumes that the input string is valid UTF-8. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t - */ - simdutf_warn_unused virtual size_t convert_valid_utf8_to_utf16le(const char* input, size_t length, char16_t* utf16_buffer) const noexcept = 0; - - /** - * Convert valid UTF-8 string into UTF-16BE string. - * - * This function assumes that the input string is valid UTF-8. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t - */ - simdutf_warn_unused virtual size_t convert_valid_utf8_to_utf16be(const char* input, size_t length, char16_t* utf16_buffer) const noexcept = 0; - - /** - * Convert valid UTF-8 string into UTF-32 string. - * - * This function assumes that the input string is valid UTF-8. 
- * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char32_t - */ - simdutf_warn_unused virtual size_t convert_valid_utf8_to_utf32(const char* input, size_t length, char32_t* utf32_buffer) const noexcept = 0; - /** - * Compute the number of 2-byte words that this UTF-8 string would require in UTF-16LE format. - * - * This function does not validate the input. - * - * @param input the UTF-8 string to process - * @param length the length of the string in bytes - * @return the number of char16_t words required to encode the UTF-8 string as UTF-16LE - */ - simdutf_warn_unused virtual size_t utf16_length_from_utf8(const char* input, size_t length) const noexcept = 0; - - /** - * Compute the number of 4-byte words that this UTF-8 string would require in UTF-32 format. - * - * This function is equivalent to count_utf8. - * - * This function does not validate the input. - * - * @param input the UTF-8 string to process - * @param length the length of the string in bytes - * @return the number of char32_t words required to encode the UTF-8 string as UTF-32 - */ - simdutf_warn_unused virtual size_t utf32_length_from_utf8(const char* input, size_t length) const noexcept = 0; - - /** - * Convert possibly broken UTF-16LE string into Latin1 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. 
- * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return number of written words; 0 if input is not a valid UTF-16LE string - */ - simdutf_warn_unused virtual size_t convert_utf16le_to_latin1(const char16_t* input, size_t length, char* latin1_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-16BE string into Latin1 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. - */ - simdutf_warn_unused virtual size_t convert_utf16be_to_latin1(const char16_t* input, size_t length, char* latin1_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-16LE string into Latin1 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. 
- */ - simdutf_warn_unused virtual result convert_utf16le_to_latin1_with_errors(const char16_t* input, size_t length, char* latin1_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-16BE string into Latin1 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. - */ - simdutf_warn_unused virtual result convert_utf16be_to_latin1_with_errors(const char16_t* input, size_t length, char* latin1_buffer) const noexcept = 0; - - /** - * Convert valid UTF-16LE string into Latin1 string. - * - * This function assumes that the input string is valid UTF-8. - - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return number of written words; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t convert_valid_utf16le_to_latin1(const char16_t* input, size_t length, char* latin1_buffer) const noexcept = 0; - - /** - * Convert valid UTF-16BE string into Latin1 string. - * - * This function assumes that the input string is valid UTF-8. - * - * This function is not BOM-aware. 
- * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return number of written words; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t convert_valid_utf16be_to_latin1(const char16_t* input, size_t length, char* latin1_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-16LE string into UTF-8 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return number of written words; 0 if input is not a valid UTF-16LE string - */ - simdutf_warn_unused virtual size_t convert_utf16le_to_utf8(const char16_t* input, size_t length, char* utf8_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-16BE string into UTF-8 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return number of written words; 0 if input is not a valid UTF-16BE string - */ - simdutf_warn_unused virtual size_t convert_utf16be_to_utf8(const char16_t* input, size_t length, char* utf8_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-16LE string into UTF-8 string and stop on error. - * - * During the conversion also validation of the input string is done. 
- * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. - */ - simdutf_warn_unused virtual result convert_utf16le_to_utf8_with_errors(const char16_t* input, size_t length, char* utf8_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-16BE string into UTF-8 string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. - */ - simdutf_warn_unused virtual result convert_utf16be_to_utf8_with_errors(const char16_t* input, size_t length, char* utf8_buffer) const noexcept = 0; - - /** - * Convert valid UTF-16LE string into UTF-8 string. - * - * This function assumes that the input string is valid UTF-16LE. - * - * This function is not BOM-aware. 
- * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param utf8_buffer the pointer to buffer that can hold the conversion result - * @return number of written words; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t convert_valid_utf16le_to_utf8(const char16_t* input, size_t length, char* utf8_buffer) const noexcept = 0; - - /** - * Convert valid UTF-16BE string into UTF-8 string. - * - * This function assumes that the input string is valid UTF-16BE. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param utf8_buffer the pointer to buffer that can hold the conversion result - * @return number of written words; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t convert_valid_utf16be_to_utf8(const char16_t* input, size_t length, char* utf8_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-16LE string into UTF-32 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return number of written words; 0 if input is not a valid UTF-16LE string - */ - simdutf_warn_unused virtual size_t convert_utf16le_to_utf32(const char16_t* input, size_t length, char32_t* utf32_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-16BE string into UTF-32 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. 
- * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return number of written words; 0 if input is not a valid UTF-16BE string - */ - simdutf_warn_unused virtual size_t convert_utf16be_to_utf32(const char16_t* input, size_t length, char32_t* utf32_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-16LE string into UTF-32 string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful. - */ - simdutf_warn_unused virtual result convert_utf16le_to_utf32_with_errors(const char16_t* input, size_t length, char32_t* utf32_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-16BE string into UTF-32 string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful. 
- */ - simdutf_warn_unused virtual result convert_utf16be_to_utf32_with_errors(const char16_t* input, size_t length, char32_t* utf32_buffer) const noexcept = 0; - - /** - * Convert valid UTF-16LE string into UTF-32 string. - * - * This function assumes that the input string is valid UTF-16LE. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param utf32_buffer the pointer to buffer that can hold the conversion result - * @return number of written words; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t convert_valid_utf16le_to_utf32(const char16_t* input, size_t length, char32_t* utf32_buffer) const noexcept = 0; - - /** - * Convert valid UTF-16LE string into UTF-32BE string. - * - * This function assumes that the input string is valid UTF-16BE. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @param utf32_buffer the pointer to buffer that can hold the conversion result - * @return number of written words; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t convert_valid_utf16be_to_utf32(const char16_t* input, size_t length, char32_t* utf32_buffer) const noexcept = 0; - - /** - * Compute the number of bytes that this UTF-16LE string would require in UTF-8 format. - * - * This function does not validate the input. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @return the number of bytes required to encode the UTF-16LE string as UTF-8 - */ - simdutf_warn_unused virtual size_t utf8_length_from_utf16le(const char16_t* input, size_t length) const noexcept = 0; - - /** - * Compute the number of bytes that this UTF-16BE string would require in UTF-8 format. 
- * - * This function does not validate the input. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @return the number of bytes required to encode the UTF-16BE string as UTF-8 - */ - simdutf_warn_unused virtual size_t utf8_length_from_utf16be(const char16_t* input, size_t length) const noexcept = 0; - - /** - * Convert possibly broken UTF-32 string into Latin1 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte words (char32_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return number of written words; 0 if input is not a valid UTF-32 string - */ - - simdutf_warn_unused virtual size_t convert_utf32_to_latin1(const char32_t* input, size_t length, char* latin1_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-32 string into Latin1 string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte words (char32_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. - */ - - simdutf_warn_unused virtual result convert_utf32_to_latin1_with_errors(const char32_t* input, size_t length, char* latin1_buffer) const noexcept = 0; - - /** - * Convert valid UTF-32 string into Latin1 string. 
- * - * This function assumes that the input string is valid UTF-32. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte words (char32_t) - * @param latin1_buffer the pointer to buffer that can hold the conversion result - * @return number of written words; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t convert_valid_utf32_to_latin1(const char32_t* input, size_t length, char* latin1_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-32 string into UTF-8 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte words (char32_t) - * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return number of written words; 0 if input is not a valid UTF-32 string - */ - simdutf_warn_unused virtual size_t convert_utf32_to_utf8(const char32_t* input, size_t length, char* utf8_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-32 string into UTF-8 string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte words (char32_t) - * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. 
- */ - simdutf_warn_unused virtual result convert_utf32_to_utf8_with_errors(const char32_t* input, size_t length, char* utf8_buffer) const noexcept = 0; - - /** - * Convert valid UTF-32 string into UTF-8 string. - * - * This function assumes that the input string is valid UTF-32. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte words (char32_t) - * @param utf8_buffer the pointer to buffer that can hold the conversion result - * @return number of written words; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf8(const char32_t* input, size_t length, char* utf8_buffer) const noexcept = 0; - - /** - * Return the number of bytes that this UTF-16 string would require in Latin1 format. - * - * - * @param input the UTF-16 string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @return the number of bytes required to encode the UTF-16 string as Latin1 - */ + /** + * The name of this implementation. + * + * const implementation *impl = simdutf::active_implementation; + * cout << "simdutf is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" + */ + virtual const std::string &name() const { return _name; } + + /** + * The description of this implementation. + * + * const implementation *impl = simdutf::active_implementation; + * cout << "simdutf is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @return the description of the implementation + */ + virtual const std::string &description() const { return _description; } + + /** + * The instruction sets this implementation is compiled against + * and the current CPU match. 
This function may poll the current CPU/system + * and should therefore not be called too often if performance is a concern. + * + * + * @return true if the implementation can be safely used on the current system (determined at runtime) + */ + bool supported_by_runtime_system() const; + + /** + * This function will try to detect the encoding + * @param input the string to identify + * @param length the length of the string in bytes. + * @return the encoding type detected + */ + virtual encoding_type autodetect_encoding(const char * input, size_t length) const noexcept; + + /** + * This function will try to detect the possible encodings in one pass + * @param input the string to identify + * @param length the length of the string in bytes. + * @return the encoding type detected + */ + virtual int detect_encodings(const char * input, size_t length) const noexcept = 0; + + /** + * @private For internal implementation use + * + * The instruction sets this implementation is compiled against. + * + * @return a mask of all required `internal::instruction_set::` values + */ + virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; } + + + /** + * Validate the UTF-8 string. + * + * Overridden by each implementation. + * + * @param buf the UTF-8 string to validate. + * @param len the length of the string in bytes. + * @return true if and only if the string is valid UTF-8. + */ + simdutf_warn_unused virtual bool validate_utf8(const char *buf, size_t len) const noexcept = 0; + + /** + * Validate the UTF-8 string and stop on errors. + * + * Overridden by each implementation. + * + * @param buf the UTF-8 string to validate. + * @param len the length of the string in bytes. + * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. 
+ */ + simdutf_warn_unused virtual result validate_utf8_with_errors(const char *buf, size_t len) const noexcept = 0; + + /** + * Validate the ASCII string. + * + * Overridden by each implementation. + * + * @param buf the ASCII string to validate. + * @param len the length of the string in bytes. + * @return true if and only if the string is valid ASCII. + */ + simdutf_warn_unused virtual bool validate_ascii(const char *buf, size_t len) const noexcept = 0; + + /** + * Validate the ASCII string and stop on error. + * + * Overridden by each implementation. + * + * @param buf the ASCII string to validate. + * @param len the length of the string in bytes. + * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. + */ + simdutf_warn_unused virtual result validate_ascii_with_errors(const char *buf, size_t len) const noexcept = 0; + + /** + * Validate the UTF-16LE string. This function may be best when you expect + * the input to be almost always valid. Otherwise, consider using + * validate_utf16le_with_errors. + * + * Overridden by each implementation. + * + * This function is not BOM-aware. + * + * @param buf the UTF-16LE string to validate. + * @param len the length of the string in number of 2-byte words (char16_t). + * @return true if and only if the string is valid UTF-16LE. + */ + simdutf_warn_unused virtual bool validate_utf16le(const char16_t *buf, size_t len) const noexcept = 0; + + /** + * Validate the UTF-16BE string. This function may be best when you expect + * the input to be almost always valid. Otherwise, consider using + * validate_utf16be_with_errors. + * + * Overridden by each implementation. + * + * This function is not BOM-aware. + * + * @param buf the UTF-16BE string to validate. + * @param len the length of the string in number of 2-byte words (char16_t). + * @return true if and only if the string is valid UTF-16BE.
+ */ + simdutf_warn_unused virtual bool validate_utf16be(const char16_t *buf, size_t len) const noexcept = 0; + + /** + * Validate the UTF-16LE string and stop on error. It might be faster than + * validate_utf16le when an error is expected to occur early. + * + * Overridden by each implementation. + * + * This function is not BOM-aware. + * + * @param buf the UTF-16LE string to validate. + * @param len the length of the string in number of 2-byte words (char16_t). + * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. + */ + simdutf_warn_unused virtual result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept = 0; + + /** + * Validate the UTF-16BE string and stop on error. It might be faster than + * validate_utf16be when an error is expected to occur early. + * + * Overridden by each implementation. + * + * This function is not BOM-aware. + * + * @param buf the UTF-16BE string to validate. + * @param len the length of the string in number of 2-byte words (char16_t). + * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. + */ + simdutf_warn_unused virtual result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept = 0; + + /** + * Validate the UTF-32 string. + * + * Overridden by each implementation. + * + * This function is not BOM-aware. + * + * @param buf the UTF-32 string to validate. + * @param len the length of the string in number of 4-byte words (char32_t). + * @return true if and only if the string is valid UTF-32. + */ + simdutf_warn_unused virtual bool validate_utf32(const char32_t *buf, size_t len) const noexcept = 0; + + /** + * Validate the UTF-32 string and stop on error. + * + * Overridden by each implementation. + * + * This function is not BOM-aware. + * + * @param buf the UTF-32 string to validate. 
+ * @param len the length of the string in number of 4-byte words (char32_t). + * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. + */ + simdutf_warn_unused virtual result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept = 0; + + /** + * Convert Latin1 string into UTF-8 string. + * + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf8_output the pointer to buffer that can hold conversion result + * @return the number of written char; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t convert_latin1_to_utf8(const char * input, size_t length, char* utf8_output) const noexcept = 0; + + + /** + * Convert Latin1 string into UTF-16LE string. + * + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf16_output the pointer to buffer that can hold conversion result + * @return the number of written char16_t; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t convert_latin1_to_utf16le(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0; + + /** + * Convert Latin1 string into UTF-16BE string. + * + * This function is suitable to work with inputs from untrusted sources.
+ * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return the number of written char16_t; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t convert_latin1_to_utf16be(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0; + + /** + * Convert Latin1 string into UTF-32 string. + * + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return the number of written char32_t; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t convert_latin1_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-8 string into latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param latin1_output the pointer to buffer that can hold conversion result + * @return the number of written char; 0 if the input was not valid UTF-8 string + */ + simdutf_warn_unused virtual size_t convert_utf8_to_latin1(const char * input, size_t length, char* latin1_output) const noexcept = 0; + + /** + * Convert possibly broken UTF-8 string into latin1 string. with errors + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. 
+ * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param latin1_output the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. + */ + simdutf_warn_unused virtual result convert_utf8_to_latin1_with_errors(const char * input, size_t length, char* latin1_output) const noexcept = 0; + + /** + * Convert valid UTF-8 string into latin1 string. + * + * This function assumes that the input string is valid UTF-8. + * + * This function is not BOM-aware. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param latin1_output the pointer to buffer that can hold conversion result + * @return the number of written char; 0 if the input was not valid UTF-8 string + */ + simdutf_warn_unused virtual size_t convert_valid_utf8_to_latin1(const char * input, size_t length, char* latin1_output) const noexcept = 0; + + + /** + * Convert possibly broken UTF-8 string into UTF-16LE string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return the number of written char16_t; 0 if the input was not valid UTF-8 string + */ + simdutf_warn_unused virtual size_t convert_utf8_to_utf16le(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0; + + /** + * Convert possibly broken UTF-8 string into UTF-16BE string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. 
+ * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return the number of written char16_t; 0 if the input was not valid UTF-8 string + */ + simdutf_warn_unused virtual size_t convert_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0; + + /** + * Convert possibly broken UTF-8 string into UTF-16LE string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. + */ + simdutf_warn_unused virtual result convert_utf8_to_utf16le_with_errors(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0; + + /** + * Convert possibly broken UTF-8 string into UTF-16BE string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. + */ + simdutf_warn_unused virtual result convert_utf8_to_utf16be_with_errors(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0; + + /** + * Convert possibly broken UTF-8 string into UTF-32 string. + * + * During the conversion also validation of the input string is done. 
+ * This function is suitable to work with inputs from untrusted sources. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf32_output the pointer to buffer that can hold conversion result + * @return the number of written char32_t; 0 if the input was not valid UTF-8 string + */ + simdutf_warn_unused virtual size_t convert_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_output) const noexcept = 0; + + /** + * Convert possibly broken UTF-8 string into UTF-32 string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf32_output the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful. + */ + simdutf_warn_unused virtual result convert_utf8_to_utf32_with_errors(const char * input, size_t length, char32_t* utf32_output) const noexcept = 0; + + /** + * Convert valid UTF-8 string into UTF-16LE string. + * + * This function assumes that the input string is valid UTF-8. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return the number of written char16_t + */ + simdutf_warn_unused virtual size_t convert_valid_utf8_to_utf16le(const char * input, size_t length, char16_t* utf16_buffer) const noexcept = 0; + +/** + * Convert valid UTF-8 string into UTF-16BE string. + * + * This function assumes that the input string is valid UTF-8.
+ * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return the number of written char16_t + */ + simdutf_warn_unused virtual size_t convert_valid_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_buffer) const noexcept = 0; + + /** + * Convert valid UTF-8 string into UTF-32 string. + * + * This function assumes that the input string is valid UTF-8. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return the number of written char32_t + */ + simdutf_warn_unused virtual size_t convert_valid_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) const noexcept = 0; + + /** + * Compute the number of 2-byte words that this UTF-8 string would require in UTF-16LE format. + * + * This function does not validate the input. + * + * @param input the UTF-8 string to process + * @param length the length of the string in bytes + * @return the number of char16_t words required to encode the UTF-8 string as UTF-16LE + */ + simdutf_warn_unused virtual size_t utf16_length_from_utf8(const char * input, size_t length) const noexcept = 0; + + /** + * Compute the number of 4-byte words that this UTF-8 string would require in UTF-32 format. + * + * This function is equivalent to count_utf8. + * + * This function does not validate the input. + * + * @param input the UTF-8 string to process + * @param length the length of the string in bytes + * @return the number of char32_t words required to encode the UTF-8 string as UTF-32 + */ + simdutf_warn_unused virtual size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept = 0; + + /** + * Convert possibly broken UTF-16LE string into Latin1 string. + * + * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if input is not a valid UTF-16LE string + */ + simdutf_warn_unused virtual size_t convert_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-16BE string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if input is not a valid UTF-16BE string + */ + simdutf_warn_unused virtual size_t convert_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-16LE string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+ */ + simdutf_warn_unused virtual result convert_utf16le_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-16BE string into Latin1 string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * This function is not BOM-aware. + * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. + */ + simdutf_warn_unused virtual result convert_utf16be_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0; + + /** + * Convert valid UTF-16LE string into Latin1 string. + * + * This function assumes that the input string is valid UTF-16LE. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t convert_valid_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0; + + /** + * Convert valid UTF-16BE string into Latin1 string. + * + * This function assumes that the input string is valid UTF-16BE. + * + * This function is not BOM-aware.
+ * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t convert_valid_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-16LE string into UTF-8 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param utf8_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if input is not a valid UTF-16LE string + */ + simdutf_warn_unused virtual size_t convert_utf16le_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-16BE string into UTF-8 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param utf8_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if input is not a valid UTF-16BE string + */ + simdutf_warn_unused virtual size_t convert_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-16LE string into UTF-8 string and stop on error. + * + * During the conversion also validation of the input string is done. 
+ * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param utf8_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. + */ + simdutf_warn_unused virtual result convert_utf16le_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-16BE string into UTF-8 string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param utf8_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. + */ + simdutf_warn_unused virtual result convert_utf16be_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0; + + /** + * Convert valid UTF-16LE string into UTF-8 string. + * + * This function assumes that the input string is valid UTF-16LE. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param utf8_buffer the pointer to buffer that can hold the conversion result + * @return number of written words; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t convert_valid_utf16le_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0; + + /** + * Convert valid UTF-16BE string into UTF-8 string. + * + * This function assumes that the input string is valid UTF-16BE. + * + * This function is not BOM-aware. + * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param utf8_buffer the pointer to buffer that can hold the conversion result + * @return number of written words; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t convert_valid_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-16LE string into UTF-32 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if input is not a valid UTF-16LE string + */ + simdutf_warn_unused virtual size_t convert_utf16le_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-16BE string into UTF-32 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if input is not a valid UTF-16BE string + */ + simdutf_warn_unused virtual size_t convert_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-16LE string into UTF-32 string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful. + */ + simdutf_warn_unused virtual result convert_utf16le_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-16BE string into UTF-32 string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful. 
+ */ + simdutf_warn_unused virtual result convert_utf16be_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0; + + /** + * Convert valid UTF-16LE string into UTF-32 string. + * + * This function assumes that the input string is valid UTF-16LE. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param utf32_buffer the pointer to buffer that can hold the conversion result + * @return number of written words; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t convert_valid_utf16le_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0; + + /** + * Convert valid UTF-16BE string into UTF-32 string. + * + * This function assumes that the input string is valid UTF-16BE. + * + * This function is not BOM-aware. + * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param utf32_buffer the pointer to buffer that can hold the conversion result + * @return number of written words; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t convert_valid_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0; + + /** + * Compute the number of bytes that this UTF-16LE string would require in UTF-8 format. + * + * This function does not validate the input. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @return the number of bytes required to encode the UTF-16LE string as UTF-8 + */ + simdutf_warn_unused virtual size_t utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept = 0; + + /** + * Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
+ * + * This function does not validate the input. + * + * This function is not BOM-aware. + * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @return the number of bytes required to encode the UTF-16BE string as UTF-8 + */ + simdutf_warn_unused virtual size_t utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept = 0; + + /** + * Convert possibly broken UTF-32 string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if input is not a valid UTF-32 string + */ + + simdutf_warn_unused virtual size_t convert_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-32 string into Latin1 string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. + */ + + simdutf_warn_unused virtual result convert_utf32_to_latin1_with_errors(const char32_t * input, size_t length, char* latin1_buffer) const noexcept = 0; + + /** + * Convert valid UTF-32 string into Latin1 string. 
+ * + * This function assumes that the input string is valid UTF-32. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @param latin1_buffer the pointer to buffer that can hold the conversion result + * @return number of written words; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t convert_valid_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-32 string into UTF-8 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @param utf8_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if input is not a valid UTF-32 string + */ + simdutf_warn_unused virtual size_t convert_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-32 string into UTF-8 string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @param utf8_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. 
+ */ + simdutf_warn_unused virtual result convert_utf32_to_utf8_with_errors(const char32_t * input, size_t length, char* utf8_buffer) const noexcept = 0; + + /** + * Convert valid UTF-32 string into UTF-8 string. + * + * This function assumes that the input string is valid UTF-32. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @param utf8_buffer the pointer to buffer that can hold the conversion result + * @return number of written words; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) const noexcept = 0; + + + /** + * Return the number of bytes that this UTF-16 string would require in Latin1 format. + * + * + * @param input the UTF-16 string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @return the number of bytes required to encode the UTF-16 string as Latin1 + */ simdutf_warn_unused virtual size_t utf16_length_from_latin1(size_t length) const noexcept = 0; - /** - * Convert possibly broken UTF-32 string into UTF-16LE string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte words (char32_t) - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return number of written words; 0 if input is not a valid UTF-32 string - */ - simdutf_warn_unused virtual size_t convert_utf32_to_utf16le(const char32_t* input, size_t length, char16_t* utf16_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-32 string into UTF-16BE string. - * - * During the conversion also validation of the input string is done. 
- * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte words (char32_t) - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return number of written words; 0 if input is not a valid UTF-32 string - */ - simdutf_warn_unused virtual size_t convert_utf32_to_utf16be(const char32_t* input, size_t length, char16_t* utf16_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-32 string into UTF-16LE string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte words (char32_t) - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful. - */ - simdutf_warn_unused virtual result convert_utf32_to_utf16le_with_errors(const char32_t* input, size_t length, char16_t* utf16_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-32 string into UTF-16BE string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte words (char32_t) - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful. 
- */ - simdutf_warn_unused virtual result convert_utf32_to_utf16be_with_errors(const char32_t* input, size_t length, char16_t* utf16_buffer) const noexcept = 0; - - /** - * Convert valid UTF-32 string into UTF-16LE string. - * - * This function assumes that the input string is valid UTF-32. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte words (char32_t) - * @param utf16_buffer the pointer to buffer that can hold the conversion result - * @return number of written words; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf16le(const char32_t* input, size_t length, char16_t* utf16_buffer) const noexcept = 0; - - /** - * Convert valid UTF-32 string into UTF-16BE string. - * - * This function assumes that the input string is valid UTF-32. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte words (char32_t) - * @param utf16_buffer the pointer to buffer that can hold the conversion result - * @return number of written words; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf16be(const char32_t* input, size_t length, char16_t* utf16_buffer) const noexcept = 0; + /** + * Convert possibly broken UTF-32 string into UTF-16LE string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if input is not a valid UTF-32 string + */ + simdutf_warn_unused virtual size_t convert_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-32 string into UTF-16BE string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if input is not a valid UTF-32 string + */ + simdutf_warn_unused virtual size_t convert_utf32_to_utf16be(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-32 string into UTF-16LE string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful. + */ + simdutf_warn_unused virtual result convert_utf32_to_utf16le_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-32 string into UTF-16BE string and stop on error. 
+ * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful. + */ + simdutf_warn_unused virtual result convert_utf32_to_utf16be_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0; + + /** + * Convert valid UTF-32 string into UTF-16LE string. + * + * This function assumes that the input string is valid UTF-32. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @param utf16_buffer the pointer to buffer that can hold the conversion result + * @return number of written words; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0; + + /** + * Convert valid UTF-32 string into UTF-16BE string. + * + * This function assumes that the input string is valid UTF-32. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @param utf16_buffer the pointer to buffer that can hold the conversion result + * @return number of written words; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf16be(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0; + + /** + * Change the endianness of the input. 
Can be used to go from UTF-16LE to UTF-16BE or + * from UTF-16BE to UTF-16LE. + * + * This function does not validate the input. + * + * This function is not BOM-aware. + * + * @param input the UTF-16 string to process + * @param length the length of the string in 2-byte words (char16_t) + * @param output the pointer to buffer that can hold the conversion result + */ + virtual void change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) const noexcept = 0; + + /** + * Return the number of bytes that this Latin1 string would require in UTF-8 format. + * + * @param input the Latin1 string to convert + * @param length the length of the string bytes + * @return the number of bytes required to encode the Latin1 string as UTF-8 + */ + simdutf_warn_unused virtual size_t utf8_length_from_latin1(const char * input, size_t length) const noexcept = 0; + + /** + * Compute the number of bytes that this UTF-32 string would require in UTF-8 format. + * + * This function does not validate the input. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @return the number of bytes required to encode the UTF-32 string as UTF-8 + */ + simdutf_warn_unused virtual size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept = 0; + + /** + * Compute the number of bytes that this UTF-32 string would require in Latin1 format. + * + * This function does not validate the input. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @return the number of bytes required to encode the UTF-32 string as Latin1 + */ + simdutf_warn_unused virtual size_t latin1_length_from_utf32( size_t length) const noexcept = 0; + + /** + * Compute the number of bytes that this UTF-8 string would require in Latin1 format. + * + * This function does not validate the input. 
+ * + * @param input the UTF-8 string to convert + * @param length the length of the string in byte + * @return the number of bytes required to encode the UTF-8 string as Latin1 + */ + simdutf_warn_unused virtual size_t latin1_length_from_utf8(const char * input, size_t length) const noexcept = 0; - /** - * Change the endianness of the input. Can be used to go from UTF-16LE to UTF-16BE or - * from UTF-16BE to UTF-16LE. - * - * This function does not validate the input. - * - * This function is not BOM-aware. - * - * @param input the UTF-16 string to process - * @param length the length of the string in 2-byte words (char16_t) - * @param output the pointer to buffer that can hold the conversion result - */ - virtual void change_endianness_utf16(const char16_t* input, size_t length, char16_t* output) const noexcept = 0; - - /** - * Return the number of bytes that this Latin1 string would require in UTF-8 format. - * - * @param input the Latin1 string to convert - * @param length the length of the string bytes - * @return the number of bytes required to encode the Latin1 string as UTF-8 - */ - simdutf_warn_unused virtual size_t utf8_length_from_latin1(const char* input, size_t length) const noexcept = 0; - - /** - * Compute the number of bytes that this UTF-32 string would require in UTF-8 format. - * - * This function does not validate the input. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte words (char32_t) - * @return the number of bytes required to encode the UTF-32 string as UTF-8 - */ - simdutf_warn_unused virtual size_t utf8_length_from_utf32(const char32_t* input, size_t length) const noexcept = 0; - - /** - * Compute the number of bytes that this UTF-32 string would require in Latin1 format. - * - * This function does not validate the input. 
- * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte words (char32_t) - * @return the number of bytes required to encode the UTF-32 string as Latin1 - */ - simdutf_warn_unused virtual size_t latin1_length_from_utf32(size_t length) const noexcept = 0; - - /** - * Compute the number of bytes that this UTF-8 string would require in Latin1 format. - * - * This function does not validate the input. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in byte - * @return the number of bytes required to encode the UTF-8 string as Latin1 - */ - simdutf_warn_unused virtual size_t latin1_length_from_utf8(const char* input, size_t length) const noexcept = 0; - - /* - * Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format. - * - * This function does not validate the input. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @return the number of bytes required to encode the UTF-16LE string as Latin1 - */ - simdutf_warn_unused virtual size_t latin1_length_from_utf16(size_t length) const noexcept = 0; - - /** - * Compute the number of two-byte words that this UTF-32 string would require in UTF-16 format. - * - * This function does not validate the input. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte words (char32_t) - * @return the number of bytes required to encode the UTF-32 string as UTF-16 - */ - simdutf_warn_unused virtual size_t utf16_length_from_utf32(const char32_t* input, size_t length) const noexcept = 0; - - /** - * Return the number of bytes that this UTF-32 string would require in Latin1 format. - * - * This function does not validate the input. 
- * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte words (char32_t) - * @return the number of bytes required to encode the UTF-32 string as Latin1 - */ +/* + * Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format. + * + * This function does not validate the input. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @return the number of bytes required to encode the UTF-16LE string as Latin1 + */ + simdutf_warn_unused virtual size_t latin1_length_from_utf16(size_t length) const noexcept = 0; + + /** + * Compute the number of two-byte words that this UTF-32 string would require in UTF-16 format. + * + * This function does not validate the input. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @return the number of bytes required to encode the UTF-32 string as UTF-16 + */ + simdutf_warn_unused virtual size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept = 0; + + + /** + * Return the number of bytes that this UTF-32 string would require in Latin1 format. + * + * This function does not validate the input. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @return the number of bytes required to encode the UTF-32 string as Latin1 + */ simdutf_warn_unused virtual size_t utf32_length_from_latin1(size_t length) const noexcept = 0; - /* - * Compute the number of bytes that this UTF-16LE string would require in UTF-32 format. - * - * This function is equivalent to count_utf16le. - * - * This function does not validate the input. - * - * This function is not BOM-aware. 
- * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @return the number of bytes required to encode the UTF-16LE string as UTF-32 - */ - simdutf_warn_unused virtual size_t utf32_length_from_utf16le(const char16_t* input, size_t length) const noexcept = 0; - - /* - * Compute the number of bytes that this UTF-16BE string would require in UTF-32 format. - * - * This function is equivalent to count_utf16be. - * - * This function does not validate the input. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte words (char16_t) - * @return the number of bytes required to encode the UTF-16BE string as UTF-32 - */ - simdutf_warn_unused virtual size_t utf32_length_from_utf16be(const char16_t* input, size_t length) const noexcept = 0; + /* + * Compute the number of bytes that this UTF-16LE string would require in UTF-32 format. + * + * This function is equivalent to count_utf16le. + * + * This function does not validate the input. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @return the number of bytes required to encode the UTF-16LE string as UTF-32 + */ + simdutf_warn_unused virtual size_t utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept = 0; + + /* + * Compute the number of bytes that this UTF-16BE string would require in UTF-32 format. + * + * This function is equivalent to count_utf16be. + * + * This function does not validate the input. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @return the number of bytes required to encode the UTF-16BE string as UTF-32 + */ + simdutf_warn_unused virtual size_t utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept = 0; + + /** + * Count the number of code points (characters) in the string assuming that + * it is valid. + * + * This function assumes that the input string is valid UTF-16LE. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to process + * @param length the length of the string in 2-byte words (char16_t) + * @return number of code points + */ + simdutf_warn_unused virtual size_t count_utf16le(const char16_t * input, size_t length) const noexcept = 0; + + /** + * Count the number of code points (characters) in the string assuming that + * it is valid. + * + * This function assumes that the input string is valid UTF-16BE. + * + * This function is not BOM-aware. + * + * @param input the UTF-16BE string to process + * @param length the length of the string in 2-byte words (char16_t) + * @return number of code points + */ + simdutf_warn_unused virtual size_t count_utf16be(const char16_t * input, size_t length) const noexcept = 0; + + + /** + * Count the number of code points (characters) in the string assuming that + * it is valid. + * + * This function assumes that the input string is valid UTF-8. + * + * @param input the UTF-8 string to process + * @param length the length of the string in bytes + * @return number of code points + */ + simdutf_warn_unused virtual size_t count_utf8(const char * input, size_t length) const noexcept = 0; - /** - * Count the number of code points (characters) in the string assuming that - * it is valid. - * - * This function assumes that the input string is valid UTF-16LE. - * - * This function is not BOM-aware. 
- * - * @param input the UTF-16LE string to process - * @param length the length of the string in 2-byte words (char16_t) - * @return number of code points - */ - simdutf_warn_unused virtual size_t count_utf16le(const char16_t* input, size_t length) const noexcept = 0; - /** - * Count the number of code points (characters) in the string assuming that - * it is valid. - * - * This function assumes that the input string is valid UTF-16BE. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to process - * @param length the length of the string in 2-byte words (char16_t) - * @return number of code points - */ - simdutf_warn_unused virtual size_t count_utf16be(const char16_t* input, size_t length) const noexcept = 0; - - /** - * Count the number of code points (characters) in the string assuming that - * it is valid. - * - * This function assumes that the input string is valid UTF-8. - * - * @param input the UTF-8 string to process - * @param length the length of the string in bytes - * @return number of code points - */ - simdutf_warn_unused virtual size_t count_utf8(const char* input, size_t length) const noexcept = 0; protected: - /** @private Construct an implementation with the given name and description. For subclasses. */ - simdutf_really_inline implementation( - std::string name, - std::string description, - uint32_t required_instruction_sets) - : _name(name) - , _description(description) - , _required_instruction_sets(required_instruction_sets) - { - } - virtual ~implementation() = default; + /** @private Construct an implementation with the given name and description. For subclasses. */ + simdutf_really_inline implementation( + std::string name, + std::string description, + uint32_t required_instruction_sets + ) : + _name(name), + _description(description), + _required_instruction_sets(required_instruction_sets) + { + } + virtual ~implementation()=default; private: - /** - * The name of this implementation. 
- */ - const std::string _name; - - /** - * The description of this implementation. - */ - const std::string _description; - - /** - * Instruction sets required for this implementation. - */ - const uint32_t _required_instruction_sets; + /** + * The name of this implementation. + */ + const std::string _name; + + /** + * The description of this implementation. + */ + const std::string _description; + + /** + * Instruction sets required for this implementation. + */ + const uint32_t _required_instruction_sets; }; /** @private */ @@ -3210,102 +3223,82 @@ namespace internal { */ class available_implementation_list { public: - /** Get the list of available implementations compiled into simdutf */ - simdutf_really_inline available_implementation_list() {} - /** Number of implementations */ - size_t size() const noexcept; - /** STL const begin() iterator */ - const implementation* const* begin() const noexcept; - /** STL const end() iterator */ - const implementation* const* end() const noexcept; - - /** - * Get the implementation with the given name. - * - * Case sensitive. - * - * const implementation *impl = simdutf::available_implementations["westmere"]; - * if (!impl) { exit(1); } - * if (!imp->supported_by_runtime_system()) { exit(1); } - * simdutf::active_implementation = impl; - * - * @param name the implementation to find, e.g. "westmere", "haswell", "arm64" - * @return the implementation, or nullptr if the parse failed. 
- */ - const implementation* operator[](const std::string& name) const noexcept - { - for (const implementation* impl : *this) { - if (impl->name() == name) { - return impl; - } - } - return nullptr; + /** Get the list of available implementations compiled into simdutf */ + simdutf_really_inline available_implementation_list() {} + /** Number of implementations */ + size_t size() const noexcept; + /** STL const begin() iterator */ + const implementation * const *begin() const noexcept; + /** STL const end() iterator */ + const implementation * const *end() const noexcept; + + /** + * Get the implementation with the given name. + * + * Case sensitive. + * + * const implementation *impl = simdutf::available_implementations["westmere"]; + * if (!impl) { exit(1); } + * if (!imp->supported_by_runtime_system()) { exit(1); } + * simdutf::active_implementation = impl; + * + * @param name the implementation to find, e.g. "westmere", "haswell", "arm64" + * @return the implementation, or nullptr if the parse failed. + */ + const implementation * operator[](const std::string &name) const noexcept { + for (const implementation * impl : *this) { + if (impl->name() == name) { return impl; } } - - /** - * Detect the most advanced implementation supported by the current host. - * - * This is used to initialize the implementation on startup. - * - * const implementation *impl = simdutf::available_implementation::detect_best_supported(); - * simdutf::active_implementation = impl; - * - * @return the most advanced supported implementation for the current host, or an - * implementation that returns UNSUPPORTED_ARCHITECTURE if there is no supported - * implementation. Will never return nullptr. - */ - const implementation* detect_best_supported() const noexcept; + return nullptr; + } + + /** + * Detect the most advanced implementation supported by the current host. + * + * This is used to initialize the implementation on startup. 
+ * + * const implementation *impl = simdutf::available_implementation::detect_best_supported(); + * simdutf::active_implementation = impl; + * + * @return the most advanced supported implementation for the current host, or an + * implementation that returns UNSUPPORTED_ARCHITECTURE if there is no supported + * implementation. Will never return nullptr. + */ + const implementation *detect_best_supported() const noexcept; }; template<typename T> class atomic_ptr { public: - atomic_ptr(T* _ptr) - : ptr { _ptr } - { - } + atomic_ptr(T *_ptr) : ptr{_ptr} {} #if defined(SIMDUTF_NO_THREADS) - operator const T*() const - { - return ptr; - } - const T& operator*() const { return *ptr; } - const T* operator->() const { return ptr; } - - operator T*() { return ptr; } - T& operator*() { return *ptr; } - T* operator->() { return ptr; } - atomic_ptr& operator=(T* _ptr) - { - ptr = _ptr; - return *this; - } + operator const T*() const { return ptr; } + const T& operator*() const { return *ptr; } + const T* operator->() const { return ptr; } + + operator T*() { return ptr; } + T& operator*() { return *ptr; } + T* operator->() { return ptr; } + atomic_ptr& operator=(T *_ptr) { ptr = _ptr; return *this; } #else - operator const T*() const - { - return ptr.load(); - } - const T& operator*() const { return *ptr; } - const T* operator->() const { return ptr.load(); } - - operator T*() { return ptr.load(); } - T& operator*() { return *ptr; } - T* operator->() { return ptr.load(); } - atomic_ptr& operator=(T* _ptr) - { - ptr = _ptr; - return *this; - } + operator const T*() const { return ptr.load(); } + const T& operator*() const { return *ptr; } + const T* operator->() const { return ptr.load(); } + + operator T*() { return ptr.load(); } + T& operator*() { return *ptr; } + T* operator->() { return ptr.load(); } + atomic_ptr& operator=(T *_ptr) { ptr = _ptr; return *this; } #endif private: #if defined(SIMDUTF_NO_THREADS) - T* ptr; + T* ptr; #else - std::atomic<T*> ptr; + 
std::atomic<T*> ptr; #endif }; @@ -3319,22 +3312,25 @@ class detect_best_supported_implementation_on_first_use; extern SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations(); /** - * The active implementation. - * - * Automatically initialized on first use to the most advanced implementation supported by this hardware. - */ + * The active implementation. + * + * Automatically initialized on first use to the most advanced implementation supported by this hardware. + */ extern SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr<const implementation>& get_active_implementation(); + } // namespace simdutf #endif // SIMDUTF_IMPLEMENTATION_H /* end file include/simdutf/implementation.h */ + // Implementation-internal files (must be included before the implementations themselves, to keep // amalgamation working--otherwise, the first time a file is included, it might be put inside the // #ifdef SIMDUTF_IMPLEMENTATION_ARM64/FALLBACK/etc., which means the other implementations can't // compile unless that implementation is turned on). + SIMDUTF_POP_DISABLE_WARNINGS #endif // SIMDUTF_H |