aboutsummaryrefslogtreecommitdiff
path: root/src/bun.js/bindings/simdutf.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/bun.js/bindings/simdutf.h')
-rw-r--r--src/bun.js/bindings/simdutf.h3610
1 files changed, 1803 insertions, 1807 deletions
diff --git a/src/bun.js/bindings/simdutf.h b/src/bun.js/bindings/simdutf.h
index 4c04ae467..7ad28528d 100644
--- a/src/bun.js/bindings/simdutf.h
+++ b/src/bun.js/bindings/simdutf.h
@@ -1,4 +1,4 @@
-/* auto-generated on 2023-06-21 08:09:45 -0400. Do not edit! */
+/* auto-generated on 2023-08-08 16:23:39 -0400. Do not edit! */
// dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf.h
/* begin file include/simdutf.h */
#ifndef SIMDUTF_H
@@ -78,7 +78,7 @@
#include <machine/endian.h>
#elif defined(sun) || defined(__sun) // defined(__APPLE__) || defined(__FreeBSD__)
#include <sys/byteorder.h>
-#else // defined(__APPLE__) || defined(__FreeBSD__)
+#else // defined(__APPLE__) || defined(__FreeBSD__)
#ifdef __has_include
#if __has_include(<endian.h>)
@@ -88,6 +88,7 @@
#endif // defined(__APPLE__) || defined(__FreeBSD__)
+
#if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) // either byte-order macro missing: default to little endian
#define SIMDUTF_IS_BIG_ENDIAN 0
#endif
@@ -100,6 +101,7 @@
#endif // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__
+
/**
* At this point in time, SIMDUTF_IS_BIG_ENDIAN is defined.
*/
@@ -137,9 +139,9 @@
#elif defined(__aarch64__) || defined(_M_ARM64)
#define SIMDUTF_IS_ARM64 1
#elif defined(__PPC64__) || defined(_M_PPC64)
-// #define SIMDUTF_IS_PPC64 1
-// The simdutf library does yet support SIMD acceleration under
-// POWER processors. Please see https://github.com/lemire/simdutf/issues/51
+//#define SIMDUTF_IS_PPC64 1
+// The simdutf library does not yet support SIMD acceleration under
+// POWER processors. Please see https://github.com/lemire/simdutf/issues/51
#elif defined(__s390__)
// s390 IBM system. Big endian.
#elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64
@@ -198,14 +200,14 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.")
// warning: clang attribute push can't be used within a namespace in clang up
// til 8.0 so SIMDUTF_TARGET_REGION and SIMDUTF_UNTARGET_REGION must be *outside* of a
// namespace.
-#define SIMDUTF_TARGET_REGION(T) \
- _Pragma(SIMDUTF_STRINGIFY( \
- clang attribute push(__attribute__((target(T))), apply_to = function)))
+#define SIMDUTF_TARGET_REGION(T) \
+ _Pragma(SIMDUTF_STRINGIFY( \
+ clang attribute push(__attribute__((target(T))), apply_to = function)))
#define SIMDUTF_UNTARGET_REGION _Pragma("clang attribute pop")
#elif defined(__GNUC__)
// GCC is easier
-#define SIMDUTF_TARGET_REGION(T) \
- _Pragma("GCC push_options") _Pragma(SIMDUTF_STRINGIFY(GCC target(T)))
+#define SIMDUTF_TARGET_REGION(T) \
+ _Pragma("GCC push_options") _Pragma(SIMDUTF_STRINGIFY(GCC target(T)))
#define SIMDUTF_UNTARGET_REGION _Pragma("GCC pop_options")
#endif // clang then gcc
@@ -258,11 +260,7 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.")
#define SIMDUTF_ASSUME(COND) __assume(COND)
#else
#define SIMDUTF_UNREACHABLE() __builtin_unreachable();
-#define SIMDUTF_ASSUME(COND) \
- do { \
- if (!(COND)) \
- __builtin_unreachable(); \
- } while (0)
+#define SIMDUTF_ASSUME(COND) do { if (!(COND)) __builtin_unreachable(); } while (0)
#endif
#else // NDEBUG
@@ -272,12 +270,14 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.")
#endif
+
#if defined(__GNUC__) && !defined(__clang__)
#if __GNUC__ >= 11
#define SIMDUTF_GCC11ORMORE 1
#endif // __GNUC__ >= 11
#endif // defined(__GNUC__) && !defined(__clang__)
+
#endif // SIMDUTF_PORTABILITY_H
/* end file include/simdutf/portability.h */
// dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf/avx512.h
@@ -295,86 +295,84 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.")
*/
#ifndef SIMDUTF_HAS_AVX512F
-#if defined(__AVX512F__) && __AVX512F__ == 1
-#define SIMDUTF_HAS_AVX512F 1
-#endif
+# if defined(__AVX512F__) && __AVX512F__ == 1
+# define SIMDUTF_HAS_AVX512F 1
+# endif
#endif
#ifndef SIMDUTF_HAS_AVX512DQ
-#if defined(__AVX512DQ__) && __AVX512DQ__ == 1
-#define SIMDUTF_HAS_AVX512DQ 1
-#endif
+# if defined(__AVX512DQ__) && __AVX512DQ__ == 1
+# define SIMDUTF_HAS_AVX512DQ 1
+# endif
#endif
#ifndef SIMDUTF_HAS_AVX512IFMA
-#if defined(__AVX512IFMA__) && __AVX512IFMA__ == 1
-#define SIMDUTF_HAS_AVX512IFMA 1
-#endif
+# if defined(__AVX512IFMA__) && __AVX512IFMA__ == 1
+# define SIMDUTF_HAS_AVX512IFMA 1
+# endif
#endif
#ifndef SIMDUTF_HAS_AVX512CD
-#if defined(__AVX512CD__) && __AVX512CD__ == 1
-#define SIMDUTF_HAS_AVX512CD 1
-#endif
+# if defined(__AVX512CD__) && __AVX512CD__ == 1
+# define SIMDUTF_HAS_AVX512CD 1
+# endif
#endif
#ifndef SIMDUTF_HAS_AVX512BW
-#if defined(__AVX512BW__) && __AVX512BW__ == 1
-#define SIMDUTF_HAS_AVX512BW 1
-#endif
+# if defined(__AVX512BW__) && __AVX512BW__ == 1
+# define SIMDUTF_HAS_AVX512BW 1
+# endif
#endif
#ifndef SIMDUTF_HAS_AVX512VL
-#if defined(__AVX512VL__) && __AVX512VL__ == 1
-#define SIMDUTF_HAS_AVX512VL 1
-#endif
+# if defined(__AVX512VL__) && __AVX512VL__ == 1
+# define SIMDUTF_HAS_AVX512VL 1
+# endif
#endif
#ifndef SIMDUTF_HAS_AVX512VBMI
-#if defined(__AVX512VBMI__) && __AVX512VBMI__ == 1
-#define SIMDUTF_HAS_AVX512VBMI 1
-#endif
+# if defined(__AVX512VBMI__) && __AVX512VBMI__ == 1
+# define SIMDUTF_HAS_AVX512VBMI 1
+# endif
#endif
#ifndef SIMDUTF_HAS_AVX512VBMI2
-#if defined(__AVX512VBMI2__) && __AVX512VBMI2__ == 1
-#define SIMDUTF_HAS_AVX512VBMI2 1
-#endif
+# if defined(__AVX512VBMI2__) && __AVX512VBMI2__ == 1
+# define SIMDUTF_HAS_AVX512VBMI2 1
+# endif
#endif
#ifndef SIMDUTF_HAS_AVX512VNNI
-#if defined(__AVX512VNNI__) && __AVX512VNNI__ == 1
-#define SIMDUTF_HAS_AVX512VNNI 1
-#endif
+# if defined(__AVX512VNNI__) && __AVX512VNNI__ == 1
+# define SIMDUTF_HAS_AVX512VNNI 1
+# endif
#endif
#ifndef SIMDUTF_HAS_AVX512BITALG
-#if defined(__AVX512BITALG__) && __AVX512BITALG__ == 1
-#define SIMDUTF_HAS_AVX512BITALG 1
-#endif
+# if defined(__AVX512BITALG__) && __AVX512BITALG__ == 1
+# define SIMDUTF_HAS_AVX512BITALG 1
+# endif
#endif
#ifndef SIMDUTF_HAS_AVX512VPOPCNTDQ
-#if defined(__AVX512VPOPCNTDQ__) && __AVX512VPOPCNTDQ__ == 1
-#define SIMDUTF_HAS_AVX512VPOPCNTDQ 1
-#endif
+# if defined(__AVX512VPOPCNTDQ__) && __AVX512VPOPCNTDQ__ == 1
+# define SIMDUTF_HAS_AVX512VPOPCNTDQ 1
+# endif
#endif
#endif // SIMDUTF_AVX512_H_
/* end file include/simdutf/avx512.h */
+
#if defined(__GNUC__)
-// Marks a block with a name so that MCA analysis can see it.
-#define SIMDUTF_BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name);
-#define SIMDUTF_END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name);
-#define SIMDUTF_DEBUG_BLOCK(name, block) \
- BEGIN_DEBUG_BLOCK(name); \
- block; \
- END_DEBUG_BLOCK(name);
+ // Marks a block with a name so that MCA analysis can see it; SIMDUTF_DEBUG_BLOCK wraps `block` between both markers.
+ #define SIMDUTF_BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name);
+ #define SIMDUTF_END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name);
+ #define SIMDUTF_DEBUG_BLOCK(name, block) SIMDUTF_BEGIN_DEBUG_BLOCK(name); block; SIMDUTF_END_DEBUG_BLOCK(name);
#else
-#define SIMDUTF_BEGIN_DEBUG_BLOCK(name)
-#define SIMDUTF_END_DEBUG_BLOCK(name)
-#define SIMDUTF_DEBUG_BLOCK(name, block)
+ #define SIMDUTF_BEGIN_DEBUG_BLOCK(name)
+ #define SIMDUTF_END_DEBUG_BLOCK(name)
+ #define SIMDUTF_DEBUG_BLOCK(name, block)
#endif
// Align to N-byte boundary
@@ -385,108 +383,103 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.")
#if defined(SIMDUTF_REGULAR_VISUAL_STUDIO)
-#define simdutf_really_inline __forceinline
-#define simdutf_never_inline __declspec(noinline)
-
-#define simdutf_unused
-#define simdutf_warn_unused
-
-#ifndef simdutf_likely
-#define simdutf_likely(x) x
-#endif
-#ifndef simdutf_unlikely
-#define simdutf_unlikely(x) x
-#endif
-
-#define SIMDUTF_PUSH_DISABLE_WARNINGS __pragma(warning(push))
-#define SIMDUTF_PUSH_DISABLE_ALL_WARNINGS __pragma(warning(push, 0))
-#define SIMDUTF_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning(disable \
- : WARNING_NUMBER))
-// Get rid of Intellisense-only warnings (Code Analysis)
-// Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910).
-#ifdef __has_include
-#if __has_include(<CppCoreCheck\Warnings.h>)
-#include <CppCoreCheck\Warnings.h>
-#define SIMDUTF_DISABLE_UNDESIRED_WARNINGS SIMDUTF_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS)
-#endif
-#endif
-
-#ifndef SIMDUTF_DISABLE_UNDESIRED_WARNINGS
-#define SIMDUTF_DISABLE_UNDESIRED_WARNINGS
-#endif
-
-#define SIMDUTF_DISABLE_DEPRECATED_WARNING SIMDUTF_DISABLE_VS_WARNING(4996)
-#define SIMDUTF_DISABLE_STRICT_OVERFLOW_WARNING
-#define SIMDUTF_POP_DISABLE_WARNINGS __pragma(warning(pop))
+ #define simdutf_really_inline __forceinline
+ #define simdutf_never_inline __declspec(noinline)
+
+ #define simdutf_unused
+ #define simdutf_warn_unused
+
+ #ifndef simdutf_likely
+ #define simdutf_likely(x) x
+ #endif
+ #ifndef simdutf_unlikely
+ #define simdutf_unlikely(x) x
+ #endif
+
+ #define SIMDUTF_PUSH_DISABLE_WARNINGS __pragma(warning( push ))
+ #define SIMDUTF_PUSH_DISABLE_ALL_WARNINGS __pragma(warning( push, 0 ))
+ #define SIMDUTF_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER ))
+ // Get rid of Intellisense-only warnings (Code Analysis)
+ // Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910).
+ #ifdef __has_include
+ #if __has_include(<CppCoreCheck\Warnings.h>)
+ #include <CppCoreCheck\Warnings.h>
+ #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS SIMDUTF_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS)
+ #endif
+ #endif
+
+ #ifndef SIMDUTF_DISABLE_UNDESIRED_WARNINGS
+ #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS
+ #endif
+
+ #define SIMDUTF_DISABLE_DEPRECATED_WARNING SIMDUTF_DISABLE_VS_WARNING(4996)
+ #define SIMDUTF_DISABLE_STRICT_OVERFLOW_WARNING
+ #define SIMDUTF_POP_DISABLE_WARNINGS __pragma(warning( pop ))
#else // SIMDUTF_REGULAR_VISUAL_STUDIO
-#define simdutf_really_inline inline __attribute__((always_inline))
-#define simdutf_never_inline inline __attribute__((noinline))
+ #define simdutf_really_inline inline __attribute__((always_inline))
+ #define simdutf_never_inline inline __attribute__((noinline))
+
+ #define simdutf_unused __attribute__((unused))
+ #define simdutf_warn_unused __attribute__((warn_unused_result))
+
+ #ifndef simdutf_likely
+ #define simdutf_likely(x) __builtin_expect(!!(x), 1)
+ #endif
+ #ifndef simdutf_unlikely
+ #define simdutf_unlikely(x) __builtin_expect(!!(x), 0)
+ #endif
+
+ #define SIMDUTF_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push")
+ // gcc doesn't seem to disable all warnings with all and extra, add warnings here as necessary
+ #define SIMDUTF_PUSH_DISABLE_ALL_WARNINGS SIMDUTF_PUSH_DISABLE_WARNINGS \
+ SIMDUTF_DISABLE_GCC_WARNING(-Weffc++) \
+ SIMDUTF_DISABLE_GCC_WARNING(-Wall) \
+ SIMDUTF_DISABLE_GCC_WARNING(-Wconversion) \
+ SIMDUTF_DISABLE_GCC_WARNING(-Wextra) \
+ SIMDUTF_DISABLE_GCC_WARNING(-Wattributes) \
+ SIMDUTF_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \
+ SIMDUTF_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \
+ SIMDUTF_DISABLE_GCC_WARNING(-Wreturn-type) \
+ SIMDUTF_DISABLE_GCC_WARNING(-Wshadow) \
+ SIMDUTF_DISABLE_GCC_WARNING(-Wunused-parameter) \
+ SIMDUTF_DISABLE_GCC_WARNING(-Wunused-variable)
+ #define SIMDUTF_PRAGMA(P) _Pragma(#P)
+ #define SIMDUTF_DISABLE_GCC_WARNING(WARNING) SIMDUTF_PRAGMA(GCC diagnostic ignored #WARNING)
+ #if defined(SIMDUTF_CLANG_VISUAL_STUDIO)
+ #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS SIMDUTF_DISABLE_GCC_WARNING(-Wmicrosoft-include)
+ #else
+ #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS
+ #endif
+ #define SIMDUTF_DISABLE_DEPRECATED_WARNING SIMDUTF_DISABLE_GCC_WARNING(-Wdeprecated-declarations)
+ #define SIMDUTF_DISABLE_STRICT_OVERFLOW_WARNING SIMDUTF_DISABLE_GCC_WARNING(-Wstrict-overflow)
+ #define SIMDUTF_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop")
-#define simdutf_unused __attribute__((unused))
-#define simdutf_warn_unused __attribute__((warn_unused_result))
-#ifndef simdutf_likely
-#define simdutf_likely(x) __builtin_expect(!!(x), 1)
-#endif
-#ifndef simdutf_unlikely
-#define simdutf_unlikely(x) __builtin_expect(!!(x), 0)
-#endif
-
-#define SIMDUTF_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push")
-// gcc doesn't seem to disable all warnings with all and extra, add warnings here as necessary
-#define SIMDUTF_PUSH_DISABLE_ALL_WARNINGS \
- SIMDUTF_PUSH_DISABLE_WARNINGS \
- SIMDUTF_DISABLE_GCC_WARNING(-Weffc++) \
- SIMDUTF_DISABLE_GCC_WARNING(-Wall) \
- SIMDUTF_DISABLE_GCC_WARNING(-Wconversion) \
- SIMDUTF_DISABLE_GCC_WARNING(-Wextra) \
- SIMDUTF_DISABLE_GCC_WARNING(-Wattributes) \
- SIMDUTF_DISABLE_GCC_WARNING(-Wimplicit - fallthrough) \
- SIMDUTF_DISABLE_GCC_WARNING(-Wnon - virtual - dtor) \
- SIMDUTF_DISABLE_GCC_WARNING(-Wreturn - type) \
- SIMDUTF_DISABLE_GCC_WARNING(-Wshadow) \
- SIMDUTF_DISABLE_GCC_WARNING(-Wunused - parameter) \
- SIMDUTF_DISABLE_GCC_WARNING(-Wunused - variable)
-#define SIMDUTF_PRAGMA(P) _Pragma(#P)
-#define SIMDUTF_DISABLE_GCC_WARNING(WARNING) SIMDUTF_PRAGMA(GCC diagnostic ignored #WARNING)
-#if defined(SIMDUTF_CLANG_VISUAL_STUDIO)
-#define SIMDUTF_DISABLE_UNDESIRED_WARNINGS SIMDUTF_DISABLE_GCC_WARNING(-Wmicrosoft - include)
-#else
-#define SIMDUTF_DISABLE_UNDESIRED_WARNINGS
-#endif
-#define SIMDUTF_DISABLE_DEPRECATED_WARNING SIMDUTF_DISABLE_GCC_WARNING(-Wdeprecated - declarations)
-#define SIMDUTF_DISABLE_STRICT_OVERFLOW_WARNING SIMDUTF_DISABLE_GCC_WARNING(-Wstrict - overflow)
-#define SIMDUTF_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop")
#endif // MSC_VER
#ifndef SIMDUTF_DLLIMPORTEXPORT
-#if defined(SIMDUTF_VISUAL_STUDIO)
-/**
- * It does not matter here whether you are using
- * the regular visual studio or clang under visual
- * studio.
- */
-#if SIMDUTF_USING_LIBRARY
-#define SIMDUTF_DLLIMPORTEXPORT __declspec(dllimport)
-#else
-#define SIMDUTF_DLLIMPORTEXPORT __declspec(dllexport)
-#endif
-#else
-#define SIMDUTF_DLLIMPORTEXPORT
-#endif
+ #if defined(SIMDUTF_VISUAL_STUDIO)
+ /**
+ * It does not matter here whether you are using
+ * the regular visual studio or clang under visual
+ * studio.
+ */
+ #if SIMDUTF_USING_LIBRARY
+ #define SIMDUTF_DLLIMPORTEXPORT __declspec(dllimport)
+ #else
+ #define SIMDUTF_DLLIMPORTEXPORT __declspec(dllexport)
+ #endif
+ #else
+ #define SIMDUTF_DLLIMPORTEXPORT
+ #endif
#endif
/// If EXPR is an error, returns it.
-#define SIMDUTF_TRY(EXPR) \
- { \
- auto _err = (EXPR); \
- if (_err) { \
- return _err; \
- } \
- }
+#define SIMDUTF_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } }
+
#endif // SIMDUTF_COMMON_DEFS_H
/* end file include/simdutf/common_defs.h */
@@ -497,19 +490,19 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.")
namespace simdutf {
enum encoding_type {
- UTF8 = 1, // BOM 0xef 0xbb 0xbf
- UTF16_LE = 2, // BOM 0xff 0xfe
- UTF16_BE = 4, // BOM 0xfe 0xff
- UTF32_LE = 8, // BOM 0xff 0xfe 0x00 0x00
- UTF32_BE = 16, // BOM 0x00 0x00 0xfe 0xff
- Latin1 = 32,
-
- unspecified = 0
+ UTF8 = 1, // BOM 0xef 0xbb 0xbf
+ UTF16_LE = 2, // BOM 0xff 0xfe
+ UTF16_BE = 4, // BOM 0xfe 0xff
+ UTF32_LE = 8, // BOM 0xff 0xfe 0x00 0x00
+ UTF32_BE = 16, // BOM 0x00 0x00 0xfe 0xff
+ Latin1 = 32,
+
+ unspecified = 0
};
enum endianness {
- LITTLE,
- BIG
+ LITTLE,
+ BIG
};
bool match_system(endianness e);
@@ -546,27 +539,27 @@ size_t bom_byte_size(encoding_type bom);
namespace simdutf {
enum error_code {
- SUCCESS = 0,
- HEADER_BITS, // Any byte must have fewer than 5 header bits.
- TOO_SHORT, // The leading byte must be followed by N-1 continuation bytes, where N is the UTF-8 character length
- // This is also the error when the input is truncated.
- TOO_LONG, // We either have too many consecutive continuation bytes or the string starts with a continuation byte.
- OVERLONG, // The decoded character must be above U+7F for two-byte characters, U+7FF for three-byte characters,
- // and U+FFFF for four-byte characters.
- TOO_LARGE, // The decoded character must be less than or equal to U+10FFFF,less than or equal than U+7F for ASCII OR less than equal than U+FF for Latin1
- SURROGATE, // The decoded character must be not be in U+D800...DFFF (UTF-8 or UTF-32) OR
- // a high surrogate must be followed by a low surrogate and a low surrogate must be preceded by a high surrogate (UTF-16) OR
- // there must be no surrogate at all (Latin1)
- OTHER // Not related to validation/transcoding.
+ SUCCESS = 0,
+ HEADER_BITS, // Any byte must have fewer than 5 header bits.
+ TOO_SHORT, // The leading byte must be followed by N-1 continuation bytes, where N is the UTF-8 character length
+ // This is also the error when the input is truncated.
+ TOO_LONG, // We either have too many consecutive continuation bytes or the string starts with a continuation byte.
+ OVERLONG, // The decoded character must be above U+7F for two-byte characters, U+7FF for three-byte characters,
+ // and U+FFFF for four-byte characters.
+ TOO_LARGE, // The decoded character must be less than or equal to U+10FFFF,less than or equal than U+7F for ASCII OR less than equal than U+FF for Latin1
+ SURROGATE, // The decoded character must be not be in U+D800...DFFF (UTF-8 or UTF-32) OR
+ // a high surrogate must be followed by a low surrogate and a low surrogate must be preceded by a high surrogate (UTF-16) OR
+ // there must be no surrogate at all (Latin1)
+ OTHER // Not related to validation/transcoding.
};
struct result {
- error_code error;
- size_t count; // In case of error, indicates the position of the error. In case of success, indicates the number of words validated/written.
+ error_code error;
+ size_t count; // In case of error, indicates the position of the error. In case of success, indicates the number of words validated/written.
- simdutf_really_inline result();
+ simdutf_really_inline result();
- simdutf_really_inline result(error_code, size_t);
+ simdutf_really_inline result(error_code, size_t);
};
}
@@ -589,18 +582,18 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS
namespace simdutf {
enum {
- /**
- * The major version (MAJOR.minor.revision) of simdutf being used.
- */
- SIMDUTF_VERSION_MAJOR = 3,
- /**
- * The minor version (major.MINOR.revision) of simdutf being used.
- */
- SIMDUTF_VERSION_MINOR = 2,
- /**
- * The revision (major.minor.REVISION) of simdutf being used.
- */
- SIMDUTF_VERSION_REVISION = 14
+ /**
+ * The major version (MAJOR.minor.revision) of simdutf being used.
+ */
+ SIMDUTF_VERSION_MAJOR = 3,
+ /**
+ * The minor version (major.MINOR.revision) of simdutf being used.
+ */
+ SIMDUTF_VERSION_MINOR = 2,
+ /**
+ * The revision (major.minor.REVISION) of simdutf being used.
+ */
+ SIMDUTF_VERSION_REVISION = 14
};
} // namespace simdutf
@@ -678,192 +671,191 @@ namespace simdutf {
namespace internal {
enum instruction_set {
- DEFAULT = 0x0,
- NEON = 0x1,
- AVX2 = 0x4,
- SSE42 = 0x8,
- PCLMULQDQ = 0x10,
- BMI1 = 0x20,
- BMI2 = 0x40,
- ALTIVEC = 0x80,
- AVX512F = 0x100,
- AVX512DQ = 0x200,
- AVX512IFMA = 0x400,
- AVX512PF = 0x800,
- AVX512ER = 0x1000,
- AVX512CD = 0x2000,
- AVX512BW = 0x4000,
- AVX512VL = 0x8000,
- AVX512VBMI2 = 0x10000
+ DEFAULT = 0x0, // enumerators are OR-ed into a uint32_t mask, so each needs its own bit
+ NEON = 0x1,
+ AVX2 = 0x4,
+ SSE42 = 0x8,
+ PCLMULQDQ = 0x10,
+ BMI1 = 0x20,
+ BMI2 = 0x40,
+ ALTIVEC = 0x80,
+ AVX512F = 0x100,
+ AVX512DQ = 0x200,
+ AVX512IFMA = 0x400,
+ AVX512PF = 0x800,
+ AVX512ER = 0x1000,
+ AVX512CD = 0x2000,
+ AVX512BW = 0x4000,
+ AVX512VL = 0x8000,
+ AVX512VBMI2 = 0x10000,
+ AVX512VPOPCNTDQ = 0x20000 // next free bit; 0x2000 would alias AVX512CD
};
#if defined(__PPC64__)
-static inline uint32_t detect_supported_architectures()
-{
- return instruction_set::ALTIVEC;
+static inline uint32_t detect_supported_architectures() { // __PPC64__ build: no runtime probing
+ return instruction_set::ALTIVEC; // always reports the ALTIVEC bit
}
#elif defined(__aarch64__) || defined(_M_ARM64)
-static inline uint32_t detect_supported_architectures()
-{
- return instruction_set::NEON;
+static inline uint32_t detect_supported_architectures() { // 64-bit ARM build: no runtime probing
+ return instruction_set::NEON; // always reports the NEON bit
}
#elif defined(__x86_64__) || defined(_M_AMD64) // x64
+
namespace {
namespace cpuid_bit {
-// Can be found on Intel ISA Reference for CPUID
-
-// EAX = 0x01
-constexpr uint32_t pclmulqdq = uint32_t(1) << 1; ///< @private bit 1 of ECX for EAX=0x1
-constexpr uint32_t sse42 = uint32_t(1) << 20; ///< @private bit 20 of ECX for EAX=0x1
-constexpr uint32_t osxsave = (uint32_t(1) << 26) | (uint32_t(1) << 27); ///< @private bits 26+27 of ECX for EAX=0x1
-
-// EAX = 0x7f (Structured Extended Feature Flags), ECX = 0x00 (Sub-leaf)
-// See: "Table 3-8. Information Returned by CPUID Instruction"
-namespace ebx {
-constexpr uint32_t bmi1 = uint32_t(1) << 3;
-constexpr uint32_t avx2 = uint32_t(1) << 5;
-constexpr uint32_t bmi2 = uint32_t(1) << 8;
-constexpr uint32_t avx512f = uint32_t(1) << 16;
-constexpr uint32_t avx512dq = uint32_t(1) << 17;
-constexpr uint32_t avx512ifma = uint32_t(1) << 21;
-constexpr uint32_t avx512cd = uint32_t(1) << 28;
-constexpr uint32_t avx512bw = uint32_t(1) << 30;
-constexpr uint32_t avx512vl = uint32_t(1) << 31;
-}
+ // Can be found on Intel ISA Reference for CPUID
+
+ // EAX = 0x01
+ constexpr uint32_t pclmulqdq = uint32_t(1) << 1; ///< @private bit 1 of ECX for EAX=0x1
+ constexpr uint32_t sse42 = uint32_t(1) << 20; ///< @private bit 20 of ECX for EAX=0x1
+ constexpr uint32_t osxsave = (uint32_t(1) << 26) | (uint32_t(1) << 27); ///< @private bits 26+27 of ECX for EAX=0x1
+
+ // EAX = 0x7 (Structured Extended Feature Flags), ECX = 0x00 (Sub-leaf)
+ // See: "Table 3-8. Information Returned by CPUID Instruction"
+ namespace ebx {
+ constexpr uint32_t bmi1 = uint32_t(1) << 3;
+ constexpr uint32_t avx2 = uint32_t(1) << 5;
+ constexpr uint32_t bmi2 = uint32_t(1) << 8;
+ constexpr uint32_t avx512f = uint32_t(1) << 16;
+ constexpr uint32_t avx512dq = uint32_t(1) << 17;
+ constexpr uint32_t avx512ifma = uint32_t(1) << 21;
+ constexpr uint32_t avx512cd = uint32_t(1) << 28;
+ constexpr uint32_t avx512bw = uint32_t(1) << 30;
+ constexpr uint32_t avx512vl = uint32_t(1) << 31;
+ }
-namespace ecx {
-constexpr uint32_t avx512vbmi = uint32_t(1) << 1;
-constexpr uint32_t avx512vbmi2 = uint32_t(1) << 6;
-constexpr uint32_t avx512vnni = uint32_t(1) << 11;
-constexpr uint32_t avx512bitalg = uint32_t(1) << 12;
-constexpr uint32_t avx512vpopcnt = uint32_t(1) << 14;
-}
-namespace edx {
-constexpr uint32_t avx512vp2intersect = uint32_t(1) << 8;
-}
-namespace xcr0_bit {
-constexpr uint64_t avx256_saved = uint64_t(1) << 2; ///< @private bit 2 = AVX
-constexpr uint64_t avx512_saved = uint64_t(7) << 5; ///< @private bits 5,6,7 = opmask, ZMM_hi256, hi16_ZMM
-}
-}
+ namespace ecx {
+ constexpr uint32_t avx512vbmi = uint32_t(1) << 1;
+ constexpr uint32_t avx512vbmi2 = uint32_t(1) << 6;
+ constexpr uint32_t avx512vnni = uint32_t(1) << 11;
+ constexpr uint32_t avx512bitalg = uint32_t(1) << 12;
+ constexpr uint32_t avx512vpopcnt = uint32_t(1) << 14;
+ }
+ namespace edx {
+ constexpr uint32_t avx512vp2intersect = uint32_t(1) << 8;
+ }
+ namespace xcr0_bit {
+ constexpr uint64_t avx256_saved = uint64_t(1) << 2; ///< @private bit 2 = AVX
+ constexpr uint64_t avx512_saved = uint64_t(7) << 5; ///< @private bits 5,6,7 = opmask, ZMM_hi256, hi16_ZMM
+ }
+ }
}
-static inline void cpuid(uint32_t* eax, uint32_t* ebx, uint32_t* ecx,
- uint32_t* edx)
-{
-#if defined(_MSC_VER)
- int cpu_info[4];
- __cpuidex(cpu_info, *eax, *ecx);
- *eax = cpu_info[0];
- *ebx = cpu_info[1];
- *ecx = cpu_info[2];
- *edx = cpu_info[3];
-#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
- uint32_t level = *eax;
- __get_cpuid(level, eax, ebx, ecx, edx);
-#else
- uint32_t a = *eax, b, c = *ecx, d;
- asm volatile("cpuid\n\t"
- : "+a"(a), "=b"(b), "+c"(c), "=d"(d));
- *eax = a;
- *ebx = b;
- *ecx = c;
- *edx = d;
-#endif
-}
-static inline uint64_t xgetbv()
-{
+
+static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
+ uint32_t *edx) {
#if defined(_MSC_VER)
- return _xgetbv(0);
+ int cpu_info[4];
+ __cpuidex(cpu_info, *eax, *ecx);
+ *eax = cpu_info[0];
+ *ebx = cpu_info[1];
+ *ecx = cpu_info[2];
+ *edx = cpu_info[3];
+#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
+ uint32_t level = *eax;
+ __get_cpuid(level, eax, ebx, ecx, edx);
#else
- uint32_t xcr0_lo, xcr0_hi;
- asm volatile("xgetbv\n\t"
- : "=a"(xcr0_lo), "=d"(xcr0_hi)
- : "c"(0));
- return xcr0_lo | ((uint64_t)xcr0_hi << 32);
+ uint32_t a = *eax, b, c = *ecx, d;
+ asm volatile("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d));
+ *eax = a;
+ *ebx = b;
+ *ecx = c;
+ *edx = d;
#endif
}
-static inline uint32_t detect_supported_architectures()
-{
- uint32_t eax;
- uint32_t ebx = 0;
- uint32_t ecx = 0;
- uint32_t edx = 0;
- uint32_t host_isa = 0x0;
-
- // EBX for EAX=0x1
- eax = 0x1;
- cpuid(&eax, &ebx, &ecx, &edx);
-
- if (ecx & cpuid_bit::sse42) {
- host_isa |= instruction_set::SSE42;
- }
-
- if (ecx & cpuid_bit::pclmulqdq) {
- host_isa |= instruction_set::PCLMULQDQ;
- }
-
- if ((ecx & cpuid_bit::osxsave) != cpuid_bit::osxsave) {
- return host_isa;
- }
+static inline uint64_t xgetbv() { // Returns the 64-bit value read by the xgetbv instruction with index 0.
+ #if defined(_MSC_VER)
+ return _xgetbv(0); // compiler intrinsic, index 0
+ #else
+ uint32_t xcr0_lo, xcr0_hi;
+ asm volatile("xgetbv\n\t" : "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0)); // ECX=0 in; low half out in EAX, high half out in EDX
+ return xcr0_lo | ((uint64_t)xcr0_hi << 32); // join the two 32-bit halves
+ #endif
+ }
+
+static inline uint32_t detect_supported_architectures() {
+ uint32_t eax;
+ uint32_t ebx = 0;
+ uint32_t ecx = 0;
+ uint32_t edx = 0;
+ uint32_t host_isa = 0x0;
+
+ // ECX for EAX=0x1
+ eax = 0x1;
+ cpuid(&eax, &ebx, &ecx, &edx);
+
+ if (ecx & cpuid_bit::sse42) {
+ host_isa |= instruction_set::SSE42;
+ }
+
+ if (ecx & cpuid_bit::pclmulqdq) {
+ host_isa |= instruction_set::PCLMULQDQ;
+ }
+
+ if ((ecx & cpuid_bit::osxsave) != cpuid_bit::osxsave) {
+ return host_isa;
+ }
- // xgetbv for checking if the OS saves registers
- uint64_t xcr0 = xgetbv();
+ // xgetbv for checking if the OS saves registers
+ uint64_t xcr0 = xgetbv();
- if ((xcr0 & cpuid_bit::xcr0_bit::avx256_saved) == 0) {
- return host_isa;
- }
- // ECX for EAX=0x7
- eax = 0x7;
- ecx = 0x0; // Sub-leaf = 0
- cpuid(&eax, &ebx, &ecx, &edx);
- if (ebx & cpuid_bit::ebx::avx2) {
- host_isa |= instruction_set::AVX2;
- }
- if (ebx & cpuid_bit::ebx::bmi1) {
- host_isa |= instruction_set::BMI1;
- }
- if (ebx & cpuid_bit::ebx::bmi2) {
- host_isa |= instruction_set::BMI2;
- }
- if (!((xcr0 & cpuid_bit::xcr0_bit::avx512_saved) == cpuid_bit::xcr0_bit::avx512_saved)) {
- return host_isa;
- }
- if (ebx & cpuid_bit::ebx::avx512f) {
- host_isa |= instruction_set::AVX512F;
- }
- if (ebx & cpuid_bit::ebx::avx512bw) {
- host_isa |= instruction_set::AVX512BW;
- }
- if (ebx & cpuid_bit::ebx::avx512cd) {
- host_isa |= instruction_set::AVX512CD;
- }
- if (ebx & cpuid_bit::ebx::avx512dq) {
- host_isa |= instruction_set::AVX512DQ;
- }
- if (ebx & cpuid_bit::ebx::avx512vl) {
- host_isa |= instruction_set::AVX512VL;
- }
- if (ecx & cpuid_bit::ecx::avx512vbmi2) {
- host_isa |= instruction_set::AVX512VBMI2;
- }
+ if ((xcr0 & cpuid_bit::xcr0_bit::avx256_saved) == 0) {
+ return host_isa;
+ }
+ // EBX and ECX for EAX=0x7
+ eax = 0x7;
+ ecx = 0x0; // Sub-leaf = 0
+ cpuid(&eax, &ebx, &ecx, &edx);
+ if (ebx & cpuid_bit::ebx::avx2) {
+ host_isa |= instruction_set::AVX2;
+ }
+ if (ebx & cpuid_bit::ebx::bmi1) {
+ host_isa |= instruction_set::BMI1;
+ }
+ if (ebx & cpuid_bit::ebx::bmi2) {
+ host_isa |= instruction_set::BMI2;
+ }
+ if (!((xcr0 & cpuid_bit::xcr0_bit::avx512_saved) == cpuid_bit::xcr0_bit::avx512_saved)) {
return host_isa;
+ }
+ if (ebx & cpuid_bit::ebx::avx512f) {
+ host_isa |= instruction_set::AVX512F;
+ }
+ if (ebx & cpuid_bit::ebx::avx512bw) {
+ host_isa |= instruction_set::AVX512BW;
+ }
+ if (ebx & cpuid_bit::ebx::avx512cd) {
+ host_isa |= instruction_set::AVX512CD;
+ }
+ if (ebx & cpuid_bit::ebx::avx512dq) {
+ host_isa |= instruction_set::AVX512DQ;
+ }
+ if (ebx & cpuid_bit::ebx::avx512vl) {
+ host_isa |= instruction_set::AVX512VL;
+ }
+ if (ecx & cpuid_bit::ecx::avx512vbmi2) {
+ host_isa |= instruction_set::AVX512VBMI2;
+ }
+ if (ecx & cpuid_bit::ecx::avx512vpopcnt) {
+ host_isa |= instruction_set::AVX512VPOPCNTDQ;
+ }
+ return host_isa;
}
#else // fallback
// includes 32-bit ARM.
-static inline uint32_t detect_supported_architectures()
-{
- return instruction_set::DEFAULT;
+static inline uint32_t detect_supported_architectures() { // fallback for targets not matched above
+ return instruction_set::DEFAULT; // DEFAULT is 0x0: no instruction-set bits reported
}
+
#endif // end SIMD extension detection code
} // namespace internal
@@ -872,6 +864,7 @@ static inline uint32_t detect_supported_architectures()
#endif // SIMDutf_INTERNAL_ISADETECTION_H
/* end file include/simdutf/internal/isadetection.h */
+
namespace simdutf {
/**
@@ -884,10 +877,9 @@ namespace simdutf {
* @param length the length of the string in bytes.
* @return the detected encoding type
*/
-simdutf_warn_unused simdutf::encoding_type autodetect_encoding(const char* input, size_t length) noexcept;
-simdutf_really_inline simdutf_warn_unused simdutf::encoding_type autodetect_encoding(const uint8_t* input, size_t length) noexcept
-{
- return autodetect_encoding(reinterpret_cast<const char*>(input), length);
+simdutf_warn_unused simdutf::encoding_type autodetect_encoding(const char * input, size_t length) noexcept;
+simdutf_really_inline simdutf_warn_unused simdutf::encoding_type autodetect_encoding(const uint8_t * input, size_t length) noexcept { // uint8_t convenience overload
+ return autodetect_encoding(reinterpret_cast<const char *>(input), length); // forwards to the const char * declaration above
}
/**
@@ -901,10 +893,9 @@ simdutf_really_inline simdutf_warn_unused simdutf::encoding_type autodetect_enco
* @param length the length of the string in bytes.
* @return the detected encoding type
*/
-simdutf_warn_unused int detect_encodings(const char* input, size_t length) noexcept;
-simdutf_really_inline simdutf_warn_unused int detect_encodings(const uint8_t* input, size_t length) noexcept
-{
- return detect_encodings(reinterpret_cast<const char*>(input), length);
+simdutf_warn_unused int detect_encodings(const char * input, size_t length) noexcept;
+simdutf_really_inline simdutf_warn_unused int detect_encodings(const uint8_t * input, size_t length) noexcept { // uint8_t convenience overload
+ return detect_encodings(reinterpret_cast<const char *>(input), length); // forwards to the const char * declaration above
}
/**
@@ -918,7 +909,7 @@ simdutf_really_inline simdutf_warn_unused int detect_encodings(const uint8_t* in
* @param len the length of the string in bytes.
* @return true if and only if the string is valid UTF-8.
*/
-simdutf_warn_unused bool validate_utf8(const char* buf, size_t len) noexcept;
+simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept;
/**
* Validate the UTF-8 string and stop on error.
@@ -929,7 +920,7 @@ simdutf_warn_unused bool validate_utf8(const char* buf, size_t len) noexcept;
* @param len the length of the string in bytes.
* @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
*/
-simdutf_warn_unused result validate_utf8_with_errors(const char* buf, size_t len) noexcept;
+simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) noexcept;
/**
* Validate the ASCII string.
@@ -940,7 +931,7 @@ simdutf_warn_unused result validate_utf8_with_errors(const char* buf, size_t len
* @param len the length of the string in bytes.
* @return true if and only if the string is valid ASCII.
*/
-simdutf_warn_unused bool validate_ascii(const char* buf, size_t len) noexcept;
+simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept;
/**
* Validate the ASCII string and stop on error. It might be faster than
@@ -952,7 +943,7 @@ simdutf_warn_unused bool validate_ascii(const char* buf, size_t len) noexcept;
* @param len the length of the string in bytes.
* @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
*/
-simdutf_warn_unused result validate_ascii_with_errors(const char* buf, size_t len) noexcept;
+simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) noexcept;
/**
* Using native endianness; Validate the UTF-16 string.
@@ -967,7 +958,7 @@ simdutf_warn_unused result validate_ascii_with_errors(const char* buf, size_t le
* @param len the length of the string in number of 2-byte words (char16_t).
* @return true if and only if the string is valid UTF-16.
*/
-simdutf_warn_unused bool validate_utf16(const char16_t* buf, size_t len) noexcept;
+simdutf_warn_unused bool validate_utf16(const char16_t *buf, size_t len) noexcept;
/**
* Validate the UTF-16LE string. This function may be best when you expect
@@ -982,7 +973,7 @@ simdutf_warn_unused bool validate_utf16(const char16_t* buf, size_t len) noexcep
* @param len the length of the string in number of 2-byte words (char16_t).
* @return true if and only if the string is valid UTF-16LE.
*/
-simdutf_warn_unused bool validate_utf16le(const char16_t* buf, size_t len) noexcept;
+simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) noexcept;
/**
* Validate the UTF-16BE string. This function may be best when you expect
@@ -997,7 +988,7 @@ simdutf_warn_unused bool validate_utf16le(const char16_t* buf, size_t len) noexc
* @param len the length of the string in number of 2-byte words (char16_t).
* @return true if and only if the string is valid UTF-16BE.
*/
-simdutf_warn_unused bool validate_utf16be(const char16_t* buf, size_t len) noexcept;
+simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) noexcept;
/**
* Using native endianness; Validate the UTF-16 string and stop on error.
@@ -1011,7 +1002,7 @@ simdutf_warn_unused bool validate_utf16be(const char16_t* buf, size_t len) noexc
* @param len the length of the string in number of 2-byte words (char16_t).
* @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
*/
-simdutf_warn_unused result validate_utf16_with_errors(const char16_t* buf, size_t len) noexcept;
+simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf, size_t len) noexcept;
/**
* Validate the UTF-16LE string and stop on error. It might be faster than
@@ -1025,7 +1016,7 @@ simdutf_warn_unused result validate_utf16_with_errors(const char16_t* buf, size_
* @param len the length of the string in number of 2-byte words (char16_t).
* @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
*/
-simdutf_warn_unused result validate_utf16le_with_errors(const char16_t* buf, size_t len) noexcept;
+simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) noexcept;
/**
* Validate the UTF-16BE string and stop on error. It might be faster than
@@ -1039,7 +1030,7 @@ simdutf_warn_unused result validate_utf16le_with_errors(const char16_t* buf, siz
* @param len the length of the string in number of 2-byte words (char16_t).
* @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
*/
-simdutf_warn_unused result validate_utf16be_with_errors(const char16_t* buf, size_t len) noexcept;
+simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) noexcept;
/**
* Validate the UTF-32 string. This function may be best when you expect
@@ -1054,7 +1045,7 @@ simdutf_warn_unused result validate_utf16be_with_errors(const char16_t* buf, siz
* @param len the length of the string in number of 4-byte words (char32_t).
* @return true if and only if the string is valid UTF-32.
*/
-simdutf_warn_unused bool validate_utf32(const char32_t* buf, size_t len) noexcept;
+simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) noexcept;
/**
* Validate the UTF-32 string and stop on error. It might be faster than
@@ -1068,68 +1059,69 @@ simdutf_warn_unused bool validate_utf32(const char32_t* buf, size_t len) noexcep
* @param len the length of the string in number of 4-byte words (char32_t).
* @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
*/
-simdutf_warn_unused result validate_utf32_with_errors(const char32_t* buf, size_t len) noexcept;
-
-/**
- * Convert Latin1 string into UTF8 string.
- *
- * This function is suitable to work with inputs from untrusted sources.
- *
- * @param input the Latin1 string to convert
- * @param length the length of the string in bytes
- * @param latin1_output the pointer to buffer that can hold conversion result
- * @return the number of written char; 0 if conversion is not possible
- */
-simdutf_warn_unused size_t convert_latin1_to_utf8(const char* input, size_t length, char* utf8_output) noexcept;
-
-/**
- * Convert possibly Latin1 string into UTF-16LE string.
- *
- * This function is suitable to work with inputs from untrusted sources.
- *
- * @param input the Latin1 string to convert
- * @param length the length of the string in bytes
- * @param utf16_buffer the pointer to buffer that can hold conversion result
- * @return the number of written char16_t; 0 if conversion is not possible
- */
-simdutf_warn_unused size_t convert_latin1_to_utf16le(const char* input, size_t length, char16_t* utf16_output) noexcept;
-
-/**
- * Convert Latin1 string into UTF-16BE string.
- *
- * This function is suitable to work with inputs from untrusted sources.
- *
- * @param input the Latin1 string to convert
- * @param length the length of the string in bytes
- * @param utf16_buffer the pointer to buffer that can hold conversion result
- * @return the number of written char16_t; 0 if conversion is not possible
- */
-simdutf_warn_unused size_t convert_latin1_to_utf16be(const char* input, size_t length, char16_t* utf16_output) noexcept;
-
-/**
- * Convert Latin1 string into UTF-32 string.
- *
- * This function is suitable to work with inputs from untrusted sources.
- *
- * @param input the Latin1 string to convert
- * @param length the length of the string in bytes
- * @param utf32_buffer the pointer to buffer that can hold conversion result
- * @return the number of written char32_t; 0 if conversion is not possible
- */
-simdutf_warn_unused size_t convert_latin1_to_utf32(const char* input, size_t length, char32_t* utf32_buffer) noexcept;
-
-/**
- * Convert possibly broken UTF-8 string into latin1 string.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * @param input the UTF-8 string to convert
- * @param length the length of the string in bytes
- * @param latin1_output the pointer to buffer that can hold conversion result
- * @return the number of written char; 0 if the input was not valid UTF-8 string
- */
-simdutf_warn_unused size_t convert_utf8_to_latin1(const char* input, size_t length, char* latin1_output) noexcept;
+simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) noexcept;
+
+ /**
+ * Convert Latin1 string into UTF8 string.
+ *
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the Latin1 string to convert
+ * @param length the length of the string in bytes
+ * @param utf8_output the pointer to buffer that can hold conversion result
+ * @return the number of written char; 0 if conversion is not possible
+ */
+ simdutf_warn_unused size_t convert_latin1_to_utf8(const char * input, size_t length, char* utf8_output) noexcept;
+
+
+ /**
+ * Convert Latin1 string into UTF-16LE string.
+ *
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the Latin1 string to convert
+ * @param length the length of the string in bytes
+ * @param utf16_buffer the pointer to buffer that can hold conversion result
+ * @return the number of written char16_t; 0 if conversion is not possible
+ */
+ simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * input, size_t length, char16_t* utf16_output) noexcept;
+
+ /**
+ * Convert Latin1 string into UTF-16BE string.
+ *
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the Latin1 string to convert
+ * @param length the length of the string in bytes
+ * @param utf16_buffer the pointer to buffer that can hold conversion result
+ * @return the number of written char16_t; 0 if conversion is not possible
+ */
+ simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * input, size_t length, char16_t* utf16_output) noexcept;
+
+ /**
+ * Convert Latin1 string into UTF-32 string.
+ *
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the Latin1 string to convert
+ * @param length the length of the string in bytes
+ * @param utf32_buffer the pointer to buffer that can hold conversion result
+ * @return the number of written char32_t; 0 if conversion is not possible
+ */
+ simdutf_warn_unused size_t convert_latin1_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) noexcept;
+
+ /**
+ * Convert possibly broken UTF-8 string into latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in bytes
+ * @param latin1_output the pointer to buffer that can hold conversion result
+ * @return the number of written char; 0 if the input was not valid UTF-8 string
+ */
+ simdutf_warn_unused size_t convert_utf8_to_latin1(const char * input, size_t length, char* latin1_output) noexcept;
/**
* Using native endianness; Convert possibly broken UTF-8 string into UTF-16 string.
@@ -1142,7 +1134,7 @@ simdutf_warn_unused size_t convert_utf8_to_latin1(const char* input, size_t leng
* @param utf16_buffer the pointer to buffer that can hold conversion result
* @return the number of written char16_t; 0 if the input was not valid UTF-8 string
*/
-simdutf_warn_unused size_t convert_utf8_to_utf16(const char* input, size_t length, char16_t* utf16_output) noexcept;
+simdutf_warn_unused size_t convert_utf8_to_utf16(const char * input, size_t length, char16_t* utf16_output) noexcept;
/**
* Convert possibly broken UTF-8 string into UTF-16LE string.
@@ -1155,7 +1147,7 @@ simdutf_warn_unused size_t convert_utf8_to_utf16(const char* input, size_t lengt
* @param utf16_buffer the pointer to buffer that can hold conversion result
* @return the number of written char16_t; 0 if the input was not valid UTF-8 string
*/
-simdutf_warn_unused size_t convert_utf8_to_utf16le(const char* input, size_t length, char16_t* utf16_output) noexcept;
+simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * input, size_t length, char16_t* utf16_output) noexcept;
/**
* Convert possibly broken UTF-8 string into UTF-16BE string.
@@ -1168,20 +1160,21 @@ simdutf_warn_unused size_t convert_utf8_to_utf16le(const char* input, size_t len
* @param utf16_buffer the pointer to buffer that can hold conversion result
* @return the number of written char16_t; 0 if the input was not valid UTF-8 string
*/
-simdutf_warn_unused size_t convert_utf8_to_utf16be(const char* input, size_t length, char16_t* utf16_output) noexcept;
+simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_output) noexcept;
-/**
- * Convert possibly broken UTF-8 string into latin1 string. with errors
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * @param input the UTF-8 string to convert
- * @param length the length of the string in bytes
- * @param latin1_output the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
- */
-simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char* input, size_t length, char* latin1_output) noexcept;
+
+ /**
+ * Convert possibly broken UTF-8 string into latin1 string and stop on error.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in bytes
+ * @param latin1_output the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+ */
+ simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * input, size_t length, char* latin1_output) noexcept;
/**
* Using native endianness; Convert possibly broken UTF-8 string into UTF-16
@@ -1195,7 +1188,7 @@ simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char* input,
* @param utf16_buffer the pointer to buffer that can hold conversion result
* @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
*/
-simdutf_warn_unused result convert_utf8_to_utf16_with_errors(const char* input, size_t length, char16_t* utf16_output) noexcept;
+simdutf_warn_unused result convert_utf8_to_utf16_with_errors(const char * input, size_t length, char16_t* utf16_output) noexcept;
/**
* Convert possibly broken UTF-8 string into UTF-16LE string and stop on error.
@@ -1208,7 +1201,7 @@ simdutf_warn_unused result convert_utf8_to_utf16_with_errors(const char* input,
* @param utf16_buffer the pointer to buffer that can hold conversion result
* @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
*/
-simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char* input, size_t length, char16_t* utf16_output) noexcept;
+simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * input, size_t length, char16_t* utf16_output) noexcept;
/**
* Convert possibly broken UTF-8 string into UTF-16BE string and stop on error.
@@ -1221,7 +1214,7 @@ simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char* input
* @param utf16_buffer the pointer to buffer that can hold conversion result
* @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
*/
-simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char* input, size_t length, char16_t* utf16_output) noexcept;
+simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * input, size_t length, char16_t* utf16_output) noexcept;
/**
* Convert possibly broken UTF-8 string into UTF-32 string.
@@ -1234,7 +1227,7 @@ simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char* input
* @param utf32_buffer the pointer to buffer that can hold conversion result
* @return the number of written char32_t; 0 if the input was not valid UTF-8 string
*/
-simdutf_warn_unused size_t convert_utf8_to_utf32(const char* input, size_t length, char32_t* utf32_output) noexcept;
+simdutf_warn_unused size_t convert_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_output) noexcept;
/**
* Convert possibly broken UTF-8 string into UTF-32 string and stop on error.
@@ -1247,21 +1240,22 @@ simdutf_warn_unused size_t convert_utf8_to_utf32(const char* input, size_t lengt
* @param utf32_buffer the pointer to buffer that can hold conversion result
* @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful.
*/
-simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char* input, size_t length, char32_t* utf32_output) noexcept;
+simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * input, size_t length, char32_t* utf32_output) noexcept;
+
+ /**
+ * Convert valid UTF-8 string into latin1 string.
+ *
+ * This function assumes that the input string is valid UTF-8.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in bytes
+ * @param latin1_output the pointer to buffer that can hold conversion result
+ * @return the number of written char; 0 if the input was not valid UTF-8 string
+ */
+ simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * input, size_t length, char* latin1_output) noexcept;
-/**
- * Convert valid UTF-8 string into latin1 string.
- *
- * This function assumes that the input string is valid UTF-8.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-8 string to convert
- * @param length the length of the string in bytes
- * @param latin1_output the pointer to buffer that can hold conversion result
- * @return the number of written char; 0 if the input was not valid UTF-8 string
- */
-simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char* input, size_t length, char* latin1_output) noexcept;
/**
* Using native endianness; Convert valid UTF-8 string into UTF-16 string.
@@ -1273,7 +1267,7 @@ simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char* input, size_
* @param utf16_buffer the pointer to buffer that can hold conversion result
* @return the number of written char16_t
*/
-simdutf_warn_unused size_t convert_valid_utf8_to_utf16(const char* input, size_t length, char16_t* utf16_buffer) noexcept;
+simdutf_warn_unused size_t convert_valid_utf8_to_utf16(const char * input, size_t length, char16_t* utf16_buffer) noexcept;
/**
* Convert valid UTF-8 string into UTF-16LE string.
@@ -1285,7 +1279,7 @@ simdutf_warn_unused size_t convert_valid_utf8_to_utf16(const char* input, size_t
* @param utf16_buffer the pointer to buffer that can hold conversion result
* @return the number of written char16_t
*/
-simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char* input, size_t length, char16_t* utf16_buffer) noexcept;
+simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char * input, size_t length, char16_t* utf16_buffer) noexcept;
/**
* Convert valid UTF-8 string into UTF-16BE string.
@@ -1297,7 +1291,7 @@ simdutf_warn_unused size_t convert_valid_utf8_to_utf16le(const char* input, size
* @param utf16_buffer the pointer to buffer that can hold conversion result
* @return the number of written char16_t
*/
-simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char* input, size_t length, char16_t* utf16_buffer) noexcept;
+simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_buffer) noexcept;
/**
* Convert valid UTF-8 string into UTF-32 string.
@@ -1309,29 +1303,30 @@ simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char* input, size
* @param utf32_buffer the pointer to buffer that can hold conversion result
* @return the number of written char32_t
*/
-simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char* input, size_t length, char32_t* utf32_buffer) noexcept;
+simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) noexcept;
-/**
- * Return the number of bytes that this Latin1 string would require in UTF-8 format.
- *
- * @param input the Latin1 string to convert
- * @param length the length of the string bytes
- * @return the number of bytes required to encode the Latin1 string as UTF-8
- */
-simdutf_warn_unused size_t utf8_length_from_latin1(const char* input, size_t length) noexcept;
-/**
- * Compute the number of bytes that this UTF-8 string would require in Latin1 format.
- *
- * This function does not validate the input.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-8 string to convert
- * @param length the length of the string in byte
- * @return the number of bytes required to encode the UTF-8 string as Latin1
- */
-simdutf_warn_unused size_t latin1_length_from_utf8(const char* input, size_t length) noexcept;
+ /**
+ * Return the number of bytes that this Latin1 string would require in UTF-8 format.
+ *
+ * @param input the Latin1 string to convert
+ * @param length the length of the string in bytes
+ * @return the number of bytes required to encode the Latin1 string as UTF-8
+ */
+ simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t length) noexcept;
+
+ /**
+ * Compute the number of bytes that this UTF-8 string would require in Latin1 format.
+ *
+ * This function does not validate the input.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in bytes
+ * @return the number of bytes required to encode the UTF-8 string as Latin1
+ */
+ simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t length) noexcept;
/**
* Compute the number of 2-byte words that this UTF-8 string would require in UTF-16LE format.
@@ -1344,7 +1339,7 @@ simdutf_warn_unused size_t latin1_length_from_utf8(const char* input, size_t len
* @param length the length of the string in bytes
* @return the number of char16_t words required to encode the UTF-8 string as UTF-16LE
*/
-simdutf_warn_unused size_t utf16_length_from_utf8(const char* input, size_t length) noexcept;
+simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t length) noexcept;
/**
* Compute the number of 4-byte words that this UTF-8 string would require in UTF-32 format.
@@ -1359,7 +1354,7 @@ simdutf_warn_unused size_t utf16_length_from_utf8(const char* input, size_t leng
* @param length the length of the string in bytes
* @return the number of char32_t words required to encode the UTF-8 string as UTF-32
*/
-simdutf_warn_unused size_t utf32_length_from_utf8(const char* input, size_t length) noexcept;
+simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) noexcept;
/**
* Using native endianness; Convert possibly broken UTF-16 string into UTF-8 string.
@@ -1374,37 +1369,39 @@ simdutf_warn_unused size_t utf32_length_from_utf8(const char* input, size_t leng
* @param utf8_buffer the pointer to buffer that can hold conversion result
* @return number of written words; 0 if input is not a valid UTF-16LE string
*/
-simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t* input, size_t length, char* utf8_buffer) noexcept;
-
-/**
- * Convert possibly broken UTF-16LE string into Latin1 string.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16LE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param latin1_buffer the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-16LE string
- */
-simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t* input, size_t length, char* latin1_buffer) noexcept;
+simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
+
+
+ /**
+ * Convert possibly broken UTF-16LE string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if input is not a valid UTF-16LE string
+ */
+ simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
+
+ /**
+ * Convert possibly broken UTF-16BE string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16BE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if input is not a valid UTF-16BE string
+ */
+ simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
-/**
- * Convert possibly broken UTF-16BE string into Latin1 string.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16BE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param latin1_buffer the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
- */
-simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t* input, size_t length, char* latin1_buffer) noexcept;
/**
* Convert possibly broken UTF-16LE string into UTF-8 string.
@@ -1419,7 +1416,7 @@ simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t* input, size
* @param utf8_buffer the pointer to buffer that can hold conversion result
* @return number of written words; 0 if input is not a valid UTF-16LE string
*/
-simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t* input, size_t length, char* utf8_buffer) noexcept;
+simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
/**
* Convert possibly broken UTF-16BE string into UTF-8 string.
@@ -1434,35 +1431,36 @@ simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t* input, size_t
* @param utf8_buffer the pointer to buffer that can hold conversion result
* @return number of written words; 0 if input is not a valid UTF-16LE string
*/
-simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t* input, size_t length, char* utf8_buffer) noexcept;
+simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
+
+ /**
+ * Convert possibly broken UTF-16LE string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+ */
+ simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
+
+ /**
+ * Convert possibly broken UTF-16BE string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16BE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+ */
+ simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
-/**
- * Convert possibly broken UTF-16LE string into Latin1 string.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- * This function is not BOM-aware.
- *
- * @param input the UTF-16LE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param latin1_buffer the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
- */
-simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t* input, size_t length, char* latin1_buffer) noexcept;
-
-/**
- * Convert possibly broken UTF-16BE string into Latin1 string.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- * This function is not BOM-aware.
- *
- * @param input the UTF-16BE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param latin1_buffer the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
- */
-simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t* input, size_t length, char* latin1_buffer) noexcept;
/**
* Using native endianness; Convert possibly broken UTF-16 string into UTF-8 string and stop on error.
@@ -1477,7 +1475,7 @@ simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t*
* @param utf8_buffer the pointer to buffer that can hold conversion result
* @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
*/
-simdutf_warn_unused result convert_utf16_to_utf8_with_errors(const char16_t* input, size_t length, char* utf8_buffer) noexcept;
+simdutf_warn_unused result convert_utf16_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
/**
* Convert possibly broken UTF-16LE string into UTF-8 string and stop on error.
@@ -1492,7 +1490,7 @@ simdutf_warn_unused result convert_utf16_to_utf8_with_errors(const char16_t* inp
* @param utf8_buffer the pointer to buffer that can hold conversion result
* @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
*/
-simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t* input, size_t length, char* utf8_buffer) noexcept;
+simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
/**
* Convert possibly broken UTF-16BE string into UTF-8 string and stop on error.
@@ -1507,7 +1505,7 @@ simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t* i
* @param utf8_buffer the pointer to buffer that can hold conversion result
* @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
*/
-simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t* input, size_t length, char* utf8_buffer) noexcept;
+simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
/**
* Using native endianness; Convert valid UTF-16 string into UTF-8 string.
@@ -1521,35 +1519,37 @@ simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t* i
* @param utf8_buffer the pointer to buffer that can hold the conversion result
* @return number of written words; 0 if conversion is not possible
*/
-simdutf_warn_unused size_t convert_valid_utf16_to_utf8(const char16_t* input, size_t length, char* utf8_buffer) noexcept;
+simdutf_warn_unused size_t convert_valid_utf16_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
-/**
- * Convert valid UTF-16LE string into Latin1 string.
- *
- * This function assumes that the input string is valid UTF-8.
- * This function is not BOM-aware.
- *
- * @param input the UTF-16LE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param latin1_buffer the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if conversion is not possible
- */
-simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t* input, size_t length, char* latin1_buffer) noexcept;
+ /**
+ * Convert valid UTF-16LE string into Latin1 string.
+ *
+ * This function assumes that the input string is valid UTF-16LE.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if conversion is not possible
+ */
+ simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
+
+ /**
+ * Convert valid UTF-16BE string into Latin1 string.
+ *
+ * This function assumes that the input string is valid UTF-16BE.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16BE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if conversion is not possible
+ */
+ simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
-/**
- * Convert valid UTF-16BE string into Latin1 string.
- *
- * This function assumes that the input string is valid UTF-8.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16BE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param latin1_buffer the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if conversion is not possible
- */
-simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t* input, size_t length, char* latin1_buffer) noexcept;
/**
* Convert valid UTF-16LE string into UTF-8 string.
@@ -1563,7 +1563,7 @@ simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t* input
* @param utf8_buffer the pointer to buffer that can hold the conversion result
* @return number of written words; 0 if conversion is not possible
*/
-simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t* input, size_t length, char* utf8_buffer) noexcept;
+simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
/**
* Convert valid UTF-16BE string into UTF-8 string.
@@ -1577,7 +1577,7 @@ simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t* input,
* @param utf8_buffer the pointer to buffer that can hold the conversion result
* @return number of written words; 0 if conversion is not possible
*/
-simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t* input, size_t length, char* utf8_buffer) noexcept;
+simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
/**
* Using native endianness; Convert possibly broken UTF-16 string into UTF-32 string.
@@ -1592,7 +1592,7 @@ simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t* input,
* @param utf32_buffer the pointer to buffer that can hold conversion result
* @return number of written words; 0 if input is not a valid UTF-16LE string
*/
-simdutf_warn_unused size_t convert_utf16_to_utf32(const char16_t* input, size_t length, char32_t* utf32_buffer) noexcept;
+simdutf_warn_unused size_t convert_utf16_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
/**
* Convert possibly broken UTF-16LE string into UTF-32 string.
@@ -1607,7 +1607,7 @@ simdutf_warn_unused size_t convert_utf16_to_utf32(const char16_t* input, size_t
* @param utf32_buffer the pointer to buffer that can hold conversion result
* @return number of written words; 0 if input is not a valid UTF-16LE string
*/
-simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t* input, size_t length, char32_t* utf32_buffer) noexcept;
+simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
/**
* Convert possibly broken UTF-16BE string into UTF-32 string.
@@ -1622,7 +1622,7 @@ simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t* input, size_
* @param utf32_buffer the pointer to buffer that can hold conversion result
* @return number of written words; 0 if input is not a valid UTF-16LE string
*/
-simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t* input, size_t length, char32_t* utf32_buffer) noexcept;
+simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
/**
* Using native endianness; Convert possibly broken UTF-16 string into
@@ -1638,7 +1638,7 @@ simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t* input, size_
* @param utf32_buffer the pointer to buffer that can hold conversion result
* @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful.
*/
-simdutf_warn_unused result convert_utf16_to_utf32_with_errors(const char16_t* input, size_t length, char32_t* utf32_buffer) noexcept;
+simdutf_warn_unused result convert_utf16_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
/**
* Convert possibly broken UTF-16LE string into UTF-32 string and stop on error.
@@ -1653,7 +1653,7 @@ simdutf_warn_unused result convert_utf16_to_utf32_with_errors(const char16_t* in
* @param utf32_buffer the pointer to buffer that can hold conversion result
* @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful.
*/
-simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t* input, size_t length, char32_t* utf32_buffer) noexcept;
+simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
/**
* Convert possibly broken UTF-16BE string into UTF-32 string and stop on error.
@@ -1668,7 +1668,7 @@ simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t*
* @param utf32_buffer the pointer to buffer that can hold conversion result
* @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful.
*/
-simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t* input, size_t length, char32_t* utf32_buffer) noexcept;
+simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
/**
* Using native endianness; Convert valid UTF-16 string into UTF-32 string.
@@ -1682,7 +1682,7 @@ simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t*
* @param utf32_buffer the pointer to buffer that can hold the conversion result
* @return number of written words; 0 if conversion is not possible
*/
-simdutf_warn_unused size_t convert_valid_utf16_to_utf32(const char16_t* input, size_t length, char32_t* utf32_buffer) noexcept;
+simdutf_warn_unused size_t convert_valid_utf16_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
/**
* Convert valid UTF-16LE string into UTF-32 string.
@@ -1696,7 +1696,7 @@ simdutf_warn_unused size_t convert_valid_utf16_to_utf32(const char16_t* input, s
* @param utf32_buffer the pointer to buffer that can hold the conversion result
* @return number of written words; 0 if conversion is not possible
*/
-simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t* input, size_t length, char32_t* utf32_buffer) noexcept;
+simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
/**
* Convert valid UTF-16BE string into UTF-32 string.
@@ -1710,20 +1710,22 @@ simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t* input,
* @param utf32_buffer the pointer to buffer that can hold the conversion result
* @return number of written words; 0 if conversion is not possible
*/
-simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t* input, size_t length, char32_t* utf32_buffer) noexcept;
+simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
+
/*
- * Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
- *
- * This function does not validate the input.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16LE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @return the number of bytes required to encode the UTF-16LE string as Latin1
- */
-simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept;
+ * Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
+ *
+ * This function does not validate the input.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @return the number of bytes required to encode the UTF-16LE string as Latin1
+ */
+ simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept;
+
/**
* Using native endianness; Compute the number of bytes that this UTF-16
@@ -1735,7 +1737,7 @@ simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept;
* @param length the length of the string in 2-byte words (char16_t)
* @return the number of bytes required to encode the UTF-16LE string as UTF-8
*/
-simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t* input, size_t length) noexcept;
+simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t * input, size_t length) noexcept;
/**
* Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
@@ -1746,7 +1748,7 @@ simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t* input, size_t
* @param length the length of the string in 2-byte words (char16_t)
* @return the number of bytes required to encode the UTF-16LE string as UTF-8
*/
-simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t* input, size_t length) noexcept;
+simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size_t length) noexcept;
/**
* Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
@@ -1757,7 +1759,7 @@ simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t* input, size_
* @param length the length of the string in 2-byte words (char16_t)
* @return the number of bytes required to encode the UTF-16BE string as UTF-8
*/
-simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t* input, size_t length) noexcept;
+simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t * input, size_t length) noexcept;
/**
* Convert possibly broken UTF-32 string into UTF-8 string.
@@ -1772,7 +1774,7 @@ simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t* input, size_
* @param utf8_buffer the pointer to buffer that can hold conversion result
* @return number of written words; 0 if input is not a valid UTF-32 string
*/
-simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t* input, size_t length, char* utf8_buffer) noexcept;
+simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) noexcept;
/**
* Convert possibly broken UTF-32 string into UTF-8 string and stop on error.
@@ -1787,7 +1789,7 @@ simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t* input, size_t l
* @param utf8_buffer the pointer to buffer that can hold conversion result
* @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
*/
-simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t* input, size_t length, char* utf8_buffer) noexcept;
+simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * input, size_t length, char* utf8_buffer) noexcept;
/**
* Convert valid UTF-32 string into UTF-8 string.
@@ -1801,7 +1803,7 @@ simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t* inp
* @param utf8_buffer the pointer to buffer that can hold the conversion result
* @return number of written words; 0 if conversion is not possible
*/
-simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t* input, size_t length, char* utf8_buffer) noexcept;
+simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) noexcept;
/**
* Using native endianness; Convert possibly broken UTF-32 string into UTF-16 string.
@@ -1816,7 +1818,7 @@ simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t* input, si
* @param utf16_buffer the pointer to buffer that can hold conversion result
* @return number of written words; 0 if input is not a valid UTF-32 string
*/
-simdutf_warn_unused size_t convert_utf32_to_utf16(const char32_t* input, size_t length, char16_t* utf16_buffer) noexcept;
+simdutf_warn_unused size_t convert_utf32_to_utf16(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
/**
* Convert possibly broken UTF-32 string into UTF-16LE string.
@@ -1831,53 +1833,54 @@ simdutf_warn_unused size_t convert_utf32_to_utf16(const char32_t* input, size_t
* @param utf16_buffer the pointer to buffer that can hold conversion result
* @return number of written words; 0 if input is not a valid UTF-32 string
*/
-simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t* input, size_t length, char16_t* utf16_buffer) noexcept;
-
-/**
- * Convert possibly broken UTF-32 string into Latin1 string.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-32 string to convert
- * @param length the length of the string in 4-byte words (char32_t)
- * @param latin1_buffer the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-32 string
- */
-
-simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t* input, size_t length, char* latin1_buffer) noexcept;
-
-/**
- * Convert possibly broken UTF-32 string into Latin1 string and stop on error.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-32 string to convert
- * @param length the length of the string in 4-byte words (char32_t)
- * @param latin1_buffer the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
- */
-
-simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t* input, size_t length, char* latin1_buffer) noexcept;
-
-/**
- * Convert valid UTF-32 string into Latin1 string.
- *
- * This function assumes that the input string is valid UTF-32.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-32 string to convert
- * @param length the length of the string in 4-byte words (char32_t)
- * @param latin1_buffer the pointer to buffer that can hold the conversion result
- * @return number of written words; 0 if conversion is not possible
- */
-simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t* input, size_t length, char* latin1_buffer) noexcept;
+simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
+
+ /**
+ * Convert possibly broken UTF-32 string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if input is not a valid UTF-32 string
+ */
+
+ simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) noexcept;
+
+
+ /**
+ * Convert possibly broken UTF-32 string into Latin1 string and stop on error.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+ */
+
+ simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * input, size_t length, char* latin1_buffer) noexcept;
+
+ /**
+ * Convert valid UTF-32 string into Latin1 string.
+ *
+ * This function assumes that the input string is valid UTF-32.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @param latin1_buffer the pointer to buffer that can hold the conversion result
+ * @return number of written words; 0 if conversion is not possible
+ */
+ simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) noexcept;
/**
* Convert possibly broken UTF-32 string into UTF-16BE string.
@@ -1892,7 +1895,7 @@ simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t* input,
* @param utf16_buffer the pointer to buffer that can hold conversion result
* @return number of written words; 0 if input is not a valid UTF-32 string
*/
-simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t* input, size_t length, char16_t* utf16_buffer) noexcept;
+simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
/**
* Using native endianness; Convert possibly broken UTF-32 string into UTF-16
@@ -1908,7 +1911,7 @@ simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t* input, size_
* @param utf16_buffer the pointer to buffer that can hold conversion result
* @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
*/
-simdutf_warn_unused result convert_utf32_to_utf16_with_errors(const char32_t* input, size_t length, char16_t* utf16_buffer) noexcept;
+simdutf_warn_unused result convert_utf32_to_utf16_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
/**
* Convert possibly broken UTF-32 string into UTF-16LE string and stop on error.
@@ -1923,7 +1926,7 @@ simdutf_warn_unused result convert_utf32_to_utf16_with_errors(const char32_t* in
* @param utf16_buffer the pointer to buffer that can hold conversion result
* @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
*/
-simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t* input, size_t length, char16_t* utf16_buffer) noexcept;
+simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
/**
* Convert possibly broken UTF-32 string into UTF-16BE string and stop on error.
@@ -1938,7 +1941,7 @@ simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t*
* @param utf16_buffer the pointer to buffer that can hold conversion result
* @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
*/
-simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t* input, size_t length, char16_t* utf16_buffer) noexcept;
+simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
/**
* Using native endianness; Convert valid UTF-32 string into UTF-16 string.
@@ -1952,7 +1955,7 @@ simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t*
* @param utf16_buffer the pointer to buffer that can hold the conversion result
* @return number of written words; 0 if conversion is not possible
*/
-simdutf_warn_unused size_t convert_valid_utf32_to_utf16(const char32_t* input, size_t length, char16_t* utf16_buffer) noexcept;
+simdutf_warn_unused size_t convert_valid_utf32_to_utf16(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
/**
* Convert valid UTF-32 string into UTF-16LE string.
@@ -1966,7 +1969,7 @@ simdutf_warn_unused size_t convert_valid_utf32_to_utf16(const char32_t* input, s
* @param utf16_buffer the pointer to buffer that can hold the conversion result
* @return number of written words; 0 if conversion is not possible
*/
-simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t* input, size_t length, char16_t* utf16_buffer) noexcept;
+simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
/**
* Convert valid UTF-32 string into UTF-16BE string.
@@ -1980,7 +1983,7 @@ simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t* input,
* @param utf16_buffer the pointer to buffer that can hold the conversion result
* @return number of written words; 0 if conversion is not possible
*/
-simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t* input, size_t length, char16_t* utf16_buffer) noexcept;
+simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
/**
* Change the endianness of the input. Can be used to go from UTF-16LE to UTF-16BE or
@@ -1994,7 +1997,7 @@ simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t* input,
* @param length the length of the string in 2-byte words (char16_t)
* @param output the pointer to buffer that can hold the conversion result
*/
-void change_endianness_utf16(const char16_t* input, size_t length, char16_t* output) noexcept;
+void change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) noexcept;
/**
* Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
@@ -2005,7 +2008,7 @@ void change_endianness_utf16(const char16_t* input, size_t length, char16_t* out
* @param length the length of the string in 4-byte words (char32_t)
* @return the number of bytes required to encode the UTF-32 string as UTF-8
*/
-simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t* input, size_t length) noexcept;
+simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) noexcept;
/**
* Compute the number of two-byte words that this UTF-32 string would require in UTF-16 format.
@@ -2016,7 +2019,7 @@ simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t* input, size_t
* @param length the length of the string in 4-byte words (char32_t)
* @return the number of bytes required to encode the UTF-32 string as UTF-16
*/
-simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t* input, size_t length) noexcept;
+simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) noexcept;
/**
* Using native endianness; Compute the number of bytes that this UTF-16
@@ -2032,7 +2035,7 @@ simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t* input, size_t
* @param length the length of the string in 2-byte words (char16_t)
* @return the number of bytes required to encode the UTF-16LE string as UTF-32
*/
-simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t* input, size_t length) noexcept;
+simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t * input, size_t length) noexcept;
/**
* Compute the number of bytes that this UTF-16LE string would require in UTF-32 format.
@@ -2047,7 +2050,7 @@ simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t* input, size_t
* @param length the length of the string in 2-byte words (char16_t)
* @return the number of bytes required to encode the UTF-16LE string as UTF-32
*/
-simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t* input, size_t length) noexcept;
+simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, size_t length) noexcept;
/**
* Compute the number of bytes that this UTF-16BE string would require in UTF-32 format.
@@ -2062,7 +2065,7 @@ simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t* input, size
* @param length the length of the string in 2-byte words (char16_t)
* @return the number of bytes required to encode the UTF-16BE string as UTF-32
*/
-simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t* input, size_t length) noexcept;
+simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, size_t length) noexcept;
/**
* Count the number of code points (characters) in the string assuming that
@@ -2076,7 +2079,7 @@ simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t* input, size
* @param length the length of the string in 2-byte words (char16_t)
* @return number of code points
*/
-simdutf_warn_unused size_t count_utf16(const char16_t* input, size_t length) noexcept;
+simdutf_warn_unused size_t count_utf16(const char16_t * input, size_t length) noexcept;
/**
* Count the number of code points (characters) in the string assuming that
@@ -2090,7 +2093,7 @@ simdutf_warn_unused size_t count_utf16(const char16_t* input, size_t length) noe
* @param length the length of the string in 2-byte words (char16_t)
* @return number of code points
*/
-simdutf_warn_unused size_t count_utf16le(const char16_t* input, size_t length) noexcept;
+simdutf_warn_unused size_t count_utf16le(const char16_t * input, size_t length) noexcept;
/**
* Count the number of code points (characters) in the string assuming that
@@ -2104,7 +2107,7 @@ simdutf_warn_unused size_t count_utf16le(const char16_t* input, size_t length) n
* @param length the length of the string in 2-byte words (char16_t)
* @return number of code points
*/
-simdutf_warn_unused size_t count_utf16be(const char16_t* input, size_t length) noexcept;
+simdutf_warn_unused size_t count_utf16be(const char16_t * input, size_t length) noexcept;
/**
* Count the number of code points (characters) in the string assuming that
@@ -2116,7 +2119,7 @@ simdutf_warn_unused size_t count_utf16be(const char16_t* input, size_t length) n
* @param length the length of the string in bytes
* @return number of code points
*/
-simdutf_warn_unused size_t count_utf8(const char* input, size_t length) noexcept;
+simdutf_warn_unused size_t count_utf8(const char * input, size_t length) noexcept;
/**
* An implementation of simdutf for a particular CPU architecture.
@@ -2126,1080 +2129,1090 @@ simdutf_warn_unused size_t count_utf8(const char* input, size_t length) noexcept
*/
class implementation {
public:
- /**
- * The name of this implementation.
- *
- * const implementation *impl = simdutf::active_implementation;
- * cout << "simdutf is optimized for " << impl->name() << "(" << impl->description() << ")" << endl;
- *
- * @return the name of the implementation, e.g. "haswell", "westmere", "arm64"
- */
- virtual const std::string& name() const { return _name; }
-
- /**
- * The description of this implementation.
- *
- * const implementation *impl = simdutf::active_implementation;
- * cout << "simdutf is optimized for " << impl->name() << "(" << impl->description() << ")" << endl;
- *
- * @return a description of the implementation
- */
- virtual const std::string& description() const { return _description; }
-
- /**
- * The instruction sets this implementation is compiled against
- * and the current CPU match. This function may poll the current CPU/system
- * and should therefore not be called too often if performance is a concern.
- *
- *
- * @return true if the implementation can be safely used on the current system (determined at runtime)
- */
- bool supported_by_runtime_system() const;
-
- /**
- * This function will try to detect the encoding
- * @param input the string to identify
- * @param length the length of the string in bytes.
- * @return the encoding type detected
- */
- virtual encoding_type autodetect_encoding(const char* input, size_t length) const noexcept;
-
- /**
- * This function will try to detect the possible encodings in one pass
- * @param input the string to identify
- * @param length the length of the string in bytes.
- * @return the encoding type detected
- */
- virtual int detect_encodings(const char* input, size_t length) const noexcept = 0;
-
- /**
- * @private For internal implementation use
- *
- * The instruction sets this implementation is compiled against.
- *
- * @return a mask of all required `internal::instruction_set::` values
- */
- virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; }
-
- /**
- * Validate the UTF-8 string.
- *
- * Overridden by each implementation.
- *
- * @param buf the UTF-8 string to validate.
- * @param len the length of the string in bytes.
- * @return true if and only if the string is valid UTF-8.
- */
- simdutf_warn_unused virtual bool validate_utf8(const char* buf, size_t len) const noexcept = 0;
-
- /**
- * Validate the UTF-8 string and stop on errors.
- *
- * Overridden by each implementation.
- *
- * @param buf the UTF-8 string to validate.
- * @param len the length of the string in bytes.
- * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
- */
- simdutf_warn_unused virtual result validate_utf8_with_errors(const char* buf, size_t len) const noexcept = 0;
-
- /**
- * Validate the ASCII string.
- *
- * Overridden by each implementation.
- *
- * @param buf the ASCII string to validate.
- * @param len the length of the string in bytes.
- * @return true if and only if the string is valid ASCII.
- */
- simdutf_warn_unused virtual bool validate_ascii(const char* buf, size_t len) const noexcept = 0;
-
- /**
- * Validate the ASCII string and stop on error.
- *
- * Overridden by each implementation.
- *
- * @param buf the ASCII string to validate.
- * @param len the length of the string in bytes.
- * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
- */
- simdutf_warn_unused virtual result validate_ascii_with_errors(const char* buf, size_t len) const noexcept = 0;
-
- /**
- * Validate the UTF-16LE string. This function may be best when you expect
- * the input to be almost always valid. Otherwise, consider using
- * validate_utf16le_with_errors.
- *
- * Overridden by each implementation.
- *
- * This function is not BOM-aware.
- *
- * @param buf the UTF-16LE string to validate.
- * @param len the length of the string in number of 2-byte words (char16_t).
- * @return true if and only if the string is valid UTF-16LE.
- */
- simdutf_warn_unused virtual bool validate_utf16le(const char16_t* buf, size_t len) const noexcept = 0;
-
- /**
- * Validate the UTF-16BE string. This function may be best when you expect
- * the input to be almost always valid. Otherwise, consider using
- * validate_utf16be_with_errors.
- *
- * Overridden by each implementation.
- *
- * This function is not BOM-aware.
- *
- * @param buf the UTF-16BE string to validate.
- * @param len the length of the string in number of 2-byte words (char16_t).
- * @return true if and only if the string is valid UTF-16BE.
- */
- simdutf_warn_unused virtual bool validate_utf16be(const char16_t* buf, size_t len) const noexcept = 0;
-
- /**
- * Validate the UTF-16LE string and stop on error. It might be faster than
- * validate_utf16le when an error is expected to occur early.
- *
- * Overridden by each implementation.
- *
- * This function is not BOM-aware.
- *
- * @param buf the UTF-16LE string to validate.
- * @param len the length of the string in number of 2-byte words (char16_t).
- * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
- */
- simdutf_warn_unused virtual result validate_utf16le_with_errors(const char16_t* buf, size_t len) const noexcept = 0;
-
- /**
- * Validate the UTF-16BE string and stop on error. It might be faster than
- * validate_utf16be when an error is expected to occur early.
- *
- * Overridden by each implementation.
- *
- * This function is not BOM-aware.
- *
- * @param buf the UTF-16BE string to validate.
- * @param len the length of the string in number of 2-byte words (char16_t).
- * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
- */
- simdutf_warn_unused virtual result validate_utf16be_with_errors(const char16_t* buf, size_t len) const noexcept = 0;
-
- /**
- * Validate the UTF-32 string.
- *
- * Overridden by each implementation.
- *
- * This function is not BOM-aware.
- *
- * @param buf the UTF-32 string to validate.
- * @param len the length of the string in number of 4-byte words (char32_t).
- * @return true if and only if the string is valid UTF-32.
- */
- simdutf_warn_unused virtual bool validate_utf32(const char32_t* buf, size_t len) const noexcept = 0;
-
- /**
- * Validate the UTF-32 string and stop on error.
- *
- * Overridden by each implementation.
- *
- * This function is not BOM-aware.
- *
- * @param buf the UTF-32 string to validate.
- * @param len the length of the string in number of 4-byte words (char32_t).
- * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
- */
- simdutf_warn_unused virtual result validate_utf32_with_errors(const char32_t* buf, size_t len) const noexcept = 0;
-
- /**
- * Convert Latin1 string into UTF8 string.
- *
- * This function is suitable to work with inputs from untrusted sources.
- *
- * @param input the Latin1 string to convert
- * @param length the length of the string in bytes
- * @param utf8_output the pointer to buffer that can hold conversion result
- * @return the number of written char; 0 if conversion is not possible
- */
- simdutf_warn_unused virtual size_t convert_latin1_to_utf8(const char* input, size_t length, char* utf8_output) const noexcept = 0;
-
- /**
- * Convert Latin1 string into UTF-16LE string.
- *
- * This function is suitable to work with inputs from untrusted sources.
- *
- * @param input the Latin1 string to convert
- * @param length the length of the string in bytes
- * @param utf16_output the pointer to buffer that can hold conversion result
- * @return the number of written char16_t; 0 if conversion is not possible
- */
- simdutf_warn_unused virtual size_t convert_latin1_to_utf16le(const char* input, size_t length, char16_t* utf16_output) const noexcept = 0;
-
- /**
- * Convert Latin1 string into UTF-16BE string.
- *
- * This function is suitable to work with inputs from untrusted sources.
- *
- * @param input the Latin1 string to convert
- * @param length the length of the string in bytes
- * @param utf16_output the pointer to buffer that can hold conversion result
- * @return the number of written char16_t; 0 if conversion is not possible
- */
- simdutf_warn_unused virtual size_t convert_latin1_to_utf16be(const char* input, size_t length, char16_t* utf16_output) const noexcept = 0;
-
- /**
- * Convert Latin1 string into UTF-32 string.
- *
- * This function is suitable to work with inputs from untrusted sources.
- *
- * @param input the Latin1 string to convert
- * @param length the length of the string in bytes
- * @param utf32_buffer the pointer to buffer that can hold conversion result
- * @return the number of written char32_t; 0 if conversion is not possible
- */
- simdutf_warn_unused virtual size_t convert_latin1_to_utf32(const char* input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-8 string into latin1 string.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * @param input the UTF-8 string to convert
- * @param length the length of the string in bytes
- * @param latin1_output the pointer to buffer that can hold conversion result
- * @return the number of written char; 0 if the input was not valid UTF-8 string
- */
- simdutf_warn_unused virtual size_t convert_utf8_to_latin1(const char* input, size_t length, char* latin1_output) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-8 string into latin1 string and stop on error.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * @param input the UTF-8 string to convert
- * @param length the length of the string in bytes
- * @param latin1_output the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
- */
- simdutf_warn_unused virtual result convert_utf8_to_latin1_with_errors(const char* input, size_t length, char* latin1_output) const noexcept = 0;
-
- /**
- * Convert valid UTF-8 string into latin1 string.
- *
- * This function assumes that the input string is valid UTF-8.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-8 string to convert
- * @param length the length of the string in bytes
- * @param latin1_output the pointer to buffer that can hold conversion result
- * @return the number of written char; 0 if the input was not valid UTF-8 string
- */
- simdutf_warn_unused virtual size_t convert_valid_utf8_to_latin1(const char* input, size_t length, char* latin1_output) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-8 string into UTF-16LE string.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * @param input the UTF-8 string to convert
- * @param length the length of the string in bytes
- * @param utf16_output the pointer to buffer that can hold conversion result
- * @return the number of written char16_t; 0 if the input was not valid UTF-8 string
- */
- simdutf_warn_unused virtual size_t convert_utf8_to_utf16le(const char* input, size_t length, char16_t* utf16_output) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-8 string into UTF-16BE string.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * @param input the UTF-8 string to convert
- * @param length the length of the string in bytes
- * @param utf16_output the pointer to buffer that can hold conversion result
- * @return the number of written char16_t; 0 if the input was not valid UTF-8 string
- */
- simdutf_warn_unused virtual size_t convert_utf8_to_utf16be(const char* input, size_t length, char16_t* utf16_output) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-8 string into UTF-16LE string and stop on error.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * @param input the UTF-8 string to convert
- * @param length the length of the string in bytes
- * @param utf16_output the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
- */
- simdutf_warn_unused virtual result convert_utf8_to_utf16le_with_errors(const char* input, size_t length, char16_t* utf16_output) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-8 string into UTF-16BE string and stop on error.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * @param input the UTF-8 string to convert
- * @param length the length of the string in bytes
- * @param utf16_output the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
- */
- simdutf_warn_unused virtual result convert_utf8_to_utf16be_with_errors(const char* input, size_t length, char16_t* utf16_output) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-8 string into UTF-32 string.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * @param input the UTF-8 string to convert
- * @param length the length of the string in bytes
- * @param utf32_output the pointer to buffer that can hold conversion result
- * @return the number of written char32_t; 0 if the input was not valid UTF-8 string
- */
- simdutf_warn_unused virtual size_t convert_utf8_to_utf32(const char* input, size_t length, char32_t* utf32_output) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-8 string into UTF-32 string and stop on error.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * @param input the UTF-8 string to convert
- * @param length the length of the string in bytes
- * @param utf32_output the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful.
- */
- simdutf_warn_unused virtual result convert_utf8_to_utf32_with_errors(const char* input, size_t length, char32_t* utf32_output) const noexcept = 0;
-
- /**
- * Convert valid UTF-8 string into UTF-16LE string.
- *
- * This function assumes that the input string is valid UTF-8.
- *
- * @param input the UTF-8 string to convert
- * @param length the length of the string in bytes
- * @param utf16_buffer the pointer to buffer that can hold conversion result
- * @return the number of written char16_t
- */
- simdutf_warn_unused virtual size_t convert_valid_utf8_to_utf16le(const char* input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
-
- /**
- * Convert valid UTF-8 string into UTF-16BE string.
- *
- * This function assumes that the input string is valid UTF-8.
- *
- * @param input the UTF-8 string to convert
- * @param length the length of the string in bytes
- * @param utf16_buffer the pointer to buffer that can hold conversion result
- * @return the number of written char16_t
- */
- simdutf_warn_unused virtual size_t convert_valid_utf8_to_utf16be(const char* input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
-
- /**
- * Convert valid UTF-8 string into UTF-32 string.
- *
- * This function assumes that the input string is valid UTF-8.
- *
- * @param input the UTF-8 string to convert
- * @param length the length of the string in bytes
- * @param utf32_buffer the pointer to buffer that can hold conversion result
- * @return the number of written char32_t
- */
- simdutf_warn_unused virtual size_t convert_valid_utf8_to_utf32(const char* input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
- /**
- * Compute the number of 2-byte words that this UTF-8 string would require in UTF-16LE format.
- *
- * This function does not validate the input.
- *
- * @param input the UTF-8 string to process
- * @param length the length of the string in bytes
- * @return the number of char16_t words required to encode the UTF-8 string as UTF-16LE
- */
- simdutf_warn_unused virtual size_t utf16_length_from_utf8(const char* input, size_t length) const noexcept = 0;
-
- /**
- * Compute the number of 4-byte words that this UTF-8 string would require in UTF-32 format.
- *
- * This function is equivalent to count_utf8.
- *
- * This function does not validate the input.
- *
- * @param input the UTF-8 string to process
- * @param length the length of the string in bytes
- * @return the number of char32_t words required to encode the UTF-8 string as UTF-32
- */
- simdutf_warn_unused virtual size_t utf32_length_from_utf8(const char* input, size_t length) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-16LE string into Latin1 string.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16LE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param latin1_buffer the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-16LE string
- */
- simdutf_warn_unused virtual size_t convert_utf16le_to_latin1(const char16_t* input, size_t length, char* latin1_buffer) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-16BE string into Latin1 string.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16BE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param latin1_buffer the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-16BE string
- */
- simdutf_warn_unused virtual size_t convert_utf16be_to_latin1(const char16_t* input, size_t length, char* latin1_buffer) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-16LE string into Latin1 string.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- * This function is not BOM-aware.
- *
- * @param input the UTF-16LE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param latin1_buffer the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
- */
- simdutf_warn_unused virtual result convert_utf16le_to_latin1_with_errors(const char16_t* input, size_t length, char* latin1_buffer) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-16BE string into Latin1 string.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- * This function is not BOM-aware.
- *
- * @param input the UTF-16BE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param latin1_buffer the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
- */
- simdutf_warn_unused virtual result convert_utf16be_to_latin1_with_errors(const char16_t* input, size_t length, char* latin1_buffer) const noexcept = 0;
-
- /**
- * Convert valid UTF-16LE string into Latin1 string.
- *
- * This function assumes that the input string is valid UTF-16LE.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16LE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param latin1_buffer the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if conversion is not possible
- */
- simdutf_warn_unused virtual size_t convert_valid_utf16le_to_latin1(const char16_t* input, size_t length, char* latin1_buffer) const noexcept = 0;
-
- /**
- * Convert valid UTF-16BE string into Latin1 string.
- *
- * This function assumes that the input string is valid UTF-16BE.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16BE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param latin1_buffer the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if conversion is not possible
- */
- simdutf_warn_unused virtual size_t convert_valid_utf16be_to_latin1(const char16_t* input, size_t length, char* latin1_buffer) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-16LE string into UTF-8 string.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16LE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param utf8_buffer the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-16LE string
- */
- simdutf_warn_unused virtual size_t convert_utf16le_to_utf8(const char16_t* input, size_t length, char* utf8_buffer) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-16BE string into UTF-8 string.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16BE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param utf8_buffer the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-16BE string
- */
- simdutf_warn_unused virtual size_t convert_utf16be_to_utf8(const char16_t* input, size_t length, char* utf8_buffer) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-16LE string into UTF-8 string and stop on error.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16LE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param utf8_buffer the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
- */
- simdutf_warn_unused virtual result convert_utf16le_to_utf8_with_errors(const char16_t* input, size_t length, char* utf8_buffer) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-16BE string into UTF-8 string and stop on error.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16BE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param utf8_buffer the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
- */
- simdutf_warn_unused virtual result convert_utf16be_to_utf8_with_errors(const char16_t* input, size_t length, char* utf8_buffer) const noexcept = 0;
-
- /**
- * Convert valid UTF-16LE string into UTF-8 string.
- *
- * This function assumes that the input string is valid UTF-16LE.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16LE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param utf8_buffer the pointer to buffer that can hold the conversion result
- * @return number of written words; 0 if conversion is not possible
- */
- simdutf_warn_unused virtual size_t convert_valid_utf16le_to_utf8(const char16_t* input, size_t length, char* utf8_buffer) const noexcept = 0;
-
- /**
- * Convert valid UTF-16BE string into UTF-8 string.
- *
- * This function assumes that the input string is valid UTF-16BE.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16BE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param utf8_buffer the pointer to buffer that can hold the conversion result
- * @return number of written words; 0 if conversion is not possible
- */
- simdutf_warn_unused virtual size_t convert_valid_utf16be_to_utf8(const char16_t* input, size_t length, char* utf8_buffer) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-16LE string into UTF-32 string.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16LE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param utf32_buffer the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-16LE string
- */
- simdutf_warn_unused virtual size_t convert_utf16le_to_utf32(const char16_t* input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-16BE string into UTF-32 string.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16BE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param utf32_buffer the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-16BE string
- */
- simdutf_warn_unused virtual size_t convert_utf16be_to_utf32(const char16_t* input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-16LE string into UTF-32 string and stop on error.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16LE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param utf32_buffer the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful.
- */
- simdutf_warn_unused virtual result convert_utf16le_to_utf32_with_errors(const char16_t* input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-16BE string into UTF-32 string and stop on error.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16BE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param utf32_buffer the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful.
- */
- simdutf_warn_unused virtual result convert_utf16be_to_utf32_with_errors(const char16_t* input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
-
- /**
- * Convert valid UTF-16LE string into UTF-32 string.
- *
- * This function assumes that the input string is valid UTF-16LE.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16LE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param utf32_buffer the pointer to buffer that can hold the conversion result
- * @return number of written words; 0 if conversion is not possible
- */
- simdutf_warn_unused virtual size_t convert_valid_utf16le_to_utf32(const char16_t* input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
-
- /**
- * Convert valid UTF-16BE string into UTF-32 string.
- *
- * This function assumes that the input string is valid UTF-16BE.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16BE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @param utf32_buffer the pointer to buffer that can hold the conversion result
- * @return number of written words; 0 if conversion is not possible
- */
- simdutf_warn_unused virtual size_t convert_valid_utf16be_to_utf32(const char16_t* input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
-
- /**
- * Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
- *
- * This function does not validate the input.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16LE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @return the number of bytes required to encode the UTF-16LE string as UTF-8
- */
- simdutf_warn_unused virtual size_t utf8_length_from_utf16le(const char16_t* input, size_t length) const noexcept = 0;
-
- /**
- * Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
- *
- * This function does not validate the input.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16BE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @return the number of bytes required to encode the UTF-16BE string as UTF-8
- */
- simdutf_warn_unused virtual size_t utf8_length_from_utf16be(const char16_t* input, size_t length) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-32 string into Latin1 string.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-32 string to convert
- * @param length the length of the string in 4-byte words (char32_t)
- * @param latin1_buffer the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-32 string
- */
-
- simdutf_warn_unused virtual size_t convert_utf32_to_latin1(const char32_t* input, size_t length, char* latin1_buffer) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-32 string into Latin1 string and stop on error.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-32 string to convert
- * @param length the length of the string in 4-byte words (char32_t)
- * @param latin1_buffer the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
- */
-
- simdutf_warn_unused virtual result convert_utf32_to_latin1_with_errors(const char32_t* input, size_t length, char* latin1_buffer) const noexcept = 0;
-
- /**
- * Convert valid UTF-32 string into Latin1 string.
- *
- * This function assumes that the input string is valid UTF-32.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-32 string to convert
- * @param length the length of the string in 4-byte words (char32_t)
- * @param latin1_buffer the pointer to buffer that can hold the conversion result
- * @return number of written words; 0 if conversion is not possible
- */
- simdutf_warn_unused virtual size_t convert_valid_utf32_to_latin1(const char32_t* input, size_t length, char* latin1_buffer) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-32 string into UTF-8 string.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-32 string to convert
- * @param length the length of the string in 4-byte words (char32_t)
- * @param utf8_buffer the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-32 string
- */
- simdutf_warn_unused virtual size_t convert_utf32_to_utf8(const char32_t* input, size_t length, char* utf8_buffer) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-32 string into UTF-8 string and stop on error.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-32 string to convert
- * @param length the length of the string in 4-byte words (char32_t)
- * @param utf8_buffer the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
- */
- simdutf_warn_unused virtual result convert_utf32_to_utf8_with_errors(const char32_t* input, size_t length, char* utf8_buffer) const noexcept = 0;
-
- /**
- * Convert valid UTF-32 string into UTF-8 string.
- *
- * This function assumes that the input string is valid UTF-32.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-32 string to convert
- * @param length the length of the string in 4-byte words (char32_t)
- * @param utf8_buffer the pointer to buffer that can hold the conversion result
- * @return number of written words; 0 if conversion is not possible
- */
- simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf8(const char32_t* input, size_t length, char* utf8_buffer) const noexcept = 0;
-
- /**
- * Return the number of bytes that this UTF-16 string would require in Latin1 format.
- *
- *
- * @param input the UTF-16 string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @return the number of bytes required to encode the UTF-16 string as Latin1
- */
+ /**
+ * The name of this implementation.
+ *
+ * Example:
+ *
+ * const implementation *impl = simdutf::active_implementation;
+ * cout << "simdutf is optimized for " << impl->name() << "(" << impl->description() << ")" << endl;
+ *
+ * @return the name of the implementation, e.g. "haswell", "westmere", "arm64"
+ *         (a const reference to the stored `_name` member)
+ */
+ virtual const std::string &name() const { return _name; }
+
+ /**
+ * The description of this implementation.
+ *
+ * Example:
+ *
+ * const implementation *impl = simdutf::active_implementation;
+ * cout << "simdutf is optimized for " << impl->name() << "(" << impl->description() << ")" << endl;
+ *
+ * @return the description of the implementation
+ *         (a const reference to the stored `_description` member)
+ */
+ virtual const std::string &description() const { return _description; }
+
+ /**
+ * Checks whether the instruction sets this implementation is compiled
+ * against are supported by the current CPU. This function may poll the
+ * current CPU/system and should therefore not be called too often if
+ * performance is a concern.
+ *
+ * @return true if the implementation can be safely used on the current system (determined at runtime)
+ */
+ bool supported_by_runtime_system() const;
+
+ /**
+ * This function will try to detect the encoding
+ * @param input the string to identify
+ * @param length the length of the string in bytes.
+ * @return the encoding type detected
+ */
+ virtual encoding_type autodetect_encoding(const char * input, size_t length) const noexcept;
+
+ /**
+ * This function will try to detect the possible encodings in one pass
+ * @param input the string to identify
+ * @param length the length of the string in bytes.
+ * @return the detected encodings, returned as an int
+ *         (NOTE(review): presumably a combination of encoding_type values,
+ *         since several encodings may be possible — confirm against the implementations)
+ */
+ virtual int detect_encodings(const char * input, size_t length) const noexcept = 0;
+
+ /**
+ * @private For internal implementation use
+ *
+ * The instruction sets this implementation is compiled against.
+ *
+ * @return a mask of all required `internal::instruction_set::` values
+ *         (the stored `_required_instruction_sets` member)
+ */
+ virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; }
+
+
+ /**
+ * Validate the UTF-8 string.
+ *
+ * Overridden by each implementation.
+ *
+ * @param buf the UTF-8 string to validate.
+ * @param len the length of the string in bytes.
+ * @return true if and only if the string is valid UTF-8.
+ */
+ simdutf_warn_unused virtual bool validate_utf8(const char *buf, size_t len) const noexcept = 0;
+
+ /**
+ * Validate the UTF-8 string and stop on errors.
+ *
+ * Overridden by each implementation.
+ *
+ * @param buf the UTF-8 string to validate.
+ * @param len the length of the string in bytes.
+ * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+ */
+ simdutf_warn_unused virtual result validate_utf8_with_errors(const char *buf, size_t len) const noexcept = 0;
+
+ /**
+ * Validate the ASCII string.
+ *
+ * Overridden by each implementation.
+ *
+ * @param buf the ASCII string to validate.
+ * @param len the length of the string in bytes.
+ * @return true if and only if the string is valid ASCII.
+ */
+ simdutf_warn_unused virtual bool validate_ascii(const char *buf, size_t len) const noexcept = 0;
+
+ /**
+ * Validate the ASCII string and stop on error.
+ *
+ * Overridden by each implementation.
+ *
+ * @param buf the ASCII string to validate.
+ * @param len the length of the string in bytes.
+ * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+ */
+ simdutf_warn_unused virtual result validate_ascii_with_errors(const char *buf, size_t len) const noexcept = 0;
+
+ /**
+ * Validate the UTF-16LE string. This function may be best when you expect
+ * the input to be almost always valid. Otherwise, consider using
+ * validate_utf16le_with_errors.
+ *
+ * Overridden by each implementation.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param buf the UTF-16LE string to validate.
+ * @param len the length of the string in number of 2-byte words (char16_t).
+ * @return true if and only if the string is valid UTF-16LE.
+ */
+ simdutf_warn_unused virtual bool validate_utf16le(const char16_t *buf, size_t len) const noexcept = 0;
+
+ /**
+ * Validate the UTF-16BE string. This function may be best when you expect
+ * the input to be almost always valid. Otherwise, consider using
+ * validate_utf16be_with_errors.
+ *
+ * Overridden by each implementation.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param buf the UTF-16BE string to validate.
+ * @param len the length of the string in number of 2-byte words (char16_t).
+ * @return true if and only if the string is valid UTF-16BE.
+ */
+ simdutf_warn_unused virtual bool validate_utf16be(const char16_t *buf, size_t len) const noexcept = 0;
+
+ /**
+ * Validate the UTF-16LE string and stop on error. It might be faster than
+ * validate_utf16le when an error is expected to occur early.
+ *
+ * Overridden by each implementation.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param buf the UTF-16LE string to validate.
+ * @param len the length of the string in number of 2-byte words (char16_t).
+ * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+ */
+ simdutf_warn_unused virtual result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept = 0;
+
+ /**
+ * Validate the UTF-16BE string and stop on error. It might be faster than
+ * validate_utf16be when an error is expected to occur early.
+ *
+ * Overridden by each implementation.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param buf the UTF-16BE string to validate.
+ * @param len the length of the string in number of 2-byte words (char16_t).
+ * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+ */
+ simdutf_warn_unused virtual result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept = 0;
+
+ /**
+ * Validate the UTF-32 string.
+ *
+ * Overridden by each implementation.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param buf the UTF-32 string to validate.
+ * @param len the length of the string in number of 4-byte words (char32_t).
+ * @return true if and only if the string is valid UTF-32.
+ */
+ simdutf_warn_unused virtual bool validate_utf32(const char32_t *buf, size_t len) const noexcept = 0;
+
+ /**
+ * Validate the UTF-32 string and stop on error.
+ *
+ * Overridden by each implementation.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param buf the UTF-32 string to validate.
+ * @param len the length of the string in number of 4-byte words (char32_t).
+ * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+ */
+ simdutf_warn_unused virtual result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept = 0;
+
+ /**
+ * Convert Latin1 string into UTF-8 string.
+ *
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the Latin1 string to convert
+ * @param length the length of the string in bytes
+ * @param utf8_output the pointer to buffer that can hold conversion result
+ * @return the number of written char; 0 if conversion is not possible
+ */
+ simdutf_warn_unused virtual size_t convert_latin1_to_utf8(const char * input, size_t length, char* utf8_output) const noexcept = 0;
+
+
+ /**
+ * Convert Latin1 string into UTF-16LE string.
+ *
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the Latin1 string to convert
+ * @param length the length of the string in bytes
+ * @param utf16_output the pointer to buffer that can hold conversion result
+ * @return the number of written char16_t; 0 if conversion is not possible
+ */
+ simdutf_warn_unused virtual size_t convert_latin1_to_utf16le(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
+
+ /**
+ * Convert Latin1 string into UTF-16BE string.
+ *
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the Latin1 string to convert
+ * @param length the length of the string in bytes
+ * @param utf16_output the pointer to buffer that can hold conversion result
+ * @return the number of written char16_t; 0 if conversion is not possible
+ */
+ simdutf_warn_unused virtual size_t convert_latin1_to_utf16be(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
+
+ /**
+ * Convert Latin1 string into UTF-32 string.
+ *
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the Latin1 string to convert
+ * @param length the length of the string in bytes
+ * @param utf32_buffer the pointer to buffer that can hold conversion result
+ * @return the number of written char32_t; 0 if conversion is not possible
+ */
+ simdutf_warn_unused virtual size_t convert_latin1_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-8 string into latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in bytes
+ * @param latin1_output the pointer to buffer that can hold conversion result
+ * @return the number of written char; 0 if the input was not valid UTF-8 string
+ */
+ simdutf_warn_unused virtual size_t convert_utf8_to_latin1(const char * input, size_t length, char* latin1_output) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-8 string into latin1 string and stop on error.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in bytes
+ * @param latin1_output the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+ */
+ simdutf_warn_unused virtual result convert_utf8_to_latin1_with_errors(const char * input, size_t length, char* latin1_output) const noexcept = 0;
+
+ /**
+ * Convert valid UTF-8 string into latin1 string.
+ *
+ * This function assumes that the input string is valid UTF-8.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in bytes
+ * @param latin1_output the pointer to buffer that can hold conversion result
+ * @return the number of written char; 0 if the input was not valid UTF-8 string
+ */
+ simdutf_warn_unused virtual size_t convert_valid_utf8_to_latin1(const char * input, size_t length, char* latin1_output) const noexcept = 0;
+
+
+ /**
+ * Convert possibly broken UTF-8 string into UTF-16LE string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in bytes
+ * @param utf16_output the pointer to buffer that can hold conversion result
+ * @return the number of written char16_t; 0 if the input was not valid UTF-8 string
+ */
+ simdutf_warn_unused virtual size_t convert_utf8_to_utf16le(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-8 string into UTF-16BE string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in bytes
+ * @param utf16_output the pointer to buffer that can hold conversion result
+ * @return the number of written char16_t; 0 if the input was not valid UTF-8 string
+ */
+ simdutf_warn_unused virtual size_t convert_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-8 string into UTF-16LE string and stop on error.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in bytes
+ * @param utf16_output the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+ */
+ simdutf_warn_unused virtual result convert_utf8_to_utf16le_with_errors(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-8 string into UTF-16BE string and stop on error.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in bytes
+ * @param utf16_output the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+ */
+ simdutf_warn_unused virtual result convert_utf8_to_utf16be_with_errors(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-8 string into UTF-32 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in bytes
+ * @param utf32_output the pointer to buffer that can hold conversion result
+ * @return the number of written char32_t; 0 if the input was not valid UTF-8 string
+ */
+ simdutf_warn_unused virtual size_t convert_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_output) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-8 string into UTF-32 string and stop on error.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in bytes
+ * @param utf32_output the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful.
+ */
+ simdutf_warn_unused virtual result convert_utf8_to_utf32_with_errors(const char * input, size_t length, char32_t* utf32_output) const noexcept = 0;
+
+ /**
+ * Convert valid UTF-8 string into UTF-16LE string.
+ *
+ * This function assumes that the input string is valid UTF-8.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in bytes
+ * @param utf16_buffer the pointer to buffer that can hold conversion result
+ * @return the number of written char16_t
+ */
+ simdutf_warn_unused virtual size_t convert_valid_utf8_to_utf16le(const char * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
+
+ /**
+ * Convert valid UTF-8 string into UTF-16BE string.
+ *
+ * This function assumes that the input string is valid UTF-8.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in bytes
+ * @param utf16_buffer the pointer to buffer that can hold conversion result
+ * @return the number of written char16_t
+ */
+ simdutf_warn_unused virtual size_t convert_valid_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
+
+ /**
+ * Convert valid UTF-8 string into UTF-32 string.
+ *
+ * This function assumes that the input string is valid UTF-8.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in bytes
+ * @param utf32_buffer the pointer to buffer that can hold conversion result
+ * @return the number of written char32_t
+ */
+ simdutf_warn_unused virtual size_t convert_valid_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
+
+ /**
+ * Compute the number of 2-byte words that this UTF-8 string would require in UTF-16LE format.
+ *
+ * This function does not validate the input.
+ *
+ * @param input the UTF-8 string to process
+ * @param length the length of the string in bytes
+ * @return the number of char16_t words required to encode the UTF-8 string as UTF-16LE
+ */
+ simdutf_warn_unused virtual size_t utf16_length_from_utf8(const char * input, size_t length) const noexcept = 0;
+
+ /**
+ * Compute the number of 4-byte words that this UTF-8 string would require in UTF-32 format.
+ *
+ * This function is equivalent to count_utf8.
+ *
+ * This function does not validate the input.
+ *
+ * @param input the UTF-8 string to process
+ * @param length the length of the string in bytes
+ * @return the number of char32_t words required to encode the UTF-8 string as UTF-32
+ */
+ simdutf_warn_unused virtual size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-16LE string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if input is not a valid UTF-16LE string
+ */
+ simdutf_warn_unused virtual size_t convert_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-16BE string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16BE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if input is not a valid UTF-16BE string
+ */
+ simdutf_warn_unused virtual size_t convert_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-16LE string into Latin1 string and stop on error.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+ */
+ simdutf_warn_unused virtual result convert_utf16le_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-16BE string into Latin1 string and stop on error.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16BE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+ */
+ simdutf_warn_unused virtual result convert_utf16be_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+ /**
+ * Convert valid UTF-16LE string into Latin1 string.
+ *
+ * This function assumes that the input string is valid UTF-16LE.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if conversion is not possible
+ */
+ simdutf_warn_unused virtual size_t convert_valid_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+ /**
+ * Convert valid UTF-16BE string into Latin1 string.
+ *
+ * This function assumes that the input string is valid UTF-16BE.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16BE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if conversion is not possible
+ */
+ simdutf_warn_unused virtual size_t convert_valid_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-16LE string into UTF-8 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param utf8_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if input is not a valid UTF-16LE string
+ */
+ simdutf_warn_unused virtual size_t convert_utf16le_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-16BE string into UTF-8 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16BE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param utf8_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if input is not a valid UTF-16BE string
+ */
+ simdutf_warn_unused virtual size_t convert_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-16LE string into UTF-8 string and stop on error.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param utf8_buffer the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+ */
+ simdutf_warn_unused virtual result convert_utf16le_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-16BE string into UTF-8 string and stop on error.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16BE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param utf8_buffer the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+ */
+ simdutf_warn_unused virtual result convert_utf16be_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
+
+ /**
+ * Convert valid UTF-16LE string into UTF-8 string.
+ *
+ * This function assumes that the input string is valid UTF-16LE.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param utf8_buffer the pointer to buffer that can hold the conversion result
+ * @return number of written words; 0 if conversion is not possible
+ */
+ simdutf_warn_unused virtual size_t convert_valid_utf16le_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
+
+ /**
+ * Convert valid UTF-16BE string into UTF-8 string.
+ *
+ * This function assumes that the input string is valid UTF-16BE.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16BE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param utf8_buffer the pointer to buffer that can hold the conversion result
+ * @return number of written words; 0 if conversion is not possible
+ */
+ simdutf_warn_unused virtual size_t convert_valid_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-16LE string into UTF-32 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param utf32_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if input is not a valid UTF-16LE string
+ */
+ simdutf_warn_unused virtual size_t convert_utf16le_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-16BE string into UTF-32 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16BE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param utf32_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if input is not a valid UTF-16BE string
+ */
+ simdutf_warn_unused virtual size_t convert_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-16LE string into UTF-32 string and stop on error.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param utf32_buffer the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful.
+ */
+ simdutf_warn_unused virtual result convert_utf16le_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-16BE string into UTF-32 string and stop on error.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16BE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param utf32_buffer the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful.
+ */
+ simdutf_warn_unused virtual result convert_utf16be_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
+
+ /**
+ * Convert valid UTF-16LE string into UTF-32 string.
+ *
+ * This function assumes that the input string is valid UTF-16LE.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param utf32_buffer the pointer to buffer that can hold the conversion result
+ * @return number of written words; 0 if conversion is not possible
+ */
+ simdutf_warn_unused virtual size_t convert_valid_utf16le_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
+
+ /**
+ * Convert valid UTF-16BE string into UTF-32 string.
+ *
+ * This function assumes that the input string is valid UTF-16BE.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16BE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param utf32_buffer the pointer to buffer that can hold the conversion result
+ * @return number of written words; 0 if conversion is not possible
+ */
+ simdutf_warn_unused virtual size_t convert_valid_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
+
+ /**
+ * Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
+ *
+ * This function does not validate the input.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @return the number of bytes required to encode the UTF-16LE string as UTF-8
+ */
+ simdutf_warn_unused virtual size_t utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept = 0;
+
+ /**
+ * Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
+ *
+ * This function does not validate the input.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16BE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @return the number of bytes required to encode the UTF-16BE string as UTF-8
+ */
+ simdutf_warn_unused virtual size_t utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-32 string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if input is not a valid UTF-32 string
+ */
+
+ simdutf_warn_unused virtual size_t convert_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-32 string into Latin1 string and stop on error.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+ */
+
+ simdutf_warn_unused virtual result convert_utf32_to_latin1_with_errors(const char32_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+ /**
+ * Convert valid UTF-32 string into Latin1 string.
+ *
+ * This function assumes that the input string is valid UTF-32.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @param latin1_buffer the pointer to buffer that can hold the conversion result
+ * @return number of written words; 0 if conversion is not possible
+ */
+ simdutf_warn_unused virtual size_t convert_valid_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-32 string into UTF-8 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @param utf8_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if input is not a valid UTF-32 string
+ */
+ simdutf_warn_unused virtual size_t convert_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-32 string into UTF-8 string and stop on error.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @param utf8_buffer the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+ */
+ simdutf_warn_unused virtual result convert_utf32_to_utf8_with_errors(const char32_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
+
+ /**
+ * Convert valid UTF-32 string into UTF-8 string.
+ *
+ * This function assumes that the input string is valid UTF-32.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @param utf8_buffer the pointer to buffer that can hold the conversion result
+ * @return number of written words; 0 if conversion is not possible
+ */
+ simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
+
+
+ /**
+ * Return the number of 2-byte words (char16_t) that a Latin1 string of the given length would require in UTF-16 format.
+ *
+ * Only the length is used; no input buffer is read.
+ *
+ * @param length the length of the Latin1 string in bytes
+ * @return the number of 2-byte words (char16_t) required to encode the Latin1 string as UTF-16
+ */
simdutf_warn_unused virtual size_t utf16_length_from_latin1(size_t length) const noexcept = 0;
- /**
- * Convert possibly broken UTF-32 string into UTF-16LE string.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-32 string to convert
- * @param length the length of the string in 4-byte words (char32_t)
- * @param utf16_buffer the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-32 string
- */
- simdutf_warn_unused virtual size_t convert_utf32_to_utf16le(const char32_t* input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-32 string into UTF-16BE string.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-32 string to convert
- * @param length the length of the string in 4-byte words (char32_t)
- * @param utf16_buffer the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-32 string
- */
- simdutf_warn_unused virtual size_t convert_utf32_to_utf16be(const char32_t* input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-32 string into UTF-16LE string and stop on error.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-32 string to convert
- * @param length the length of the string in 4-byte words (char32_t)
- * @param utf16_buffer the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
- */
- simdutf_warn_unused virtual result convert_utf32_to_utf16le_with_errors(const char32_t* input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
-
- /**
- * Convert possibly broken UTF-32 string into UTF-16BE string and stop on error.
- *
- * During the conversion also validation of the input string is done.
- * This function is suitable to work with inputs from untrusted sources.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-32 string to convert
- * @param length the length of the string in 4-byte words (char32_t)
- * @param utf16_buffer the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
- */
- simdutf_warn_unused virtual result convert_utf32_to_utf16be_with_errors(const char32_t* input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
-
- /**
- * Convert valid UTF-32 string into UTF-16LE string.
- *
- * This function assumes that the input string is valid UTF-32.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-32 string to convert
- * @param length the length of the string in 4-byte words (char32_t)
- * @param utf16_buffer the pointer to buffer that can hold the conversion result
- * @return number of written words; 0 if conversion is not possible
- */
- simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf16le(const char32_t* input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
-
- /**
- * Convert valid UTF-32 string into UTF-16BE string.
- *
- * This function assumes that the input string is valid UTF-32.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-32 string to convert
- * @param length the length of the string in 4-byte words (char32_t)
- * @param utf16_buffer the pointer to buffer that can hold the conversion result
- * @return number of written words; 0 if conversion is not possible
- */
- simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf16be(const char32_t* input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
+ /**
+ * Convert possibly broken UTF-32 string into UTF-16LE string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @param utf16_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if input is not a valid UTF-32 string
+ */
+ simdutf_warn_unused virtual size_t convert_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-32 string into UTF-16BE string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @param utf16_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if input is not a valid UTF-32 string
+ */
+ simdutf_warn_unused virtual size_t convert_utf32_to_utf16be(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-32 string into UTF-16LE string and stop on error.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @param utf16_buffer the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
+ */
+ simdutf_warn_unused virtual result convert_utf32_to_utf16le_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-32 string into UTF-16BE string and stop on error.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @param utf16_buffer the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
+ */
+ simdutf_warn_unused virtual result convert_utf32_to_utf16be_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
+
+ /**
+ * Convert valid UTF-32 string into UTF-16LE string.
+ *
+ * This function assumes that the input string is valid UTF-32.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @param utf16_buffer the pointer to buffer that can hold the conversion result
+ * @return number of written words; 0 if conversion is not possible
+ */
+ simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
+
+ /**
+ * Convert valid UTF-32 string into UTF-16BE string.
+ *
+ * This function assumes that the input string is valid UTF-32.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @param utf16_buffer the pointer to buffer that can hold the conversion result
+ * @return number of written words; 0 if conversion is not possible
+ */
+ simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf16be(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
+
+ /**
+ * Change the endianness of the input. Can be used to go from UTF-16LE to UTF-16BE or
+ * from UTF-16BE to UTF-16LE.
+ *
+ * This function does not validate the input.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16 string to process
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param output the pointer to buffer that can hold the conversion result
+ */
+ virtual void change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) const noexcept = 0;
+
+ /**
+ * Return the number of bytes that this Latin1 string would require in UTF-8 format.
+ *
+ * @param input the Latin1 string to convert
+ * @param length the length of the string in bytes
+ * @return the number of bytes required to encode the Latin1 string as UTF-8
+ */
+ simdutf_warn_unused virtual size_t utf8_length_from_latin1(const char * input, size_t length) const noexcept = 0;
+
+ /**
+ * Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
+ *
+ * This function does not validate the input.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @return the number of bytes required to encode the UTF-32 string as UTF-8
+ */
+ simdutf_warn_unused virtual size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept = 0;
+
+ /**
+ * Compute the number of bytes that this UTF-32 string would require in Latin1 format.
+ *
+ * This function does not validate the input.
+ *
+ * @param length the length of the UTF-32 string in 4-byte words (char32_t);
+ *               only the length is used, no input buffer is read
+ * @return the number of bytes required to encode the UTF-32 string as Latin1
+ */
+ simdutf_warn_unused virtual size_t latin1_length_from_utf32( size_t length) const noexcept = 0;
+
+ /**
+ * Compute the number of bytes that this UTF-8 string would require in Latin1 format.
+ *
+ * This function does not validate the input.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in bytes
+ * @return the number of bytes required to encode the UTF-8 string as Latin1
+ */
+ simdutf_warn_unused virtual size_t latin1_length_from_utf8(const char * input, size_t length) const noexcept = 0;
- /**
- * Change the endianness of the input. Can be used to go from UTF-16LE to UTF-16BE or
- * from UTF-16BE to UTF-16LE.
- *
- * This function does not validate the input.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16 string to process
- * @param length the length of the string in 2-byte words (char16_t)
- * @param output the pointer to buffer that can hold the conversion result
- */
- virtual void change_endianness_utf16(const char16_t* input, size_t length, char16_t* output) const noexcept = 0;
-
- /**
- * Return the number of bytes that this Latin1 string would require in UTF-8 format.
- *
- * @param input the Latin1 string to convert
- * @param length the length of the string bytes
- * @return the number of bytes required to encode the Latin1 string as UTF-8
- */
- simdutf_warn_unused virtual size_t utf8_length_from_latin1(const char* input, size_t length) const noexcept = 0;
-
- /**
- * Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
- *
- * This function does not validate the input.
- *
- * @param input the UTF-32 string to convert
- * @param length the length of the string in 4-byte words (char32_t)
- * @return the number of bytes required to encode the UTF-32 string as UTF-8
- */
- simdutf_warn_unused virtual size_t utf8_length_from_utf32(const char32_t* input, size_t length) const noexcept = 0;
-
- /**
- * Compute the number of bytes that this UTF-32 string would require in Latin1 format.
- *
- * This function does not validate the input.
- *
- * @param input the UTF-32 string to convert
- * @param length the length of the string in 4-byte words (char32_t)
- * @return the number of bytes required to encode the UTF-32 string as Latin1
- */
- simdutf_warn_unused virtual size_t latin1_length_from_utf32(size_t length) const noexcept = 0;
-
- /**
- * Compute the number of bytes that this UTF-8 string would require in Latin1 format.
- *
- * This function does not validate the input.
- *
- * @param input the UTF-8 string to convert
- * @param length the length of the string in byte
- * @return the number of bytes required to encode the UTF-8 string as Latin1
- */
- simdutf_warn_unused virtual size_t latin1_length_from_utf8(const char* input, size_t length) const noexcept = 0;
-
- /*
- * Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
- *
- * This function does not validate the input.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16LE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @return the number of bytes required to encode the UTF-16LE string as Latin1
- */
- simdutf_warn_unused virtual size_t latin1_length_from_utf16(size_t length) const noexcept = 0;
-
- /**
- * Compute the number of two-byte words that this UTF-32 string would require in UTF-16 format.
- *
- * This function does not validate the input.
- *
- * @param input the UTF-32 string to convert
- * @param length the length of the string in 4-byte words (char32_t)
- * @return the number of bytes required to encode the UTF-32 string as UTF-16
- */
- simdutf_warn_unused virtual size_t utf16_length_from_utf32(const char32_t* input, size_t length) const noexcept = 0;
-
- /**
- * Return the number of bytes that this UTF-32 string would require in Latin1 format.
- *
- * This function does not validate the input.
- *
- * @param input the UTF-32 string to convert
- * @param length the length of the string in 4-byte words (char32_t)
- * @return the number of bytes required to encode the UTF-32 string as Latin1
- */
+ /**
+ * Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
+ *
+ * This function does not validate the input.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param length the length of the UTF-16LE/BE string in 2-byte words (char16_t);
+ *               only the length is used, no input buffer is read
+ * @return the number of bytes required to encode the UTF-16LE/BE string as Latin1
+ */
+ simdutf_warn_unused virtual size_t latin1_length_from_utf16(size_t length) const noexcept = 0;
+
+ /**
+ * Compute the number of two-byte words that this UTF-32 string would require in UTF-16 format.
+ *
+ * This function does not validate the input.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @return the number of two-byte words (char16_t) required to encode the UTF-32 string as UTF-16
+ */
+ simdutf_warn_unused virtual size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept = 0;
+
+
+ /**
+ * Return the number of 4-byte words (char32_t) that a Latin1 string of the given length would require in UTF-32 format.
+ *
+ * This function does not validate the input.
+ *
+ * @param length the length of the Latin1 string in bytes;
+ *               only the length is used, no input buffer is read
+ * @return the number of 4-byte words (char32_t) required to encode the Latin1 string as UTF-32
+ */
simdutf_warn_unused virtual size_t utf32_length_from_latin1(size_t length) const noexcept = 0;
- /*
- * Compute the number of bytes that this UTF-16LE string would require in UTF-32 format.
- *
- * This function is equivalent to count_utf16le.
- *
- * This function does not validate the input.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16LE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @return the number of bytes required to encode the UTF-16LE string as UTF-32
- */
- simdutf_warn_unused virtual size_t utf32_length_from_utf16le(const char16_t* input, size_t length) const noexcept = 0;
-
- /*
- * Compute the number of bytes that this UTF-16BE string would require in UTF-32 format.
- *
- * This function is equivalent to count_utf16be.
- *
- * This function does not validate the input.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16BE string to convert
- * @param length the length of the string in 2-byte words (char16_t)
- * @return the number of bytes required to encode the UTF-16BE string as UTF-32
- */
- simdutf_warn_unused virtual size_t utf32_length_from_utf16be(const char16_t* input, size_t length) const noexcept = 0;
+ /**
+ * Compute the number of 4-byte words (char32_t) that this UTF-16LE string would require in UTF-32 format.
+ *
+ * This function is equivalent to count_utf16le.
+ *
+ * This function does not validate the input.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @return the number of 4-byte words (char32_t) required to encode the UTF-16LE string as UTF-32
+ */
+ simdutf_warn_unused virtual size_t utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept = 0;
+
+ /**
+ * Compute the number of 4-byte words (char32_t) that this UTF-16BE string would require in UTF-32 format.
+ *
+ * This function is equivalent to count_utf16be.
+ *
+ * This function does not validate the input.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16BE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @return the number of 4-byte words (char32_t) required to encode the UTF-16BE string as UTF-32
+ */
+ simdutf_warn_unused virtual size_t utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept = 0;
+
+ /**
+ * Count the number of code points (characters) in the string assuming that
+ * it is valid.
+ *
+ * This function assumes that the input string is valid UTF-16LE.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to process
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @return number of code points
+ */
+ simdutf_warn_unused virtual size_t count_utf16le(const char16_t * input, size_t length) const noexcept = 0;
+
+ /**
+ * Count the number of code points (characters) in the string assuming that
+ * it is valid.
+ *
+ * This function assumes that the input string is valid UTF-16BE.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16BE string to process
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @return number of code points
+ */
+ simdutf_warn_unused virtual size_t count_utf16be(const char16_t * input, size_t length) const noexcept = 0;
+
+
+ /**
+ * Count the number of code points (characters) in the string assuming that
+ * it is valid.
+ *
+ * This function assumes that the input string is valid UTF-8.
+ *
+ * @param input the UTF-8 string to process
+ * @param length the length of the string in bytes
+ * @return number of code points
+ */
+ simdutf_warn_unused virtual size_t count_utf8(const char * input, size_t length) const noexcept = 0;
- /**
- * Count the number of code points (characters) in the string assuming that
- * it is valid.
- *
- * This function assumes that the input string is valid UTF-16LE.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16LE string to process
- * @param length the length of the string in 2-byte words (char16_t)
- * @return number of code points
- */
- simdutf_warn_unused virtual size_t count_utf16le(const char16_t* input, size_t length) const noexcept = 0;
- /**
- * Count the number of code points (characters) in the string assuming that
- * it is valid.
- *
- * This function assumes that the input string is valid UTF-16BE.
- *
- * This function is not BOM-aware.
- *
- * @param input the UTF-16BE string to process
- * @param length the length of the string in 2-byte words (char16_t)
- * @return number of code points
- */
- simdutf_warn_unused virtual size_t count_utf16be(const char16_t* input, size_t length) const noexcept = 0;
-
- /**
- * Count the number of code points (characters) in the string assuming that
- * it is valid.
- *
- * This function assumes that the input string is valid UTF-8.
- *
- * @param input the UTF-8 string to process
- * @param length the length of the string in bytes
- * @return number of code points
- */
- simdutf_warn_unused virtual size_t count_utf8(const char* input, size_t length) const noexcept = 0;
protected:
- /** @private Construct an implementation with the given name and description. For subclasses. */
- simdutf_really_inline implementation(
- std::string name,
- std::string description,
- uint32_t required_instruction_sets)
- : _name(name)
- , _description(description)
- , _required_instruction_sets(required_instruction_sets)
- {
- }
- virtual ~implementation() = default;
+ /** @private Construct an implementation with the given name and description. For subclasses. */
+ simdutf_really_inline implementation(
+ std::string name,
+ std::string description,
+ uint32_t required_instruction_sets
+ ) :
+ _name(name),
+ _description(description),
+ _required_instruction_sets(required_instruction_sets)
+ {
+ }
+ virtual ~implementation()=default;
private:
- /**
- * The name of this implementation.
- */
- const std::string _name;
-
- /**
- * The description of this implementation.
- */
- const std::string _description;
-
- /**
- * Instruction sets required for this implementation.
- */
- const uint32_t _required_instruction_sets;
+ /**
+ * The name of this implementation.
+ */
+ const std::string _name;
+
+ /**
+ * The description of this implementation.
+ */
+ const std::string _description;
+
+ /**
+ * Instruction sets required for this implementation.
+ */
+ const uint32_t _required_instruction_sets;
};
/** @private */
@@ -3210,102 +3223,82 @@ namespace internal {
*/
class available_implementation_list {
public:
- /** Get the list of available implementations compiled into simdutf */
- simdutf_really_inline available_implementation_list() {}
- /** Number of implementations */
- size_t size() const noexcept;
- /** STL const begin() iterator */
- const implementation* const* begin() const noexcept;
- /** STL const end() iterator */
- const implementation* const* end() const noexcept;
-
- /**
- * Get the implementation with the given name.
- *
- * Case sensitive.
- *
- * const implementation *impl = simdutf::available_implementations["westmere"];
- * if (!impl) { exit(1); }
- * if (!imp->supported_by_runtime_system()) { exit(1); }
- * simdutf::active_implementation = impl;
- *
- * @param name the implementation to find, e.g. "westmere", "haswell", "arm64"
- * @return the implementation, or nullptr if the parse failed.
- */
- const implementation* operator[](const std::string& name) const noexcept
- {
- for (const implementation* impl : *this) {
- if (impl->name() == name) {
- return impl;
- }
- }
- return nullptr;
+ /** Get the list of available implementations compiled into simdutf */
+ simdutf_really_inline available_implementation_list() {}
+ /** Number of implementations */
+ size_t size() const noexcept;
+ /** STL const begin() iterator */
+ const implementation * const *begin() const noexcept;
+ /** STL const end() iterator */
+ const implementation * const *end() const noexcept;
+
+ /**
+ * Get the implementation with the given name.
+ *
+ * Case sensitive.
+ *
+ * const implementation *impl = simdutf::get_available_implementations()["westmere"];
+ * if (!impl) { exit(1); }
+ * if (!impl->supported_by_runtime_system()) { exit(1); }
+ * simdutf::active_implementation = impl;
+ *
+ * @param name the implementation to find, e.g. "westmere", "haswell", "arm64"
+ * @return the implementation, or nullptr if the parse failed.
+ */
+ const implementation * operator[](const std::string &name) const noexcept {
+ for (const implementation * impl : *this) {
+ if (impl->name() == name) { return impl; }
}
-
- /**
- * Detect the most advanced implementation supported by the current host.
- *
- * This is used to initialize the implementation on startup.
- *
- * const implementation *impl = simdutf::available_implementation::detect_best_supported();
- * simdutf::active_implementation = impl;
- *
- * @return the most advanced supported implementation for the current host, or an
- * implementation that returns UNSUPPORTED_ARCHITECTURE if there is no supported
- * implementation. Will never return nullptr.
- */
- const implementation* detect_best_supported() const noexcept;
+ return nullptr;
+ }
+
+ /**
+ * Detect the most advanced implementation supported by the current host.
+ *
+ * This is used to initialize the implementation on startup.
+ *
+ * const implementation *impl = simdutf::get_available_implementations().detect_best_supported();
+ * simdutf::active_implementation = impl;
+ *
+ * @return the most advanced supported implementation for the current host, or an
+ * implementation that returns UNSUPPORTED_ARCHITECTURE if there is no supported
+ * implementation. Will never return nullptr.
+ */
+ const implementation *detect_best_supported() const noexcept;
};
template<typename T>
class atomic_ptr {
public:
- atomic_ptr(T* _ptr)
- : ptr { _ptr }
- {
- }
+ atomic_ptr(T *_ptr) : ptr{_ptr} {}
#if defined(SIMDUTF_NO_THREADS)
- operator const T*() const
- {
- return ptr;
- }
- const T& operator*() const { return *ptr; }
- const T* operator->() const { return ptr; }
-
- operator T*() { return ptr; }
- T& operator*() { return *ptr; }
- T* operator->() { return ptr; }
- atomic_ptr& operator=(T* _ptr)
- {
- ptr = _ptr;
- return *this;
- }
+ operator const T*() const { return ptr; }
+ const T& operator*() const { return *ptr; }
+ const T* operator->() const { return ptr; }
+
+ operator T*() { return ptr; }
+ T& operator*() { return *ptr; }
+ T* operator->() { return ptr; }
+ atomic_ptr& operator=(T *_ptr) { ptr = _ptr; return *this; }
#else
- operator const T*() const
- {
- return ptr.load();
- }
- const T& operator*() const { return *ptr; }
- const T* operator->() const { return ptr.load(); }
-
- operator T*() { return ptr.load(); }
- T& operator*() { return *ptr; }
- T* operator->() { return ptr.load(); }
- atomic_ptr& operator=(T* _ptr)
- {
- ptr = _ptr;
- return *this;
- }
+ operator const T*() const { return ptr.load(); }
+ const T& operator*() const { return *ptr; }
+ const T* operator->() const { return ptr.load(); }
+
+ operator T*() { return ptr.load(); }
+ T& operator*() { return *ptr; }
+ T* operator->() { return ptr.load(); }
+ atomic_ptr& operator=(T *_ptr) { ptr = _ptr; return *this; }
#endif
private:
#if defined(SIMDUTF_NO_THREADS)
- T* ptr;
+ T* ptr;
#else
- std::atomic<T*> ptr;
+ std::atomic<T*> ptr;
#endif
};
@@ -3319,22 +3312,25 @@ class detect_best_supported_implementation_on_first_use;
extern SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations();
/**
- * The active implementation.
- *
- * Automatically initialized on first use to the most advanced implementation supported by this hardware.
- */
+ * The active implementation.
+ *
+ * Automatically initialized on first use to the most advanced implementation supported by this hardware.
+ */
extern SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr<const implementation>& get_active_implementation();
+
} // namespace simdutf
#endif // SIMDUTF_IMPLEMENTATION_H
/* end file include/simdutf/implementation.h */
+
// Implementation-internal files (must be included before the implementations themselves, to keep
// amalgamation working--otherwise, the first time a file is included, it might be put inside the
// #ifdef SIMDUTF_IMPLEMENTATION_ARM64/FALLBACK/etc., which means the other implementations can't
// compile unless that implementation is turned on).
+
SIMDUTF_POP_DISABLE_WARNINGS
#endif // SIMDUTF_H