diff options
Diffstat (limited to 'src/bun.js/bindings/simdutf.h')
-rw-r--r-- | src/bun.js/bindings/simdutf.h | 659 |
1 files changed, 619 insertions, 40 deletions
diff --git a/src/bun.js/bindings/simdutf.h b/src/bun.js/bindings/simdutf.h index 0a57a69f7..7fb388e9e 100644 --- a/src/bun.js/bindings/simdutf.h +++ b/src/bun.js/bindings/simdutf.h @@ -1,11 +1,11 @@ -/* auto-generated on 2023-02-10 14:42:58 -0500. Do not edit! */ -// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf.h +/* auto-generated on 2023-06-21 08:09:45 -0400. Do not edit! */ +// dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf.h /* begin file include/simdutf.h */ #ifndef SIMDUTF_H #define SIMDUTF_H #include <cstring> -// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/compiler_check.h +// dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf/compiler_check.h /* begin file include/simdutf/compiler_check.h */ #ifndef SIMDUTF_COMPILER_CHECK_H #define SIMDUTF_COMPILER_CHECK_H @@ -43,13 +43,13 @@ #endif // SIMDUTF_COMPILER_CHECK_H /* end file include/simdutf/compiler_check.h */ -// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/common_defs.h +// dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf/common_defs.h /* begin file include/simdutf/common_defs.h */ #ifndef SIMDUTF_COMMON_DEFS_H #define SIMDUTF_COMMON_DEFS_H #include <cassert> -// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/portability.h +// dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf/portability.h /* begin file include/simdutf/portability.h */ #ifndef SIMDUTF_PORTABILITY_H #define SIMDUTF_PORTABILITY_H @@ -144,6 +144,8 @@ // POWER processors. Please see https://github.com/lemire/simdutf/issues/51 #elif defined(__s390__) // s390 IBM system. Big endian. 
+#elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64 +// RISC-V 64-bit #else // The simdutf library is designed // for 64-bit processors and it seems that you are not @@ -278,7 +280,7 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.") #endif // SIMDUTF_PORTABILITY_H /* end file include/simdutf/portability.h */ -// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/avx512.h +// dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf/avx512.h /* begin file include/simdutf/avx512.h */ #ifndef SIMDUTF_AVX512_H_ #define SIMDUTF_AVX512_H_ @@ -458,19 +460,21 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.") #endif // MSC_VER -#if defined(SIMDUTF_VISUAL_STUDIO) - /** - * It does not matter here whether you are using - * the regular visual studio or clang under visual - * studio. - */ - #if SIMDUTF_USING_LIBRARY - #define SIMDUTF_DLLIMPORTEXPORT __declspec(dllimport) +#ifndef SIMDUTF_DLLIMPORTEXPORT + #if defined(SIMDUTF_VISUAL_STUDIO) + /** + * It does not matter here whether you are using + * the regular visual studio or clang under visual + * studio. + */ + #if SIMDUTF_USING_LIBRARY + #define SIMDUTF_DLLIMPORTEXPORT __declspec(dllimport) + #else + #define SIMDUTF_DLLIMPORTEXPORT __declspec(dllexport) + #endif #else - #define SIMDUTF_DLLIMPORTEXPORT __declspec(dllexport) + #define SIMDUTF_DLLIMPORTEXPORT #endif -#else - #define SIMDUTF_DLLIMPORTEXPORT #endif /// If EXPR is an error, returns it. 
@@ -479,7 +483,7 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.") #endif // SIMDUTF_COMMON_DEFS_H /* end file include/simdutf/common_defs.h */ -// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/encoding_types.h +// dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf/encoding_types.h /* begin file include/simdutf/encoding_types.h */ #include <string> @@ -491,6 +495,7 @@ enum encoding_type { UTF16_BE = 4, // BOM 0xfe 0xff UTF32_LE = 8, // BOM 0xff 0xfe 0x00 0x00 UTF32_BE = 16, // BOM 0x00 0x00 0xfe 0xff + Latin1 = 32, unspecified = 0 }; @@ -527,7 +532,7 @@ size_t bom_byte_size(encoding_type bom); } // BOM namespace } // simdutf namespace /* end file include/simdutf/encoding_types.h */ -// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/error.h +// dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf/error.h /* begin file include/simdutf/error.h */ #ifndef ERROR_H #define ERROR_H @@ -541,9 +546,10 @@ enum error_code { TOO_LONG, // We either have too many consecutive continuation bytes or the string starts with a continuation byte. OVERLONG, // The decoded character must be above U+7F for two-byte characters, U+7FF for three-byte characters, // and U+FFFF for four-byte characters. - TOO_LARGE, // The decoded character must be less than or equal to U+10FFFF OR less than or equal than U+7F for ASCII. 
+ TOO_LARGE, // The decoded character must be less than or equal to U+10FFFF, less than or equal to U+7F for ASCII, OR less than or equal to U+FF for Latin1 SURROGATE, // The decoded character must be not be in U+D800...DFFF (UTF-8 or UTF-32) OR - // a high surrogate must be followed by a low surrogate and a low surrogate must be preceded by a high surrogate (UTF-16) + // a high surrogate must be followed by a low surrogate and a low surrogate must be preceded by a high surrogate (UTF-16) OR + // there must be no surrogate at all (Latin1) OTHER // Not related to validation/transcoding. }; @@ -564,7 +570,7 @@ SIMDUTF_PUSH_DISABLE_WARNINGS SIMDUTF_DISABLE_UNDESIRED_WARNINGS // Public API -// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/simdutf_version.h +// dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf/simdutf_version.h /* begin file include/simdutf/simdutf_version.h */ // /include/simdutf/simdutf_version.h automatically generated by release.py, // do not change by hand @@ -572,7 +578,7 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS #define SIMDUTF_SIMDUTF_VERSION_H /** The version of simdutf being used (major.minor.revision) */ -#define SIMDUTF_VERSION "3.2.0" +#define SIMDUTF_VERSION "3.2.14" namespace simdutf { enum { @@ -587,13 +593,13 @@ enum { /** * The revision (major.minor.REVISION) of simdutf being used. 
*/ - SIMDUTF_VERSION_REVISION = 0 + SIMDUTF_VERSION_REVISION = 14 }; } // namespace simdutf #endif // SIMDUTF_SIMDUTF_VERSION_H /* end file include/simdutf/simdutf_version.h */ -// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/implementation.h +// dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf/implementation.h /* begin file include/simdutf/implementation.h */ #ifndef SIMDUTF_IMPLEMENTATION_H #define SIMDUTF_IMPLEMENTATION_H @@ -603,7 +609,7 @@ enum { #endif #include <vector> #include <tuple> -// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/internal/isadetection.h +// dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf/internal/isadetection.h /* begin file include/simdutf/internal/isadetection.h */ /* From https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h @@ -690,22 +696,12 @@ static inline uint32_t detect_supported_architectures() { return instruction_set::ALTIVEC; } -#elif defined(__arm__) || defined(__aarch64__) // incl. armel, armhf, arm64 - -#if defined(__ARM_NEON) +#elif defined(__aarch64__) || defined(_M_ARM64) static inline uint32_t detect_supported_architectures() { return instruction_set::NEON; } -#else // ARM without NEON - -static inline uint32_t detect_supported_architectures() { - return instruction_set::DEFAULT; -} - -#endif - #elif defined(__x86_64__) || defined(_M_AMD64) // x64 @@ -716,6 +712,7 @@ namespace cpuid_bit { // EAX = 0x01 constexpr uint32_t pclmulqdq = uint32_t(1) << 1; ///< @private bit 1 of ECX for EAX=0x1 constexpr uint32_t sse42 = uint32_t(1) << 20; ///< @private bit 20 of ECX for EAX=0x1 + constexpr uint32_t osxsave = (uint32_t(1) << 26) | (uint32_t(1) << 27); ///< @private bits 26+27 of ECX for EAX=0x1 // EAX = 0x7f (Structured Extended Feature Flags), ECX = 0x00 (Sub-leaf) // See: "Table 3-8. 
Information Returned by CPUID Instruction" @@ -741,6 +738,10 @@ namespace cpuid_bit { namespace edx { constexpr uint32_t avx512vp2intersect = uint32_t(1) << 8; } + namespace xcr0_bit { + constexpr uint64_t avx256_saved = uint64_t(1) << 2; ///< @private bit 2 = AVX + constexpr uint64_t avx512_saved = uint64_t(7) << 5; ///< @private bits 5,6,7 = opmask, ZMM_hi256, hi16_ZMM + } } } @@ -750,7 +751,7 @@ static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) { #if defined(_MSC_VER) int cpu_info[4]; - __cpuid(cpu_info, *eax); + __cpuidex(cpu_info, *eax, *ecx); *eax = cpu_info[0]; *ebx = cpu_info[1]; *ecx = cpu_info[2]; @@ -768,6 +769,16 @@ static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, #endif } +static inline uint64_t xgetbv() { + #if defined(_MSC_VER) + return _xgetbv(0); + #else + uint32_t xcr0_lo, xcr0_hi; + asm volatile("xgetbv\n\t" : "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0)); + return xcr0_lo | ((uint64_t)xcr0_hi << 32); + #endif + } + static inline uint32_t detect_supported_architectures() { uint32_t eax; uint32_t ebx = 0; @@ -787,6 +798,16 @@ static inline uint32_t detect_supported_architectures() { host_isa |= instruction_set::PCLMULQDQ; } + if ((ecx & cpuid_bit::osxsave) != cpuid_bit::osxsave) { + return host_isa; + } + + // xgetbv for checking if the OS saves registers + uint64_t xcr0 = xgetbv(); + + if ((xcr0 & cpuid_bit::xcr0_bit::avx256_saved) == 0) { + return host_isa; + } // ECX for EAX=0x7 eax = 0x7; ecx = 0x0; // Sub-leaf = 0 @@ -800,6 +821,9 @@ static inline uint32_t detect_supported_architectures() { if (ebx & cpuid_bit::ebx::bmi2) { host_isa |= instruction_set::BMI2; } + if (!((xcr0 & cpuid_bit::xcr0_bit::avx512_saved) == cpuid_bit::xcr0_bit::avx512_saved)) { + return host_isa; + } if (ebx & cpuid_bit::ebx::avx512f) { host_isa |= instruction_set::AVX512F; } @@ -822,7 +846,7 @@ static inline uint32_t detect_supported_architectures() { } #else // fallback - +// includes 32-bit ARM. 
static inline uint32_t detect_supported_architectures() { return instruction_set::DEFAULT; } @@ -870,7 +894,6 @@ simdutf_really_inline simdutf_warn_unused int detect_encodings(const uint8_t * i return detect_encodings(reinterpret_cast<const char *>(input), length); } - /** * Validate the UTF-8 string. This function may be best when you expect * the input to be almost always valid. Otherwise, consider using @@ -1034,6 +1057,68 @@ simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) noexcep */ simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) noexcept; + /** + * Convert Latin1 string into UTF-8 string. + * + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf8_output the pointer to buffer that can hold conversion result + * @return the number of written char; 0 if conversion is not possible + */ + simdutf_warn_unused size_t convert_latin1_to_utf8(const char * input, size_t length, char* utf8_output) noexcept; + + + /** + * Convert Latin1 string into UTF-16LE string. + * + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf16_output the pointer to buffer that can hold conversion result + * @return the number of written char16_t; 0 if conversion is not possible + */ + simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * input, size_t length, char16_t* utf16_output) noexcept; + + /** + * Convert Latin1 string into UTF-16BE string. + * + * This function is suitable to work with inputs from untrusted sources. 
+ * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return the number of written char16_t; 0 if conversion is not possible + */ + simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * input, size_t length, char16_t* utf16_output) noexcept; + + /** + * Convert Latin1 string into UTF-32 string. + * + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return the number of written char32_t; 0 if conversion is not possible + */ + simdutf_warn_unused size_t convert_latin1_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) noexcept; + + /** + * Convert possibly broken UTF-8 string into latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param latin1_output the pointer to buffer that can hold conversion result + * @return the number of written char; 0 if the input was not valid UTF-8 string + */ + simdutf_warn_unused size_t convert_utf8_to_latin1(const char * input, size_t length, char* latin1_output) noexcept; + /** * Using native endianness; Convert possibly broken UTF-8 string into UTF-16 string. * @@ -1073,6 +1158,20 @@ simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * input, size_t le */ simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_output) noexcept; + + /** + * Convert possibly broken UTF-8 string into latin1 string. with errors + * + * During the conversion also validation of the input string is done. 
+ * This function is suitable to work with inputs from untrusted sources. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param latin1_output the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. + */ + simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * input, size_t length, char* latin1_output) noexcept; + /** * Using native endianness; Convert possibly broken UTF-8 string into UTF-16 * string and stop on error. @@ -1139,6 +1238,21 @@ simdutf_warn_unused size_t convert_utf8_to_utf32(const char * input, size_t leng */ simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * input, size_t length, char32_t* utf32_output) noexcept; + /** + * Convert valid UTF-8 string into latin1 string. + * + * This function assumes that the input string is valid UTF-8. + * + * This function is not BOM-aware. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param latin1_output the pointer to buffer that can hold conversion result + * @return the number of written char; 0 if the input was not valid UTF-8 string + */ + simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * input, size_t length, char* latin1_output) noexcept; + + /** * Using native endianness; Convert valid UTF-8 string into UTF-16 string. * @@ -1187,6 +1301,29 @@ simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * input, siz */ simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) noexcept; + + /** + * Return the number of bytes that this Latin1 string would require in UTF-8 format. 
+ * + * @param input the Latin1 string to convert + * @param length the length of the string bytes + * @return the number of bytes required to encode the Latin1 string as UTF-8 + */ + simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t length) noexcept; + + /** + * Compute the number of bytes that this UTF-8 string would require in Latin1 format. + * + * This function does not validate the input. + * + * This function is not BOM-aware. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in byte + * @return the number of bytes required to encode the UTF-8 string as Latin1 + */ + simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t length) noexcept; + /** * Compute the number of 2-byte words that this UTF-8 string would require in UTF-16LE format. * @@ -1230,6 +1367,38 @@ simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t len */ simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept; + + /** + * Convert possibly broken UTF-16LE string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if input is not a valid UTF-16LE string + */ + simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept; + + /** + * Convert possibly broken UTF-16BE string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. 
+ * + * This function is not BOM-aware. + * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if input is not a valid UTF-16BE string + */ + simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept; + + /** * Convert possibly broken UTF-16LE string into UTF-8 string. * @@ -1260,6 +1429,35 @@ simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * input, size_ */ simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept; + /** + * Convert possibly broken UTF-16LE string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. + */ + simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) noexcept; + + /** + * Convert possibly broken UTF-16BE string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * This function is not BOM-aware. 
+ * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. + */ + simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) noexcept; + + /** * Using native endianness; Convert possibly broken UTF-16 string into UTF-8 string and stop on error. * @@ -1319,6 +1517,36 @@ simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * */ simdutf_warn_unused size_t convert_valid_utf16_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept; + + /** + * Convert valid UTF-16LE string into Latin1 string. + * + * This function assumes that the input string is valid UTF-16LE. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if conversion is not possible + */ + simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept; + + /** + * Convert valid UTF-16BE string into Latin1 string. + * + * This function assumes that the input string is valid UTF-16BE. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if conversion is not possible + */ + simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept; + + /** * Convert valid UTF-16LE string into UTF-8 string. * @@ -1480,6 +1708,21 @@ simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * input */ simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept; + +/* + * Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format. + * + * This function does not validate the input. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @return the number of bytes required to encode the UTF-16LE string as Latin1 + */ + simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept; + + /** * Using native endianness; Compute the number of bytes that this UTF-16 * string would require in UTF-8 format. @@ -1588,6 +1831,53 @@ simdutf_warn_unused size_t convert_utf32_to_utf16(const char32_t * input, size_t */ simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept; + /** + * Convert possibly broken UTF-32 string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if input is not a valid UTF-32 string + */ + + simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) noexcept; + + + /** + * Convert possibly broken UTF-32 string into Latin1 string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. + */ + + simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * input, size_t length, char* latin1_buffer) noexcept; + + /** + * Convert valid UTF-32 string into Latin1 string. + * + * This function assumes that the input string is valid UTF-32. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @param latin1_buffer the pointer to buffer that can hold the conversion result + * @return number of written words; 0 if conversion is not possible + */ + simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) noexcept; + /** * Convert possibly broken UTF-32 string into UTF-16BE string. 
* @@ -2021,6 +2311,96 @@ public: simdutf_warn_unused virtual result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept = 0; /** + * Convert Latin1 string into UTF8 string. + * + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param latin1_output the pointer to buffer that can hold conversion result + * @return the number of written char; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t convert_latin1_to_utf8(const char * input, size_t length, char* utf8_output) const noexcept = 0; + + + /** + * Convert possibly Latin1 string into UTF-16LE string. + * + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return the number of written char16_t; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t convert_latin1_to_utf16le(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0; + + /** + * Convert Latin1 string into UTF-16BE string. + * + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf16_buffer the pointer to buffer that can hold conversion result + * @return the number of written char16_t; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t convert_latin1_to_utf16be(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0; + + /** + * Convert Latin1 string into UTF-32 string. + * + * This function is suitable to work with inputs from untrusted sources. 
+ * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @param utf32_buffer the pointer to buffer that can hold conversion result + * @return the number of written char32_t; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t convert_latin1_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-8 string into latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param latin1_output the pointer to buffer that can hold conversion result + * @return the number of written char; 0 if the input was not valid UTF-8 string + */ + simdutf_warn_unused virtual size_t convert_utf8_to_latin1(const char * input, size_t length, char* latin1_output) const noexcept = 0; + + /** + * Convert possibly broken UTF-8 string into latin1 string. with errors + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param latin1_output the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful. + */ + simdutf_warn_unused virtual result convert_utf8_to_latin1_with_errors(const char * input, size_t length, char* latin1_output) const noexcept = 0; + + /** + * Convert valid UTF-8 string into latin1 string. + * + * This function assumes that the input string is valid UTF-8. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @param latin1_output the pointer to buffer that can hold conversion result + * @return the number of written char; 0 if the input was not valid UTF-8 string + */ + simdutf_warn_unused virtual size_t convert_valid_utf8_to_latin1(const char * input, size_t length, char* latin1_output) const noexcept = 0; + + + /** * Convert possibly broken UTF-8 string into UTF-16LE string. * * During the conversion also validation of the input string is done. @@ -2159,6 +2539,92 @@ public: simdutf_warn_unused virtual size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept = 0; /** + * Convert possibly broken UTF-16LE string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if input is not a valid UTF-16LE string + */ + simdutf_warn_unused virtual size_t convert_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-16BE string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. 
+ */ + simdutf_warn_unused virtual size_t convert_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-16LE string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * This function is not BOM-aware. + * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. + */ + simdutf_warn_unused virtual result convert_utf16le_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-16BE string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * This function is not BOM-aware. + * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. + */ + simdutf_warn_unused virtual result convert_utf16be_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0; + + /** + * Convert valid UTF-16LE string into Latin1 string. + * + * This function assumes that the input string is valid UTF-8. + + * This function is not BOM-aware. 
+ * + * @param input the UTF-16LE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t convert_valid_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0; + + /** + * Convert valid UTF-16BE string into Latin1 string. + * + * This function assumes that the input string is valid UTF-8. + * + * This function is not BOM-aware. + * + * @param input the UTF-16BE string to convert + * @param length the length of the string in 2-byte words (char16_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t convert_valid_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0; + + /** * Convert possibly broken UTF-16LE string into UTF-8 string. * * During the conversion also validation of the input string is done. @@ -2361,6 +2827,52 @@ public: simdutf_warn_unused virtual size_t utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept = 0; /** + * Convert possibly broken UTF-32 string into Latin1 string. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. 
+ * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return number of written words; 0 if input is not a valid UTF-32 string + */ + + simdutf_warn_unused virtual size_t convert_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) const noexcept = 0; + + /** + * Convert possibly broken UTF-32 string into Latin1 string and stop on error. + * + * During the conversion also validation of the input string is done. + * This function is suitable to work with inputs from untrusted sources. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @param latin1_buffer the pointer to buffer that can hold conversion result + * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful. + */ + + simdutf_warn_unused virtual result convert_utf32_to_latin1_with_errors(const char32_t * input, size_t length, char* latin1_buffer) const noexcept = 0; + + /** + * Convert valid UTF-32 string into Latin1 string. + * + * This function assumes that the input string is valid UTF-32. + * + * This function is not BOM-aware. + * + * @param input the UTF-32 string to convert + * @param length the length of the string in 4-byte words (char32_t) + * @param latin1_buffer the pointer to buffer that can hold the conversion result + * @return number of written words; 0 if conversion is not possible + */ + simdutf_warn_unused virtual size_t convert_valid_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) const noexcept = 0; + + /** * Convert possibly broken UTF-32 string into UTF-8 string. * * During the conversion also validation of the input string is done. 
@@ -2404,6 +2916,17 @@ public: */ simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) const noexcept = 0; + + /** + * Return the number of 2-byte words (char16_t) that a Latin1 string would require in UTF-16 format. + * + * Only the length of the input is needed, not its contents. + * + * @param length the length of the Latin1 string in bytes + * @return the number of 2-byte words (char16_t) required to encode the Latin1 string as UTF-16 + */ + simdutf_warn_unused virtual size_t utf16_length_from_latin1(size_t length) const noexcept = 0; + /** * Convert possibly broken UTF-32 string into UTF-16LE string. * @@ -2506,6 +3029,15 @@ public: */ virtual void change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) const noexcept = 0; + /** + * Return the number of bytes that this Latin1 string would require in UTF-8 format. + * + * @param input the Latin1 string to convert + * @param length the length of the string in bytes + * @return the number of bytes required to encode the Latin1 string as UTF-8 + */ + simdutf_warn_unused virtual size_t utf8_length_from_latin1(const char * input, size_t length) const noexcept = 0; + /** * Compute the number of bytes that this UTF-32 string would require in UTF-8 format. * @@ -2518,6 +3050,41 @@ public: simdutf_warn_unused virtual size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept = 0; /** + * Compute the number of bytes that this UTF-32 string would require in Latin1 format. + * + * This function does not validate the input. + * Only the length of the input is needed, not its contents. + * + * @param length the length of the UTF-32 string in 4-byte words (char32_t) + * @return the number of bytes required to encode the UTF-32 string as Latin1 + */ + simdutf_warn_unused virtual size_t latin1_length_from_utf32( size_t length) const noexcept = 0; + + /** + * Compute the number of bytes that this UTF-8 string would require in Latin1 format. 
+ * + * This function does not validate the input. + * + * @param input the UTF-8 string to convert + * @param length the length of the string in bytes + * @return the number of bytes required to encode the UTF-8 string as Latin1 + */ + simdutf_warn_unused virtual size_t latin1_length_from_utf8(const char * input, size_t length) const noexcept = 0; + + /** + * Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format. + * + * This function does not validate the input. + * + * This function is not BOM-aware. + * Only the length of the input is needed, not its contents. + * + * @param length the length of the UTF-16 string in 2-byte words (char16_t) + * @return the number of bytes required to encode the UTF-16 string as Latin1 + */ + simdutf_warn_unused virtual size_t latin1_length_from_utf16(size_t length) const noexcept = 0; + + /** * Compute the number of two-byte words that this UTF-32 string would require in UTF-16 format. * * This function does not validate the input. @@ -2528,6 +3095,18 @@ public: */ simdutf_warn_unused virtual size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept = 0; + + /** + * Return the number of 4-byte words (char32_t) that a Latin1 string would require in UTF-32 format. + * + * This function does not validate the input. + * Only the length of the input is needed, not its contents. + * + * @param length the length of the Latin1 string in bytes + * @return the number of 4-byte words (char32_t) required to encode the Latin1 string as UTF-32 + */ + simdutf_warn_unused virtual size_t utf32_length_from_latin1(size_t length) const noexcept = 0; + /* * Compute the number of bytes that this UTF-16LE string would require in UTF-32 format. * |