aboutsummaryrefslogtreecommitdiff
path: root/src/bun.js/bindings/simdutf.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/bun.js/bindings/simdutf.h')
-rw-r--r--src/bun.js/bindings/simdutf.h659
1 files changed, 619 insertions, 40 deletions
diff --git a/src/bun.js/bindings/simdutf.h b/src/bun.js/bindings/simdutf.h
index 0a57a69f7..7fb388e9e 100644
--- a/src/bun.js/bindings/simdutf.h
+++ b/src/bun.js/bindings/simdutf.h
@@ -1,11 +1,11 @@
-/* auto-generated on 2023-02-10 14:42:58 -0500. Do not edit! */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf.h
+/* auto-generated on 2023-06-21 08:09:45 -0400. Do not edit! */
+// dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf.h
/* begin file include/simdutf.h */
#ifndef SIMDUTF_H
#define SIMDUTF_H
#include <cstring>
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/compiler_check.h
+// dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf/compiler_check.h
/* begin file include/simdutf/compiler_check.h */
#ifndef SIMDUTF_COMPILER_CHECK_H
#define SIMDUTF_COMPILER_CHECK_H
@@ -43,13 +43,13 @@
#endif // SIMDUTF_COMPILER_CHECK_H
/* end file include/simdutf/compiler_check.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/common_defs.h
+// dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf/common_defs.h
/* begin file include/simdutf/common_defs.h */
#ifndef SIMDUTF_COMMON_DEFS_H
#define SIMDUTF_COMMON_DEFS_H
#include <cassert>
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/portability.h
+// dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf/portability.h
/* begin file include/simdutf/portability.h */
#ifndef SIMDUTF_PORTABILITY_H
#define SIMDUTF_PORTABILITY_H
@@ -144,6 +144,8 @@
// POWER processors. Please see https://github.com/lemire/simdutf/issues/51
#elif defined(__s390__)
// s390 IBM system. Big endian.
+#elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64
+// RISC-V 64-bit
#else
// The simdutf library is designed
// for 64-bit processors and it seems that you are not
@@ -278,7 +280,7 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.")
#endif // SIMDUTF_PORTABILITY_H
/* end file include/simdutf/portability.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/avx512.h
+// dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf/avx512.h
/* begin file include/simdutf/avx512.h */
#ifndef SIMDUTF_AVX512_H_
#define SIMDUTF_AVX512_H_
@@ -458,19 +460,21 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.")
#endif // MSC_VER
-#if defined(SIMDUTF_VISUAL_STUDIO)
- /**
- * It does not matter here whether you are using
- * the regular visual studio or clang under visual
- * studio.
- */
- #if SIMDUTF_USING_LIBRARY
- #define SIMDUTF_DLLIMPORTEXPORT __declspec(dllimport)
+#ifndef SIMDUTF_DLLIMPORTEXPORT
+ #if defined(SIMDUTF_VISUAL_STUDIO)
+ /**
+ * It does not matter here whether you are using
+ * the regular visual studio or clang under visual
+ * studio.
+ */
+ #if SIMDUTF_USING_LIBRARY
+ #define SIMDUTF_DLLIMPORTEXPORT __declspec(dllimport)
+ #else
+ #define SIMDUTF_DLLIMPORTEXPORT __declspec(dllexport)
+ #endif
#else
- #define SIMDUTF_DLLIMPORTEXPORT __declspec(dllexport)
+ #define SIMDUTF_DLLIMPORTEXPORT
#endif
-#else
- #define SIMDUTF_DLLIMPORTEXPORT
#endif
/// If EXPR is an error, returns it.
@@ -479,7 +483,7 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.")
#endif // SIMDUTF_COMMON_DEFS_H
/* end file include/simdutf/common_defs.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/encoding_types.h
+// dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf/encoding_types.h
/* begin file include/simdutf/encoding_types.h */
#include <string>
@@ -491,6 +495,7 @@ enum encoding_type {
UTF16_BE = 4, // BOM 0xfe 0xff
UTF32_LE = 8, // BOM 0xff 0xfe 0x00 0x00
UTF32_BE = 16, // BOM 0x00 0x00 0xfe 0xff
+ Latin1 = 32,
unspecified = 0
};
@@ -527,7 +532,7 @@ size_t bom_byte_size(encoding_type bom);
} // BOM namespace
} // simdutf namespace
/* end file include/simdutf/encoding_types.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/error.h
+// dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf/error.h
/* begin file include/simdutf/error.h */
#ifndef ERROR_H
#define ERROR_H
@@ -541,9 +546,10 @@ enum error_code {
TOO_LONG, // We either have too many consecutive continuation bytes or the string starts with a continuation byte.
OVERLONG, // The decoded character must be above U+7F for two-byte characters, U+7FF for three-byte characters,
// and U+FFFF for four-byte characters.
- TOO_LARGE, // The decoded character must be less than or equal to U+10FFFF OR less than or equal than U+7F for ASCII.
+ TOO_LARGE, // The decoded character must be less than or equal to U+10FFFF,less than or equal than U+7F for ASCII OR less than equal than U+FF for Latin1
SURROGATE, // The decoded character must be not be in U+D800...DFFF (UTF-8 or UTF-32) OR
- // a high surrogate must be followed by a low surrogate and a low surrogate must be preceded by a high surrogate (UTF-16)
+ // a high surrogate must be followed by a low surrogate and a low surrogate must be preceded by a high surrogate (UTF-16) OR
+ // there must be no surrogate at all (Latin1)
OTHER // Not related to validation/transcoding.
};
@@ -564,7 +570,7 @@ SIMDUTF_PUSH_DISABLE_WARNINGS
SIMDUTF_DISABLE_UNDESIRED_WARNINGS
// Public API
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/simdutf_version.h
+// dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf/simdutf_version.h
/* begin file include/simdutf/simdutf_version.h */
// /include/simdutf/simdutf_version.h automatically generated by release.py,
// do not change by hand
@@ -572,7 +578,7 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS
#define SIMDUTF_SIMDUTF_VERSION_H
/** The version of simdutf being used (major.minor.revision) */
-#define SIMDUTF_VERSION "3.2.0"
+#define SIMDUTF_VERSION "3.2.14"
namespace simdutf {
enum {
@@ -587,13 +593,13 @@ enum {
/**
* The revision (major.minor.REVISION) of simdutf being used.
*/
- SIMDUTF_VERSION_REVISION = 0
+ SIMDUTF_VERSION_REVISION = 14
};
} // namespace simdutf
#endif // SIMDUTF_SIMDUTF_VERSION_H
/* end file include/simdutf/simdutf_version.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/implementation.h
+// dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf/implementation.h
/* begin file include/simdutf/implementation.h */
#ifndef SIMDUTF_IMPLEMENTATION_H
#define SIMDUTF_IMPLEMENTATION_H
@@ -603,7 +609,7 @@ enum {
#endif
#include <vector>
#include <tuple>
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/internal/isadetection.h
+// dofile: invoked with prepath=/Users/jarred/Build/simdutf/include, filename=simdutf/internal/isadetection.h
/* begin file include/simdutf/internal/isadetection.h */
/* From
https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h
@@ -690,22 +696,12 @@ static inline uint32_t detect_supported_architectures() {
return instruction_set::ALTIVEC;
}
-#elif defined(__arm__) || defined(__aarch64__) // incl. armel, armhf, arm64
-
-#if defined(__ARM_NEON)
+#elif defined(__aarch64__) || defined(_M_ARM64)
static inline uint32_t detect_supported_architectures() {
return instruction_set::NEON;
}
-#else // ARM without NEON
-
-static inline uint32_t detect_supported_architectures() {
- return instruction_set::DEFAULT;
-}
-
-#endif
-
#elif defined(__x86_64__) || defined(_M_AMD64) // x64
@@ -716,6 +712,7 @@ namespace cpuid_bit {
// EAX = 0x01
constexpr uint32_t pclmulqdq = uint32_t(1) << 1; ///< @private bit 1 of ECX for EAX=0x1
constexpr uint32_t sse42 = uint32_t(1) << 20; ///< @private bit 20 of ECX for EAX=0x1
+ constexpr uint32_t osxsave = (uint32_t(1) << 26) | (uint32_t(1) << 27); ///< @private bits 26+27 of ECX for EAX=0x1
// EAX = 0x7f (Structured Extended Feature Flags), ECX = 0x00 (Sub-leaf)
// See: "Table 3-8. Information Returned by CPUID Instruction"
@@ -741,6 +738,10 @@ namespace cpuid_bit {
namespace edx {
constexpr uint32_t avx512vp2intersect = uint32_t(1) << 8;
}
+ namespace xcr0_bit {
+ constexpr uint64_t avx256_saved = uint64_t(1) << 2; ///< @private bit 2 = AVX
+ constexpr uint64_t avx512_saved = uint64_t(7) << 5; ///< @private bits 5,6,7 = opmask, ZMM_hi256, hi16_ZMM
+ }
}
}
@@ -750,7 +751,7 @@ static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
uint32_t *edx) {
#if defined(_MSC_VER)
int cpu_info[4];
- __cpuid(cpu_info, *eax);
+ __cpuidex(cpu_info, *eax, *ecx);
*eax = cpu_info[0];
*ebx = cpu_info[1];
*ecx = cpu_info[2];
@@ -768,6 +769,16 @@ static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
#endif
}
+static inline uint64_t xgetbv() {
+ #if defined(_MSC_VER)
+ return _xgetbv(0);
+ #else
+ uint32_t xcr0_lo, xcr0_hi;
+ asm volatile("xgetbv\n\t" : "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0));
+ return xcr0_lo | ((uint64_t)xcr0_hi << 32);
+ #endif
+ }
+
static inline uint32_t detect_supported_architectures() {
uint32_t eax;
uint32_t ebx = 0;
@@ -787,6 +798,16 @@ static inline uint32_t detect_supported_architectures() {
host_isa |= instruction_set::PCLMULQDQ;
}
+ if ((ecx & cpuid_bit::osxsave) != cpuid_bit::osxsave) {
+ return host_isa;
+ }
+
+ // xgetbv for checking if the OS saves registers
+ uint64_t xcr0 = xgetbv();
+
+ if ((xcr0 & cpuid_bit::xcr0_bit::avx256_saved) == 0) {
+ return host_isa;
+ }
// ECX for EAX=0x7
eax = 0x7;
ecx = 0x0; // Sub-leaf = 0
@@ -800,6 +821,9 @@ static inline uint32_t detect_supported_architectures() {
if (ebx & cpuid_bit::ebx::bmi2) {
host_isa |= instruction_set::BMI2;
}
+ if (!((xcr0 & cpuid_bit::xcr0_bit::avx512_saved) == cpuid_bit::xcr0_bit::avx512_saved)) {
+ return host_isa;
+ }
if (ebx & cpuid_bit::ebx::avx512f) {
host_isa |= instruction_set::AVX512F;
}
@@ -822,7 +846,7 @@ static inline uint32_t detect_supported_architectures() {
}
#else // fallback
-
+// includes 32-bit ARM.
static inline uint32_t detect_supported_architectures() {
return instruction_set::DEFAULT;
}
@@ -870,7 +894,6 @@ simdutf_really_inline simdutf_warn_unused int detect_encodings(const uint8_t * i
return detect_encodings(reinterpret_cast<const char *>(input), length);
}
-
/**
* Validate the UTF-8 string. This function may be best when you expect
* the input to be almost always valid. Otherwise, consider using
@@ -1034,6 +1057,68 @@ simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) noexcep
*/
simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) noexcept;
+ /**
+ * Convert Latin1 string into UTF8 string.
+ *
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the Latin1 string to convert
+ * @param length the length of the string in bytes
+ * @param latin1_output the pointer to buffer that can hold conversion result
+ * @return the number of written char; 0 if conversion is not possible
+ */
+ simdutf_warn_unused size_t convert_latin1_to_utf8(const char * input, size_t length, char* utf8_output) noexcept;
+
+
+ /**
+ * Convert possibly Latin1 string into UTF-16LE string.
+ *
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the Latin1 string to convert
+ * @param length the length of the string in bytes
+ * @param utf16_buffer the pointer to buffer that can hold conversion result
+ * @return the number of written char16_t; 0 if conversion is not possible
+ */
+ simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * input, size_t length, char16_t* utf16_output) noexcept;
+
+ /**
+ * Convert Latin1 string into UTF-16BE string.
+ *
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the Latin1 string to convert
+ * @param length the length of the string in bytes
+ * @param utf16_buffer the pointer to buffer that can hold conversion result
+ * @return the number of written char16_t; 0 if conversion is not possible
+ */
+ simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * input, size_t length, char16_t* utf16_output) noexcept;
+
+ /**
+ * Convert Latin1 string into UTF-32 string.
+ *
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the Latin1 string to convert
+ * @param length the length of the string in bytes
+ * @param utf32_buffer the pointer to buffer that can hold conversion result
+ * @return the number of written char32_t; 0 if conversion is not possible
+ */
+ simdutf_warn_unused size_t convert_latin1_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) noexcept;
+
+ /**
+ * Convert possibly broken UTF-8 string into latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in bytes
+ * @param latin1_output the pointer to buffer that can hold conversion result
+ * @return the number of written char; 0 if the input was not valid UTF-8 string
+ */
+ simdutf_warn_unused size_t convert_utf8_to_latin1(const char * input, size_t length, char* latin1_output) noexcept;
+
/**
* Using native endianness; Convert possibly broken UTF-8 string into UTF-16 string.
*
@@ -1073,6 +1158,20 @@ simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * input, size_t le
*/
simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_output) noexcept;
+
+ /**
+ * Convert possibly broken UTF-8 string into latin1 string. with errors
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in bytes
+ * @param latin1_output the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+ */
+ simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * input, size_t length, char* latin1_output) noexcept;
+
/**
* Using native endianness; Convert possibly broken UTF-8 string into UTF-16
* string and stop on error.
@@ -1139,6 +1238,21 @@ simdutf_warn_unused size_t convert_utf8_to_utf32(const char * input, size_t leng
*/
simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * input, size_t length, char32_t* utf32_output) noexcept;
+ /**
+ * Convert valid UTF-8 string into latin1 string.
+ *
+ * This function assumes that the input string is valid UTF-8.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in bytes
+ * @param latin1_output the pointer to buffer that can hold conversion result
+ * @return the number of written char; 0 if the input was not valid UTF-8 string
+ */
+ simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * input, size_t length, char* latin1_output) noexcept;
+
+
/**
* Using native endianness; Convert valid UTF-8 string into UTF-16 string.
*
@@ -1187,6 +1301,29 @@ simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * input, siz
*/
simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) noexcept;
+
+ /**
+ * Return the number of bytes that this Latin1 string would require in UTF-8 format.
+ *
+ * @param input the Latin1 string to convert
+ * @param length the length of the string bytes
+ * @return the number of bytes required to encode the Latin1 string as UTF-8
+ */
+ simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t length) noexcept;
+
+ /**
+ * Compute the number of bytes that this UTF-8 string would require in Latin1 format.
+ *
+ * This function does not validate the input.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in byte
+ * @return the number of bytes required to encode the UTF-8 string as Latin1
+ */
+ simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t length) noexcept;
+
/**
* Compute the number of 2-byte words that this UTF-8 string would require in UTF-16LE format.
*
@@ -1230,6 +1367,38 @@ simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t len
*/
simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
+
+ /**
+ * Convert possibly broken UTF-16LE string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if input is not a valid UTF-16LE string
+ */
+ simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
+
+ /**
+ * Convert possibly broken UTF-16BE string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16BE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+ */
+ simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
+
+
/**
* Convert possibly broken UTF-16LE string into UTF-8 string.
*
@@ -1260,6 +1429,35 @@ simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * input, size_
*/
simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
+ /**
+ * Convert possibly broken UTF-16LE string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+ */
+ simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
+
+ /**
+ * Convert possibly broken UTF-16BE string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16BE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+ */
+ simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
+
+
/**
* Using native endianness; Convert possibly broken UTF-16 string into UTF-8 string and stop on error.
*
@@ -1319,6 +1517,36 @@ simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t *
*/
simdutf_warn_unused size_t convert_valid_utf16_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
+
+ /**
+ * Convert valid UTF-16LE string into Latin1 string.
+ *
+ * This function assumes that the input string is valid UTF-8.
+
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if conversion is not possible
+ */
+ simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
+
+ /**
+ * Convert valid UTF-16BE string into Latin1 string.
+ *
+ * This function assumes that the input string is valid UTF-8.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16BE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if conversion is not possible
+ */
+ simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
+
+
/**
* Convert valid UTF-16LE string into UTF-8 string.
*
@@ -1480,6 +1708,21 @@ simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * input
*/
simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
+
+/*
+ * Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
+ *
+ * This function does not validate the input.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @return the number of bytes required to encode the UTF-16LE string as Latin1
+ */
+ simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept;
+
+
/**
* Using native endianness; Compute the number of bytes that this UTF-16
* string would require in UTF-8 format.
@@ -1588,6 +1831,53 @@ simdutf_warn_unused size_t convert_utf32_to_utf16(const char32_t * input, size_t
*/
simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
+ /**
+ * Convert possibly broken UTF-32 string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if input is not a valid UTF-32 string
+ */
+
+ simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) noexcept;
+
+
+ /**
+ * Convert possibly broken UTF-32 string into Latin1 string and stop on error.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+ */
+
+ simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * input, size_t length, char* latin1_buffer) noexcept;
+
+ /**
+ * Convert valid UTF-32 string into Latin1 string.
+ *
+ * This function assumes that the input string is valid UTF-32.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @param latin1_buffer the pointer to buffer that can hold the conversion result
+ * @return number of written words; 0 if conversion is not possible
+ */
+ simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) noexcept;
+
/**
* Convert possibly broken UTF-32 string into UTF-16BE string.
*
@@ -2021,6 +2311,96 @@ public:
simdutf_warn_unused virtual result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept = 0;
/**
+ * Convert Latin1 string into UTF8 string.
+ *
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the Latin1 string to convert
+ * @param length the length of the string in bytes
+ * @param latin1_output the pointer to buffer that can hold conversion result
+ * @return the number of written char; 0 if conversion is not possible
+ */
+ simdutf_warn_unused virtual size_t convert_latin1_to_utf8(const char * input, size_t length, char* utf8_output) const noexcept = 0;
+
+
+ /**
+ * Convert possibly Latin1 string into UTF-16LE string.
+ *
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the Latin1 string to convert
+ * @param length the length of the string in bytes
+ * @param utf16_buffer the pointer to buffer that can hold conversion result
+ * @return the number of written char16_t; 0 if conversion is not possible
+ */
+ simdutf_warn_unused virtual size_t convert_latin1_to_utf16le(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
+
+ /**
+ * Convert Latin1 string into UTF-16BE string.
+ *
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the Latin1 string to convert
+ * @param length the length of the string in bytes
+ * @param utf16_buffer the pointer to buffer that can hold conversion result
+ * @return the number of written char16_t; 0 if conversion is not possible
+ */
+ simdutf_warn_unused virtual size_t convert_latin1_to_utf16be(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
+
+ /**
+ * Convert Latin1 string into UTF-32 string.
+ *
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the Latin1 string to convert
+ * @param length the length of the string in bytes
+ * @param utf32_buffer the pointer to buffer that can hold conversion result
+ * @return the number of written char32_t; 0 if conversion is not possible
+ */
+ simdutf_warn_unused virtual size_t convert_latin1_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-8 string into latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in bytes
+ * @param latin1_output the pointer to buffer that can hold conversion result
+ * @return the number of written char; 0 if the input was not valid UTF-8 string
+ */
+ simdutf_warn_unused virtual size_t convert_utf8_to_latin1(const char * input, size_t length, char* latin1_output) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-8 string into latin1 string. with errors
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in bytes
+ * @param latin1_output the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+ */
+ simdutf_warn_unused virtual result convert_utf8_to_latin1_with_errors(const char * input, size_t length, char* latin1_output) const noexcept = 0;
+
+ /**
+ * Convert valid UTF-8 string into latin1 string.
+ *
+ * This function assumes that the input string is valid UTF-8.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in bytes
+ * @param latin1_output the pointer to buffer that can hold conversion result
+ * @return the number of written char; 0 if the input was not valid UTF-8 string
+ */
+ simdutf_warn_unused virtual size_t convert_valid_utf8_to_latin1(const char * input, size_t length, char* latin1_output) const noexcept = 0;
+
+
+ /**
* Convert possibly broken UTF-8 string into UTF-16LE string.
*
* During the conversion also validation of the input string is done.
@@ -2159,6 +2539,92 @@ public:
simdutf_warn_unused virtual size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept = 0;
/**
+ * Convert possibly broken UTF-16LE string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if input is not a valid UTF-16LE string
+ */
+ simdutf_warn_unused virtual size_t convert_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-16BE string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16BE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+ */
+ simdutf_warn_unused virtual size_t convert_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-16LE string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+ */
+ simdutf_warn_unused virtual result convert_utf16le_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-16BE string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16BE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+ */
+ simdutf_warn_unused virtual result convert_utf16be_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+ /**
+ * Convert valid UTF-16LE string into Latin1 string.
+ *
+ * This function assumes that the input string is valid UTF-8.
+
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if conversion is not possible
+ */
+ simdutf_warn_unused virtual size_t convert_valid_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+ /**
+ * Convert valid UTF-16BE string into Latin1 string.
+ *
+ * This function assumes that the input string is valid UTF-8.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16BE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if conversion is not possible
+ */
+ simdutf_warn_unused virtual size_t convert_valid_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+ /**
* Convert possibly broken UTF-16LE string into UTF-8 string.
*
* During the conversion also validation of the input string is done.
@@ -2361,6 +2827,52 @@ public:
simdutf_warn_unused virtual size_t utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept = 0;
/**
+ * Convert possibly broken UTF-32 string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return number of written words; 0 if input is not a valid UTF-32 string
+ */
+
+ simdutf_warn_unused virtual size_t convert_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+ /**
+ * Convert possibly broken UTF-32 string into Latin1 string and stop on error.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @param latin1_buffer the pointer to buffer that can hold conversion result
+ * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+ */
+
+ simdutf_warn_unused virtual result convert_utf32_to_latin1_with_errors(const char32_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+ /**
+ * Convert valid UTF-32 string into Latin1 string.
+ *
+ * This function assumes that the input string is valid UTF-32.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @param latin1_buffer the pointer to buffer that can hold the conversion result
+ * @return number of written words; 0 if conversion is not possible
+ */
+ simdutf_warn_unused virtual size_t convert_valid_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+ /**
* Convert possibly broken UTF-32 string into UTF-8 string.
*
* During the conversion also validation of the input string is done.
@@ -2404,6 +2916,17 @@ public:
*/
simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
+
+ /**
+ * Return the number of bytes that this UTF-16 string would require in Latin1 format.
+ *
+ *
+ * @param input the UTF-16 string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @return the number of bytes required to encode the UTF-16 string as Latin1
+ */
+ simdutf_warn_unused virtual size_t utf16_length_from_latin1(size_t length) const noexcept = 0;
+
/**
* Convert possibly broken UTF-32 string into UTF-16LE string.
*
@@ -2506,6 +3029,15 @@ public:
*/
virtual void change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) const noexcept = 0;
+ /**
+ * Return the number of bytes that this Latin1 string would require in UTF-8 format.
+ *
+ * @param input the Latin1 string to convert
+ * @param length the length of the string bytes
+ * @return the number of bytes required to encode the Latin1 string as UTF-8
+ */
+ simdutf_warn_unused virtual size_t utf8_length_from_latin1(const char * input, size_t length) const noexcept = 0;
+
/**
* Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
*
@@ -2518,6 +3050,41 @@ public:
simdutf_warn_unused virtual size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept = 0;
/**
+ * Compute the number of bytes that this UTF-32 string would require in Latin1 format.
+ *
+ * This function does not validate the input.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @return the number of bytes required to encode the UTF-32 string as Latin1
+ */
+ simdutf_warn_unused virtual size_t latin1_length_from_utf32( size_t length) const noexcept = 0;
+
+ /**
+ * Compute the number of bytes that this UTF-8 string would require in Latin1 format.
+ *
+ * This function does not validate the input.
+ *
+ * @param input the UTF-8 string to convert
+ * @param length the length of the string in byte
+ * @return the number of bytes required to encode the UTF-8 string as Latin1
+ */
+ simdutf_warn_unused virtual size_t latin1_length_from_utf8(const char * input, size_t length) const noexcept = 0;
+
+/*
+ * Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
+ *
+ * This function does not validate the input.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input the UTF-16LE string to convert
+ * @param length the length of the string in 2-byte words (char16_t)
+ * @return the number of bytes required to encode the UTF-16LE string as Latin1
+ */
+ simdutf_warn_unused virtual size_t latin1_length_from_utf16(size_t length) const noexcept = 0;
+
+ /**
* Compute the number of two-byte words that this UTF-32 string would require in UTF-16 format.
*
* This function does not validate the input.
@@ -2528,6 +3095,18 @@ public:
*/
simdutf_warn_unused virtual size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept = 0;
+
+ /**
+ * Return the number of bytes that this UTF-32 string would require in Latin1 format.
+ *
+ * This function does not validate the input.
+ *
+ * @param input the UTF-32 string to convert
+ * @param length the length of the string in 4-byte words (char32_t)
+ * @return the number of bytes required to encode the UTF-32 string as Latin1
+ */
+ simdutf_warn_unused virtual size_t utf32_length_from_latin1(size_t length) const noexcept = 0;
+
/*
* Compute the number of bytes that this UTF-16LE string would require in UTF-32 format.
*