| author | 2022-04-03 16:34:10 -0700 |
|---|---|
| committer | 2022-04-03 16:34:10 -0700 |
| commit | a87508008dfa1604baf2d4e39bf44704c00f261c (patch) |
| tree | 0be2ade96772037a02803b30e157c367d931e3d9 /src/deps/skia/include/private |
| parent | 4a19a3f07f1887903e5638a3be167f0c7b377ba3 (diff) |
| download | bun-a87508008dfa1604baf2d4e39bf44704c00f261c.tar.gz bun-a87508008dfa1604baf2d4e39bf44704c00f261c.tar.zst bun-a87508008dfa1604baf2d4e39bf44704c00f261c.zip |
skia WIP (jarred/canvas)
Diffstat (limited to 'src/deps/skia/include/private')
67 files changed, 12634 insertions, 0 deletions
diff --git a/src/deps/skia/include/private/BUILD.bazel b/src/deps/skia/include/private/BUILD.bazel new file mode 100644 index 000000000..5d338ddf9 --- /dev/null +++ b/src/deps/skia/include/private/BUILD.bazel @@ -0,0 +1,572 @@ +load("//bazel:macros.bzl", "generated_cc_atom") + +generated_cc_atom( + name = "GrContext_Base_hdr", + hdrs = ["GrContext_Base.h"], + visibility = ["//:__subpackages__"], + deps = [ + "//include/core:SkRefCnt_hdr", + "//include/gpu:GrBackendSurface_hdr", + "//include/gpu:GrContextOptions_hdr", + "//include/gpu:GrTypes_hdr", + ], +) + +generated_cc_atom( + name = "GrD3DTypesMinimal_hdr", + hdrs = ["GrD3DTypesMinimal.h"], + visibility = ["//:__subpackages__"], + deps = [ + "//include/core:SkRefCnt_hdr", + "//include/gpu:GrTypes_hdr", + ], +) + +generated_cc_atom( + name = "GrDawnTypesPriv_hdr", + hdrs = ["GrDawnTypesPriv.h"], + visibility = ["//:__subpackages__"], + deps = ["//include/gpu/dawn:GrDawnTypes_hdr"], +) + +generated_cc_atom( + name = "GrGLTypesPriv_hdr", + hdrs = ["GrGLTypesPriv.h"], + visibility = ["//:__subpackages__"], + deps = [ + "//include/core:SkRefCnt_hdr", + "//include/gpu/gl:GrGLTypes_hdr", + ], +) + +generated_cc_atom( + name = "GrImageContext_hdr", + hdrs = ["GrImageContext.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":GrContext_Base_hdr", + ":GrSingleOwner_hdr", + ], +) + +generated_cc_atom( + name = "GrMockTypesPriv_hdr", + hdrs = ["GrMockTypesPriv.h"], + visibility = ["//:__subpackages__"], + deps = ["//include/gpu/mock:GrMockTypes_hdr"], +) + +generated_cc_atom( + name = "GrMtlTypesPriv_hdr", + hdrs = ["GrMtlTypesPriv.h"], + visibility = ["//:__subpackages__"], + deps = [ + "//include/gpu:GrTypes_hdr", + "//include/gpu/mtl:GrMtlTypes_hdr", + ], +) + +generated_cc_atom( + name = "GrSingleOwner_hdr", + hdrs = ["GrSingleOwner.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkMutex_hdr", + ":SkThreadID_hdr", + "//include/core:SkTypes_hdr", + ], +) + +generated_cc_atom( + name = "GrTypesPriv_hdr", + hdrs = ["GrTypesPriv.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkImageInfoPriv_hdr", + ":SkMacros_hdr", + "//include/core:SkImageInfo_hdr", + "//include/core:SkImage_hdr", + "//include/core:SkPath_hdr", + "//include/core:SkRefCnt_hdr", + "//include/gpu:GrTypes_hdr", + ], +) + +generated_cc_atom( + name = "GrVkTypesPriv_hdr", + hdrs = ["GrVkTypesPriv.h"], + visibility = ["//:__subpackages__"], + deps = [ + "//include/core:SkRefCnt_hdr", + "//include/gpu/vk:GrVkTypes_hdr", + ], +) + +generated_cc_atom( + name = "SkBitmaskEnum_hdr", + hdrs = ["SkBitmaskEnum.h"], + visibility = ["//:__subpackages__"], +) + +generated_cc_atom( + name = "SkChecksum_hdr", + hdrs = ["SkChecksum.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkNoncopyable_hdr", + ":SkOpts_spi_hdr", + ":SkTLogic_hdr", + "//include/core:SkString_hdr", + "//include/core:SkTypes_hdr", + ], +) + +generated_cc_atom( + name = "SkColorData_hdr", + hdrs = ["SkColorData.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkNx_hdr", + ":SkTo_hdr", + "//include/core:SkColorPriv_hdr", + "//include/core:SkColor_hdr", + ], +) + +generated_cc_atom( + name = "SkDeque_hdr", + hdrs = ["SkDeque.h"], + visibility = ["//:__subpackages__"], + deps = ["//include/core:SkTypes_hdr"], +) + +generated_cc_atom( + name = "SkEncodedInfo_hdr", + hdrs = ["SkEncodedInfo.h"], + visibility = ["//:__subpackages__"], + deps = [ + "//include/core:SkData_hdr", + "//include/core:SkImageInfo_hdr", + "//include/third_party/skcms:skcms_hdr", + ], +) + +generated_cc_atom( + 
name = "SkFixed_hdr", + hdrs = ["SkFixed.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkSafe_math_hdr", + ":SkTPin_hdr", + ":SkTo_hdr", + "//include/core:SkScalar_hdr", + "//include/core:SkTypes_hdr", + ], +) + +generated_cc_atom( + name = "SkFloatBits_hdr", + hdrs = ["SkFloatBits.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkSafe_math_hdr", + "//include/core:SkTypes_hdr", + ], +) + +generated_cc_atom( + name = "SkFloatingPoint_hdr", + hdrs = ["SkFloatingPoint.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkFloatBits_hdr", + ":SkSafe_math_hdr", + "//include/core:SkTypes_hdr", + ], +) + +generated_cc_atom( + name = "SkHalf_hdr", + hdrs = ["SkHalf.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkNx_hdr", + "//include/core:SkTypes_hdr", + ], +) + +generated_cc_atom( + name = "SkIDChangeListener_hdr", + hdrs = ["SkIDChangeListener.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkMutex_hdr", + ":SkTDArray_hdr", + "//include/core:SkRefCnt_hdr", + ], +) + +generated_cc_atom( + name = "SkImageInfoPriv_hdr", + hdrs = ["SkImageInfoPriv.h"], + visibility = ["//:__subpackages__"], + deps = [ + "//include/core:SkColor_hdr", + "//include/core:SkImageInfo_hdr", + ], +) + +generated_cc_atom( + name = "SkMacros_hdr", + hdrs = ["SkMacros.h"], + visibility = ["//:__subpackages__"], +) + +generated_cc_atom( + name = "SkMalloc_hdr", + hdrs = ["SkMalloc.h"], + visibility = ["//:__subpackages__"], + deps = ["//include/core:SkTypes_hdr"], +) + +generated_cc_atom( + name = "SkMutex_hdr", + hdrs = ["SkMutex.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkMacros_hdr", + ":SkSemaphore_hdr", + ":SkThreadAnnotations_hdr", + ":SkThreadID_hdr", + "//include/core:SkTypes_hdr", + ], +) + +generated_cc_atom( + name = "SkNoncopyable_hdr", + hdrs = ["SkNoncopyable.h"], + visibility = ["//:__subpackages__"], + deps = ["//include/core:SkTypes_hdr"], +) + +generated_cc_atom( + name = "SkNx_hdr", + hdrs = ["SkNx.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkNx_neon_hdr", + ":SkNx_sse_hdr", + ":SkSafe_math_hdr", + "//include/core:SkScalar_hdr", + "//include/core:SkTypes_hdr", + ], +) + +generated_cc_atom( + name = "SkNx_neon_hdr", + hdrs = ["SkNx_neon.h"], + visibility = ["//:__subpackages__"], +) + +generated_cc_atom( + name = "SkNx_sse_hdr", + hdrs = ["SkNx_sse.h"], + visibility = ["//:__subpackages__"], + deps = ["//include/core:SkTypes_hdr"], +) + +generated_cc_atom( + name = "SkOnce_hdr", + hdrs = ["SkOnce.h"], + visibility = ["//:__subpackages__"], + deps = [":SkThreadAnnotations_hdr"], +) + +generated_cc_atom( + name = "SkOpts_spi_hdr", + hdrs = ["SkOpts_spi.h"], + visibility = ["//:__subpackages__"], + deps = ["//include/core:SkTypes_hdr"], +) + +generated_cc_atom( + name = "SkPathRef_hdr", + hdrs = ["SkPathRef.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkIDChangeListener_hdr", + ":SkMutex_hdr", + ":SkTDArray_hdr", + ":SkTemplates_hdr", + ":SkTo_hdr", + "//include/core:SkMatrix_hdr", + "//include/core:SkPoint_hdr", + "//include/core:SkRRect_hdr", + "//include/core:SkRect_hdr", + "//include/core:SkRefCnt_hdr", + ], +) + +generated_cc_atom( + name = "SkSLDefines_hdr", + hdrs = ["SkSLDefines.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkTArray_hdr", + "//include/core:SkTypes_hdr", + ], +) + +generated_cc_atom( + name = "SkSLIRNode_hdr", + hdrs = ["SkSLIRNode.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkSLString_hdr", + ":SkTArray_hdr", + "//src/sksl:SkSLLexer_hdr", + 
"//src/sksl:SkSLModifiersPool_hdr", + "//src/sksl:SkSLPool_hdr", + ], +) + +generated_cc_atom( + name = "SkSLLayout_hdr", + hdrs = ["SkSLLayout.h"], + visibility = ["//:__subpackages__"], + deps = [":SkSLString_hdr"], +) + +generated_cc_atom( + name = "SkSLModifiers_hdr", + hdrs = ["SkSLModifiers.h"], + visibility = ["//:__subpackages__"], + deps = [":SkSLLayout_hdr"], +) + +generated_cc_atom( + name = "SkSLProgramElement_hdr", + hdrs = ["SkSLProgramElement.h"], + visibility = ["//:__subpackages__"], + deps = [":SkSLIRNode_hdr"], +) + +generated_cc_atom( + name = "SkSLProgramKind_hdr", + hdrs = ["SkSLProgramKind.h"], + visibility = ["//:__subpackages__"], +) + +generated_cc_atom( + name = "SkSLSampleUsage_hdr", + hdrs = ["SkSLSampleUsage.h"], + visibility = ["//:__subpackages__"], + deps = ["//include/core:SkTypes_hdr"], +) + +generated_cc_atom( + name = "SkSLStatement_hdr", + hdrs = ["SkSLStatement.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkSLIRNode_hdr", + ":SkSLSymbol_hdr", + ], +) + +generated_cc_atom( + name = "SkSLString_hdr", + hdrs = ["SkSLString.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkSLDefines_hdr", + "//include/core:SkStringView_hdr", + "//include/core:SkString_hdr", + ], +) + +generated_cc_atom( + name = "SkSLSymbol_hdr", + hdrs = ["SkSLSymbol.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkSLIRNode_hdr", + ":SkSLProgramElement_hdr", + ], +) + +generated_cc_atom( + name = "SkSafe32_hdr", + hdrs = ["SkSafe32.h"], + visibility = ["//:__subpackages__"], + deps = ["//include/core:SkTypes_hdr"], +) + +generated_cc_atom( + name = "SkSafe_math_hdr", + hdrs = ["SkSafe_math.h"], + visibility = ["//:__subpackages__"], +) + +generated_cc_atom( + name = "SkSemaphore_hdr", + hdrs = ["SkSemaphore.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkOnce_hdr", + ":SkThreadAnnotations_hdr", + "//include/core:SkTypes_hdr", + ], +) + +generated_cc_atom( + name = "SkShadowFlags_hdr", + hdrs = ["SkShadowFlags.h"], + visibility = ["//:__subpackages__"], +) + +generated_cc_atom( + name = "SkSpinlock_hdr", + hdrs = ["SkSpinlock.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkThreadAnnotations_hdr", + "//include/core:SkTypes_hdr", + ], +) + +generated_cc_atom( + name = "SkTArray_hdr", + hdrs = ["SkTArray.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkMalloc_hdr", + ":SkSafe32_hdr", + ":SkTLogic_hdr", + ":SkTemplates_hdr", + ":SkTo_hdr", + "//include/core:SkMath_hdr", + "//include/core:SkTypes_hdr", + ], +) + +generated_cc_atom( + name = "SkTDArray_hdr", + hdrs = ["SkTDArray.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkMalloc_hdr", + ":SkTo_hdr", + "//include/core:SkTypes_hdr", + ], +) + +generated_cc_atom( + name = "SkTFitsIn_hdr", + hdrs = ["SkTFitsIn.h"], + visibility = ["//:__subpackages__"], +) + +generated_cc_atom( + name = "SkTHash_hdr", + hdrs = ["SkTHash.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkChecksum_hdr", + ":SkTemplates_hdr", + "//include/core:SkTypes_hdr", + ], +) + +generated_cc_atom( + name = "SkTLogic_hdr", + hdrs = ["SkTLogic.h"], + visibility = ["//:__subpackages__"], + deps = [":SkTo_hdr"], +) + +generated_cc_atom( + name = "SkTOptional_hdr", + hdrs = ["SkTOptional.h"], + visibility = ["//:__subpackages__"], + deps = ["//include/core:SkTypes_hdr"], +) + +generated_cc_atom( + name = "SkTPin_hdr", + hdrs = ["SkTPin.h"], + visibility = ["//:__subpackages__"], +) + +generated_cc_atom( + name = "SkTemplates_hdr", + hdrs = ["SkTemplates.h"], + visibility = 
["//:__subpackages__"], + deps = [ + ":SkMalloc_hdr", + ":SkTLogic_hdr", + "//include/core:SkTypes_hdr", + ], +) + +generated_cc_atom( + name = "SkThreadAnnotations_hdr", + hdrs = ["SkThreadAnnotations.h"], + visibility = ["//:__subpackages__"], +) + +generated_cc_atom( + name = "SkThreadID_hdr", + hdrs = ["SkThreadID.h"], + visibility = ["//:__subpackages__"], + deps = ["//include/core:SkTypes_hdr"], +) + +generated_cc_atom( + name = "SkTo_hdr", + hdrs = ["SkTo.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkTFitsIn_hdr", + "//include/core:SkTypes_hdr", + ], +) + +generated_cc_atom( + name = "SkVx_hdr", + hdrs = ["SkVx.h"], + visibility = ["//:__subpackages__"], +) + +generated_cc_atom( + name = "SkWeakRefCnt_hdr", + hdrs = ["SkWeakRefCnt.h"], + visibility = ["//:__subpackages__"], + deps = ["//include/core:SkRefCnt_hdr"], +) + +generated_cc_atom( + name = "SkPaintParamsKey_hdr", + hdrs = ["SkPaintParamsKey.h"], + visibility = ["//:__subpackages__"], + deps = ["//include/core:SkTypes_hdr"], +) + +generated_cc_atom( + name = "SkShaderCodeDictionary_hdr", + hdrs = ["SkShaderCodeDictionary.h"], + visibility = ["//:__subpackages__"], + deps = [ + ":SkPaintParamsKey_hdr", + ":SkSpinlock_hdr", + ":SkUniquePaintParamsID_hdr", + "//src/core:SkArenaAlloc_hdr", + ], +) + +generated_cc_atom( + name = "SkUniquePaintParamsID_hdr", + hdrs = ["SkUniquePaintParamsID.h"], + visibility = ["//:__subpackages__"], + deps = ["//include/core:SkTypes_hdr"], +) diff --git a/src/deps/skia/include/private/GrContext_Base.h b/src/deps/skia/include/private/GrContext_Base.h new file mode 100644 index 000000000..19c367da4 --- /dev/null +++ b/src/deps/skia/include/private/GrContext_Base.h @@ -0,0 +1,92 @@ +/* + * Copyright 2019 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef GrContext_Base_DEFINED +#define GrContext_Base_DEFINED + +#include "include/core/SkRefCnt.h" +#include "include/gpu/GrBackendSurface.h" +#include "include/gpu/GrContextOptions.h" +#include "include/gpu/GrTypes.h" + +class GrBaseContextPriv; +class GrCaps; +class GrContextThreadSafeProxy; +class GrDirectContext; +class GrImageContext; +class GrRecordingContext; + +class GrContext_Base : public SkRefCnt { +public: + ~GrContext_Base() override; + + /* + * Safely downcast to a GrDirectContext. + */ + virtual GrDirectContext* asDirectContext() { return nullptr; } + + /* + * The 3D API backing this context + */ + SK_API GrBackendApi backend() const; + + /* + * Retrieve the default GrBackendFormat for a given SkColorType and renderability. + * It is guaranteed that this backend format will be the one used by the GrContext + * SkColorType and SkSurfaceCharacterization-based createBackendTexture methods. + * + * The caller should check that the returned format is valid. + */ + SK_API GrBackendFormat defaultBackendFormat(SkColorType, GrRenderable) const; + + SK_API GrBackendFormat compressedBackendFormat(SkImage::CompressionType) const; + + // TODO: When the public version is gone, rename to refThreadSafeProxy and add raw ptr ver. + sk_sp<GrContextThreadSafeProxy> threadSafeProxy(); + + // Provides access to functions that aren't part of the public API. + GrBaseContextPriv priv(); + const GrBaseContextPriv priv() const; // NOLINT(readability-const-return-type) + +protected: + friend class GrBaseContextPriv; // for hidden functions + + GrContext_Base(sk_sp<GrContextThreadSafeProxy>); + + virtual bool init(); + + /** + * An identifier for this context. 
The id is used by all compatible contexts. For example, + * if SkImages are created on one thread using an image creation context, then fed into a + * DDL Recorder on second thread (which has a recording context) and finally replayed on + * a third thread with a direct context, then all three contexts will report the same id. + * It is an error for an image to be used with contexts that report different ids. + */ + uint32_t contextID() const; + + bool matches(GrContext_Base* candidate) const { + return candidate && candidate->contextID() == this->contextID(); + } + + /* + * The options in effect for this context + */ + const GrContextOptions& options() const; + + const GrCaps* caps() const; + sk_sp<const GrCaps> refCaps() const; + + virtual GrImageContext* asImageContext() { return nullptr; } + virtual GrRecordingContext* asRecordingContext() { return nullptr; } + + sk_sp<GrContextThreadSafeProxy> fThreadSafeProxy; + +private: + using INHERITED = SkRefCnt; +}; + +#endif diff --git a/src/deps/skia/include/private/GrD3DTypesMinimal.h b/src/deps/skia/include/private/GrD3DTypesMinimal.h new file mode 100644 index 000000000..049c07bff --- /dev/null +++ b/src/deps/skia/include/private/GrD3DTypesMinimal.h @@ -0,0 +1,74 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef GrD3DTypesMinimal_DEFINED +#define GrD3DTypesMinimal_DEFINED + +// Minimal definitions of Direct3D types, without including d3d12.h + +#include "include/core/SkRefCnt.h" + +#include <dxgiformat.h> + +#include "include/gpu/GrTypes.h" + +struct ID3D12Resource; +class GrD3DResourceState; +typedef int GrD3DResourceStateEnum; +struct GrD3DSurfaceInfo; +struct GrD3DTextureResourceInfo; +struct GrD3DTextureResourceSpec; +struct GrD3DFenceInfo; + +// This struct is to used to store the the actual information about the Direct3D backend image on +// GrBackendTexture and GrBackendRenderTarget. When a client calls getD3DTextureInfo on a +// GrBackendTexture/RenderTarget, we use the GrD3DBackendSurfaceInfo to create a snapshot +// GrD3DTextureResourceInfo object. Internally, this uses a ref count GrD3DResourceState object to +// track the current D3D12_RESOURCE_STATES which can be shared with an internal GrD3DTextureResource +// so that state updates can be seen by all users of the texture. +struct GrD3DBackendSurfaceInfo { + GrD3DBackendSurfaceInfo(const GrD3DTextureResourceInfo& info, GrD3DResourceState* state); + + void cleanup(); + + GrD3DBackendSurfaceInfo& operator=(const GrD3DBackendSurfaceInfo&) = delete; + + // Assigns the passed in GrD3DBackendSurfaceInfo to this object. if isValid is true we will also + // attempt to unref the old fLayout on this object. 
+ void assign(const GrD3DBackendSurfaceInfo&, bool isValid); + + void setResourceState(GrD3DResourceStateEnum state); + + sk_sp<GrD3DResourceState> getGrD3DResourceState() const; + + GrD3DTextureResourceInfo snapTextureResourceInfo() const; + + bool isProtected() const; +#if GR_TEST_UTILS + bool operator==(const GrD3DBackendSurfaceInfo& that) const; +#endif + +private: + GrD3DTextureResourceInfo* fTextureResourceInfo; + GrD3DResourceState* fResourceState; +}; + +struct GrD3DTextureResourceSpecHolder { +public: + GrD3DTextureResourceSpecHolder(const GrD3DSurfaceInfo&); + + void cleanup(); + + GrD3DSurfaceInfo getSurfaceInfo(uint32_t sampleCount, + uint32_t levelCount, + GrProtected isProtected) const; + +private: + GrD3DTextureResourceSpec* fSpec; +}; + +#endif diff --git a/src/deps/skia/include/private/GrDawnTypesPriv.h b/src/deps/skia/include/private/GrDawnTypesPriv.h new file mode 100644 index 000000000..5eacf2ea2 --- /dev/null +++ b/src/deps/skia/include/private/GrDawnTypesPriv.h @@ -0,0 +1,26 @@ +/* + * Copyright 2021 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef GrDawnTypesPriv_DEFINED +#define GrDawnTypesPriv_DEFINED + +#include "include/gpu/dawn/GrDawnTypes.h" + +struct GrDawnTextureSpec { + GrDawnTextureSpec() {} + GrDawnTextureSpec(const GrDawnSurfaceInfo& info) : fFormat(info.fFormat) {} + + wgpu::TextureFormat fFormat; +}; + +GrDawnSurfaceInfo GrDawnTextureSpecToSurfaceInfo(const GrDawnTextureSpec& dawnSpec, + uint32_t sampleCount, + uint32_t levelCount, + GrProtected isProtected); + +#endif + diff --git a/src/deps/skia/include/private/GrGLTypesPriv.h b/src/deps/skia/include/private/GrGLTypesPriv.h new file mode 100644 index 000000000..4abef05c7 --- /dev/null +++ b/src/deps/skia/include/private/GrGLTypesPriv.h @@ -0,0 +1,107 @@ +/* + * Copyright 2019 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "include/core/SkRefCnt.h" +#include "include/gpu/gl/GrGLTypes.h" + +#ifndef GrGLTypesPriv_DEFINED +#define GrGLTypesPriv_DEFINED + +static constexpr int kGrGLColorFormatCount = static_cast<int>(GrGLFormat::kLastColorFormat) + 1; + +class GrGLTextureParameters : public SkNVRefCnt<GrGLTextureParameters> { +public: + // We currently consider texture parameters invalid on all textures + // GrContext::resetContext(). We use this type to track whether instances of + // GrGLTextureParameters were updated before or after the most recent resetContext(). At 10 + // resets / frame and 60fps a 64bit timestamp will overflow in about a billion years. + // TODO: Require clients to use GrBackendTexture::glTextureParametersModified() to invalidate + // texture parameters and get rid of timestamp checking. + using ResetTimestamp = uint64_t; + + // This initializes the params to have an expired timestamp. They'll be considered invalid the + // first time the texture is used unless set() is called. + GrGLTextureParameters() = default; + + // This is texture parameter state that is overridden when a non-zero sampler object is bound. + struct SamplerOverriddenState { + SamplerOverriddenState(); + void invalidate(); + + GrGLenum fMinFilter; + GrGLenum fMagFilter; + GrGLenum fWrapS; + GrGLenum fWrapT; + GrGLfloat fMinLOD; + GrGLfloat fMaxLOD; + // We always want the border color to be transparent black, so no need to store 4 floats. 
+ // Just track if it's been invalidated and no longer the default + bool fBorderColorInvalid; + }; + + // Texture parameter state that is not overridden by a bound sampler object. + struct NonsamplerState { + NonsamplerState(); + void invalidate(); + + GrGLint fBaseMipMapLevel; + GrGLint fMaxMipmapLevel; + bool fSwizzleIsRGBA; + }; + + void invalidate(); + + ResetTimestamp resetTimestamp() const { return fResetTimestamp; } + const SamplerOverriddenState& samplerOverriddenState() const { return fSamplerOverriddenState; } + const NonsamplerState& nonsamplerState() const { return fNonsamplerState; } + + // SamplerOverriddenState is optional because we don't track it when we're using sampler + // objects. + void set(const SamplerOverriddenState* samplerState, + const NonsamplerState& nonsamplerState, + ResetTimestamp currTimestamp); + +private: + static constexpr ResetTimestamp kExpiredTimestamp = 0; + + SamplerOverriddenState fSamplerOverriddenState; + NonsamplerState fNonsamplerState; + ResetTimestamp fResetTimestamp = kExpiredTimestamp; +}; + +class GrGLBackendTextureInfo { +public: + GrGLBackendTextureInfo(const GrGLTextureInfo& info, GrGLTextureParameters* params) + : fInfo(info), fParams(params) {} + GrGLBackendTextureInfo(const GrGLBackendTextureInfo&) = delete; + GrGLBackendTextureInfo& operator=(const GrGLBackendTextureInfo&) = delete; + const GrGLTextureInfo& info() const { return fInfo; } + GrGLTextureParameters* parameters() const { return fParams; } + sk_sp<GrGLTextureParameters> refParameters() const { return sk_ref_sp(fParams); } + + void cleanup(); + void assign(const GrGLBackendTextureInfo&, bool thisIsValid); + +private: + GrGLTextureInfo fInfo; + GrGLTextureParameters* fParams; +}; + +struct GrGLTextureSpec { + GrGLTextureSpec() : fTarget(0), fFormat(0) {} + GrGLTextureSpec(const GrGLSurfaceInfo& info) : fTarget(info.fTarget), fFormat(info.fFormat) {} + + GrGLenum fTarget; + GrGLenum fFormat; +}; + +GrGLSurfaceInfo GrGLTextureSpecToSurfaceInfo(const GrGLTextureSpec& glSpec, + uint32_t sampleCount, + uint32_t levelCount, + GrProtected isProtected); + +#endif diff --git a/src/deps/skia/include/private/GrImageContext.h b/src/deps/skia/include/private/GrImageContext.h new file mode 100644 index 000000000..8a9f558f3 --- /dev/null +++ b/src/deps/skia/include/private/GrImageContext.h @@ -0,0 +1,55 @@ +/* + * Copyright 2019 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef GrImageContext_DEFINED +#define GrImageContext_DEFINED + +#include "include/private/GrContext_Base.h" +#include "include/private/GrSingleOwner.h" + +class GrImageContextPriv; + +// This is now just a view on a ThreadSafeProxy, that SkImages can attempt to +// downcast to a GrDirectContext as a backdoor to some operations. Once we remove the backdoors, +// this goes away and SkImages just hold ThreadSafeProxies. +class GrImageContext : public GrContext_Base { +public: + ~GrImageContext() override; + + // Provides access to functions that aren't part of the public API. 
+ GrImageContextPriv priv(); + const GrImageContextPriv priv() const; // NOLINT(readability-const-return-type) + +protected: + friend class GrImageContextPriv; // for hidden functions + + GrImageContext(sk_sp<GrContextThreadSafeProxy>); + + SK_API virtual void abandonContext(); + SK_API virtual bool abandoned(); + + /** This is only useful for debug purposes */ + GrSingleOwner* singleOwner() const { return &fSingleOwner; } + + GrImageContext* asImageContext() override { return this; } + +private: + // When making promise images, we currently need a placeholder GrImageContext instance to give + // to the SkImage that has no real power, just a wrapper around the ThreadSafeProxy. + // TODO: De-power SkImage to ThreadSafeProxy or at least figure out a way to share one instance. + static sk_sp<GrImageContext> MakeForPromiseImage(sk_sp<GrContextThreadSafeProxy>); + + // In debug builds we guard against improper thread handling + // This guard is passed to the GrDrawingManager and, from there to all the + // GrSurfaceDrawContexts. It is also passed to the GrResourceProvider and SkGpuDevice. + // TODO: Move this down to GrRecordingContext. + mutable GrSingleOwner fSingleOwner; + + using INHERITED = GrContext_Base; +}; + +#endif diff --git a/src/deps/skia/include/private/GrMockTypesPriv.h b/src/deps/skia/include/private/GrMockTypesPriv.h new file mode 100644 index 000000000..fc72c7fd9 --- /dev/null +++ b/src/deps/skia/include/private/GrMockTypesPriv.h @@ -0,0 +1,31 @@ +/* + * Copyright 2021 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef GrMockTypesPriv_DEFINED +#define GrMockTypesPriv_DEFINED + +#include "include/gpu/mock/GrMockTypes.h" + +struct GrMockTextureSpec { + GrMockTextureSpec() + : fColorType(GrColorType::kUnknown) + , fCompressionType(SkImage::CompressionType::kNone) {} + GrMockTextureSpec(const GrMockSurfaceInfo& info) + : fColorType(info.fColorType) + , fCompressionType(info.fCompressionType) {} + + GrColorType fColorType = GrColorType::kUnknown; + SkImage::CompressionType fCompressionType = SkImage::CompressionType::kNone; +}; + +GrMockSurfaceInfo GrMockTextureSpecToSurfaceInfo(const GrMockTextureSpec& mockSpec, + uint32_t sampleCount, + uint32_t levelCount, + GrProtected isProtected); + +#endif + diff --git a/src/deps/skia/include/private/GrMtlTypesPriv.h b/src/deps/skia/include/private/GrMtlTypesPriv.h new file mode 100644 index 000000000..550d01760 --- /dev/null +++ b/src/deps/skia/include/private/GrMtlTypesPriv.h @@ -0,0 +1,75 @@ +/* + * Copyright 2021 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#ifndef GrMtlTypesPriv_DEFINED +#define GrMtlTypesPriv_DEFINED + +#include "include/gpu/GrTypes.h" +#include "include/gpu/mtl/GrMtlTypes.h" + +/////////////////////////////////////////////////////////////////////////////// + +#ifdef __APPLE__ + +#include <TargetConditionals.h> + +#if defined(SK_BUILD_FOR_MAC) +#if __MAC_OS_X_VERSION_MAX_ALLOWED >= 110000 +#define GR_METAL_SDK_VERSION 230 +#elif __MAC_OS_X_VERSION_MAX_ALLOWED >= 101500 +#define GR_METAL_SDK_VERSION 220 +#elif __MAC_OS_X_VERSION_MAX_ALLOWED >= 101400 +#define GR_METAL_SDK_VERSION 210 +#else +#error Must use at least 10.14 SDK to build Metal backend for MacOS +#endif +#else +#if __IPHONE_OS_VERSION_MAX_ALLOWED >= 140000 || __TV_OS_VERSION_MAX_ALLOWED >= 140000 +#define GR_METAL_SDK_VERSION 230 +#elif __IPHONE_OS_VERSION_MAX_ALLOWED >= 130000 || __TV_OS_VERSION_MAX_ALLOWED >= 130000 +#define GR_METAL_SDK_VERSION 220 +#elif __IPHONE_OS_VERSION_MAX_ALLOWED >= 120000 || __TV_OS_VERSION_MAX_ALLOWED >= 120000 +#define GR_METAL_SDK_VERSION 210 +#else +#error Must use at least 12.00 SDK to build Metal backend for iOS +#endif +#endif + +#if __has_feature(objc_arc) && __has_attribute(objc_externally_retained) +#define GR_NORETAIN __attribute__((objc_externally_retained)) +#define GR_NORETAIN_BEGIN \ + _Pragma("clang attribute push (__attribute__((objc_externally_retained)), apply_to=any(function,objc_method))") +#define GR_NORETAIN_END _Pragma("clang attribute pop") +#else +#define GR_NORETAIN +#define GR_NORETAIN_BEGIN +#define GR_NORETAIN_END +#endif + +struct GrMtlTextureSpec { + GrMtlTextureSpec() + : fFormat(0) + , fUsage(0) + , fStorageMode(0) {} + GrMtlTextureSpec(const GrMtlSurfaceInfo& info) + : fFormat(info.fFormat) + , fUsage(info.fUsage) + , fStorageMode(info.fStorageMode) {} + + GrMTLPixelFormat fFormat; + GrMTLTextureUsage fUsage; + GrMTLStorageMode fStorageMode; +}; + +GrMtlSurfaceInfo GrMtlTextureSpecToSurfaceInfo(const GrMtlTextureSpec& mtlSpec, + uint32_t sampleCount, + uint32_t levelCount, + GrProtected isProtected); + +#endif // __APPLE__ + +#endif // GrMtlTypesPriv_DEFINED diff --git a/src/deps/skia/include/private/GrSingleOwner.h b/src/deps/skia/include/private/GrSingleOwner.h new file mode 100644 index 000000000..f612bb5fc --- /dev/null +++ b/src/deps/skia/include/private/GrSingleOwner.h @@ -0,0 +1,65 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef GrSingleOwner_DEFINED +#define GrSingleOwner_DEFINED + +#include "include/core/SkTypes.h" + +#ifdef SK_DEBUG +#include "include/private/SkMutex.h" +#include "include/private/SkThreadID.h" + +#define GR_ASSERT_SINGLE_OWNER(obj) \ + GrSingleOwner::AutoEnforce debug_SingleOwner(obj, __FILE__, __LINE__); + +// This is a debug tool to verify an object is only being used from one thread at a time. 
+class GrSingleOwner { +public: + GrSingleOwner() : fOwner(kIllegalThreadID), fReentranceCount(0) {} + + struct AutoEnforce { + AutoEnforce(GrSingleOwner* so, const char* file, int line) + : fFile(file), fLine(line), fSO(so) { + fSO->enter(file, line); + } + ~AutoEnforce() { fSO->exit(fFile, fLine); } + + const char* fFile; + int fLine; + GrSingleOwner* fSO; + }; + +private: + void enter(const char* file, int line) { + SkAutoMutexExclusive lock(fMutex); + SkThreadID self = SkGetThreadID(); + SkASSERTF(fOwner == self || fOwner == kIllegalThreadID, "%s:%d Single owner failure.", + file, line); + fReentranceCount++; + fOwner = self; + } + + void exit(const char* file, int line) { + SkAutoMutexExclusive lock(fMutex); + SkASSERTF(fOwner == SkGetThreadID(), "%s:%d Single owner failure.", file, line); + fReentranceCount--; + if (fReentranceCount == 0) { + fOwner = kIllegalThreadID; + } + } + + SkMutex fMutex; + SkThreadID fOwner SK_GUARDED_BY(fMutex); + int fReentranceCount SK_GUARDED_BY(fMutex); +}; +#else +#define GR_ASSERT_SINGLE_OWNER(obj) +class GrSingleOwner {}; // Provide a no-op implementation so we can pass pointers to constructors +#endif + +#endif diff --git a/src/deps/skia/include/private/GrTypesPriv.h b/src/deps/skia/include/private/GrTypesPriv.h new file mode 100644 index 000000000..cba4b4c79 --- /dev/null +++ b/src/deps/skia/include/private/GrTypesPriv.h @@ -0,0 +1,1354 @@ +/* + * Copyright 2013 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef GrTypesPriv_DEFINED +#define GrTypesPriv_DEFINED + +#include <chrono> +#include "include/core/SkImage.h" +#include "include/core/SkImageInfo.h" +#include "include/core/SkPath.h" +#include "include/core/SkRefCnt.h" +#include "include/gpu/GrTypes.h" +#include "include/private/SkImageInfoPriv.h" +#include "include/private/SkMacros.h" + +class GrBackendFormat; +class GrCaps; +class GrSurfaceProxy; + +// The old libstdc++ uses the draft name "monotonic_clock" rather than "steady_clock". This might +// not actually be monotonic, depending on how libstdc++ was built. However, this is only currently +// used for idle resource purging so it shouldn't cause a correctness problem. +#if defined(__GLIBCXX__) && (__GLIBCXX__ < 20130000) +using GrStdSteadyClock = std::chrono::monotonic_clock; +#else +using GrStdSteadyClock = std::chrono::steady_clock; +#endif + +/** + * divide, rounding up + */ + +static inline constexpr size_t GrSizeDivRoundUp(size_t x, size_t y) { return (x + (y - 1)) / y; } + +/** + * Geometric primitives used for drawing. + */ +enum class GrPrimitiveType : uint8_t { + kTriangles, + kTriangleStrip, + kPoints, + kLines, // 1 pix wide only + kLineStrip, // 1 pix wide only + kPatches, + kPath +}; +static constexpr int kNumGrPrimitiveTypes = (int)GrPrimitiveType::kPath + 1; + +static constexpr bool GrIsPrimTypeLines(GrPrimitiveType type) { + return GrPrimitiveType::kLines == type || GrPrimitiveType::kLineStrip == type; +} + +enum class GrPrimitiveRestart : bool { + kNo = false, + kYes = true +}; + +/** + * Should a created surface be texturable? + */ +enum class GrTexturable : bool { + kNo = false, + kYes = true +}; + +// A DDL recorder has its own proxy provider and proxy cache. This enum indicates if +// a given proxy provider is one of these special ones. +enum class GrDDLProvider : bool { + kNo = false, + kYes = true +}; + +/** + * Formats for masks, used by the font cache. Important that these are 0-based. 
+ */ +enum GrMaskFormat { + kA8_GrMaskFormat, //!< 1-byte per pixel + kA565_GrMaskFormat, //!< 2-bytes per pixel, RGB represent 3-channel LCD coverage + kARGB_GrMaskFormat, //!< 4-bytes per pixel, color format + + kLast_GrMaskFormat = kARGB_GrMaskFormat +}; +static const int kMaskFormatCount = kLast_GrMaskFormat + 1; + +/** + * Return the number of bytes-per-pixel for the specified mask format. + */ +inline constexpr int GrMaskFormatBytesPerPixel(GrMaskFormat format) { + SkASSERT(format < kMaskFormatCount); + // kA8 (0) -> 1 + // kA565 (1) -> 2 + // kARGB (2) -> 4 + static_assert(kA8_GrMaskFormat == 0, "enum_order_dependency"); + static_assert(kA565_GrMaskFormat == 1, "enum_order_dependency"); + static_assert(kARGB_GrMaskFormat == 2, "enum_order_dependency"); + + return SkTo<int>(1u << format); +} + +/** Ownership rules for external GPU resources imported into Skia. */ +enum GrWrapOwnership { + /** Skia will assume the client will keep the resource alive and Skia will not free it. */ + kBorrow_GrWrapOwnership, + + /** Skia will assume ownership of the resource and free it. */ + kAdopt_GrWrapOwnership, +}; + +enum class GrWrapCacheable : bool { + /** + * The wrapped resource will be removed from the cache as soon as it becomes purgeable. It may + * still be assigned and found by a unique key, but the presence of the key will not be used to + * keep the resource alive when it has no references. + */ + kNo = false, + /** + * The wrapped resource is allowed to remain in the GrResourceCache when it has no references + * but has a unique key. Such resources should only be given unique keys when it is known that + * the key will eventually be removed from the resource or invalidated via the message bus. + */ + kYes = true +}; + +enum class GrBudgetedType : uint8_t { + /** The resource is budgeted and is subject to purging under budget pressure. */ + kBudgeted, + /** + * The resource is unbudgeted and is purged as soon as it has no refs regardless of whether + * it has a unique or scratch key. + */ + kUnbudgetedUncacheable, + /** + * The resource is unbudgeted and is allowed to remain in the cache with no refs if it + * has a unique key. Scratch keys are ignored. + */ + kUnbudgetedCacheable, +}; + +enum class GrScissorTest : bool { + kDisabled = false, + kEnabled = true +}; + +/* + * Used to say whether texture is backed by memory. + */ +enum class GrMemoryless : bool { + /** + * The texture will be allocated normally and will affect memory budgets. + */ + kNo = false, + /** + * The texture will be not use GPU memory and will not affect memory budgets. + */ + kYes = true +}; + +struct GrMipLevel { + const void* fPixels = nullptr; + size_t fRowBytes = 0; + // This may be used to keep fPixels from being freed while a GrMipLevel exists. + sk_sp<SkData> fOptionalStorage; +}; + +enum class GrSemaphoreWrapType { + kWillSignal, + kWillWait, +}; + +/** + * This enum is used to specify the load operation to be used when an OpsTask/GrOpsRenderPass + * begins execution. + */ +enum class GrLoadOp { + kLoad, + kClear, + kDiscard, +}; + +/** + * This enum is used to specify the store operation to be used when an OpsTask/GrOpsRenderPass + * ends execution. + */ +enum class GrStoreOp { + kStore, + kDiscard, +}; + +/** + * Used to control antialiasing in draw calls. 
+ */ +enum class GrAA : bool { + kNo = false, + kYes = true +}; + +enum class GrFillRule : bool { + kNonzero, + kEvenOdd +}; + +inline GrFillRule GrFillRuleForPathFillType(SkPathFillType fillType) { + switch (fillType) { + case SkPathFillType::kWinding: + case SkPathFillType::kInverseWinding: + return GrFillRule::kNonzero; + case SkPathFillType::kEvenOdd: + case SkPathFillType::kInverseEvenOdd: + return GrFillRule::kEvenOdd; + } + SkUNREACHABLE; +} + +inline GrFillRule GrFillRuleForSkPath(const SkPath& path) { + return GrFillRuleForPathFillType(path.getFillType()); +} + +/** This enum indicates the type of antialiasing to be performed. */ +enum class GrAAType : unsigned { + /** No antialiasing */ + kNone, + /** Use fragment shader code to blend with a fractional pixel coverage. */ + kCoverage, + /** Use normal MSAA. */ + kMSAA, + + kLast = kMSAA +}; +static const int kGrAATypeCount = static_cast<int>(GrAAType::kLast) + 1; + +static constexpr bool GrAATypeIsHW(GrAAType type) { + switch (type) { + case GrAAType::kNone: + return false; + case GrAAType::kCoverage: + return false; + case GrAAType::kMSAA: + return true; + } + SkUNREACHABLE; +} + +/** + * Some pixel configs are inherently clamped to [0,1], some are allowed to go outside that range, + * and some are FP but manually clamped in the XP. + */ +enum class GrClampType { + kAuto, // Normalized, fixed-point configs + kManual, // Clamped FP configs + kNone, // Normal (unclamped) FP configs +}; + +/** + * A number of rectangle/quadrilateral drawing APIs can control anti-aliasing on a per edge basis. + * These masks specify which edges are AA'ed. The intent for this is to support tiling with seamless + * boundaries, where the inner edges are non-AA and the outer edges are AA. Regular draws (where AA + * is specified by GrAA) is almost equivalent to kNone or kAll, with the exception of how MSAA is + * handled. + * + * When tiling and there is MSAA, mixed edge rectangles are processed with MSAA, so in order for the + * tiled edges to remain seamless, inner tiles with kNone must also be processed with MSAA. In + * regular drawing, however, kNone should disable MSAA (if it's supported) to match the expected + * appearance. + * + * Therefore, APIs that use per-edge AA flags also take a GrAA value so that they can differentiate + * between the regular and tiling use case behaviors. Tiling operations should always pass + * GrAA::kYes while regular options should pass GrAA based on the SkPaint's anti-alias state. + * + * These values are identical to SkCanvas::QuadAAFlags. + */ +enum class GrQuadAAFlags { + kLeft = 0b0001, + kTop = 0b0010, + kRight = 0b0100, + kBottom = 0b1000, + + kNone = 0b0000, + kAll = 0b1111, +}; + +GR_MAKE_BITFIELD_CLASS_OPS(GrQuadAAFlags) + +static inline GrQuadAAFlags SkToGrQuadAAFlags(unsigned flags) { + return static_cast<GrQuadAAFlags>(flags); +} + +/** + * Types of shader-language-specific boxed variables we can create. 
+ */ +enum GrSLType { + kVoid_GrSLType, + kBool_GrSLType, + kBool2_GrSLType, + kBool3_GrSLType, + kBool4_GrSLType, + kShort_GrSLType, + kShort2_GrSLType, + kShort3_GrSLType, + kShort4_GrSLType, + kUShort_GrSLType, + kUShort2_GrSLType, + kUShort3_GrSLType, + kUShort4_GrSLType, + kFloat_GrSLType, + kFloat2_GrSLType, + kFloat3_GrSLType, + kFloat4_GrSLType, + kFloat2x2_GrSLType, + kFloat3x3_GrSLType, + kFloat4x4_GrSLType, + kHalf_GrSLType, + kHalf2_GrSLType, + kHalf3_GrSLType, + kHalf4_GrSLType, + kHalf2x2_GrSLType, + kHalf3x3_GrSLType, + kHalf4x4_GrSLType, + kInt_GrSLType, + kInt2_GrSLType, + kInt3_GrSLType, + kInt4_GrSLType, + kUInt_GrSLType, + kUInt2_GrSLType, + kUInt3_GrSLType, + kUInt4_GrSLType, + kTexture2DSampler_GrSLType, + kTextureExternalSampler_GrSLType, + kTexture2DRectSampler_GrSLType, + kTexture2D_GrSLType, + kSampler_GrSLType, + kInput_GrSLType, + + kLast_GrSLType = kInput_GrSLType +}; +static const int kGrSLTypeCount = kLast_GrSLType + 1; + +/** + * The type of texture. Backends other than GL currently only use the 2D value but the type must + * still be known at the API-neutral layer as it used to determine whether MIP maps, renderability, + * and sampling parameters are legal for proxies that will be instantiated with wrapped textures. + */ +enum class GrTextureType { + kNone, + k2D, + /* Rectangle uses unnormalized texture coordinates. */ + kRectangle, + kExternal +}; + +enum GrShaderType { + kVertex_GrShaderType, + kFragment_GrShaderType, + + kLastkFragment_GrShaderType = kFragment_GrShaderType +}; +static const int kGrShaderTypeCount = kLastkFragment_GrShaderType + 1; + +enum GrShaderFlags { + kNone_GrShaderFlags = 0, + kVertex_GrShaderFlag = 1 << 0, + kTessControl_GrShaderFlag = 1 << 1, + kTessEvaluation_GrShaderFlag = 1 << 2, + kFragment_GrShaderFlag = 1 << 3 +}; +SK_MAKE_BITFIELD_OPS(GrShaderFlags) + +/** Is the shading language type float (including vectors/matrices)? */ +static constexpr bool GrSLTypeIsFloatType(GrSLType type) { + switch (type) { + case kFloat_GrSLType: + case kFloat2_GrSLType: + case kFloat3_GrSLType: + case kFloat4_GrSLType: + case kFloat2x2_GrSLType: + case kFloat3x3_GrSLType: + case kFloat4x4_GrSLType: + case kHalf_GrSLType: + case kHalf2_GrSLType: + case kHalf3_GrSLType: + case kHalf4_GrSLType: + case kHalf2x2_GrSLType: + case kHalf3x3_GrSLType: + case kHalf4x4_GrSLType: + return true; + + case kVoid_GrSLType: + case kTexture2DSampler_GrSLType: + case kTextureExternalSampler_GrSLType: + case kTexture2DRectSampler_GrSLType: + case kBool_GrSLType: + case kBool2_GrSLType: + case kBool3_GrSLType: + case kBool4_GrSLType: + case kShort_GrSLType: + case kShort2_GrSLType: + case kShort3_GrSLType: + case kShort4_GrSLType: + case kUShort_GrSLType: + case kUShort2_GrSLType: + case kUShort3_GrSLType: + case kUShort4_GrSLType: + case kInt_GrSLType: + case kInt2_GrSLType: + case kInt3_GrSLType: + case kInt4_GrSLType: + case kUInt_GrSLType: + case kUInt2_GrSLType: + case kUInt3_GrSLType: + case kUInt4_GrSLType: + case kTexture2D_GrSLType: + case kSampler_GrSLType: + case kInput_GrSLType: + return false; + } + SkUNREACHABLE; +} + +/** Is the shading language type integral (including vectors)? 
*/ +static constexpr bool GrSLTypeIsIntegralType(GrSLType type) { + switch (type) { + case kShort_GrSLType: + case kShort2_GrSLType: + case kShort3_GrSLType: + case kShort4_GrSLType: + case kUShort_GrSLType: + case kUShort2_GrSLType: + case kUShort3_GrSLType: + case kUShort4_GrSLType: + case kInt_GrSLType: + case kInt2_GrSLType: + case kInt3_GrSLType: + case kInt4_GrSLType: + case kUInt_GrSLType: + case kUInt2_GrSLType: + case kUInt3_GrSLType: + case kUInt4_GrSLType: + return true; + + case kFloat_GrSLType: + case kFloat2_GrSLType: + case kFloat3_GrSLType: + case kFloat4_GrSLType: + case kFloat2x2_GrSLType: + case kFloat3x3_GrSLType: + case kFloat4x4_GrSLType: + case kHalf_GrSLType: + case kHalf2_GrSLType: + case kHalf3_GrSLType: + case kHalf4_GrSLType: + case kHalf2x2_GrSLType: + case kHalf3x3_GrSLType: + case kHalf4x4_GrSLType: + case kVoid_GrSLType: + case kTexture2DSampler_GrSLType: + case kTextureExternalSampler_GrSLType: + case kTexture2DRectSampler_GrSLType: + case kBool_GrSLType: + case kBool2_GrSLType: + case kBool3_GrSLType: + case kBool4_GrSLType: + case kTexture2D_GrSLType: + case kSampler_GrSLType: + case kInput_GrSLType: + return false; + } + SkUNREACHABLE; +} + +/** + * Is the shading language type supported as a uniform (ie, does it have a corresponding set + * function on GrGLSLProgramDataManager)? + */ +static constexpr bool GrSLTypeCanBeUniformValue(GrSLType type) { + return GrSLTypeIsFloatType(type) || GrSLTypeIsIntegralType(type); +} + +/** If the type represents a single value or vector return the vector length, else -1. */ +static constexpr int GrSLTypeVecLength(GrSLType type) { + switch (type) { + case kFloat_GrSLType: + case kHalf_GrSLType: + case kBool_GrSLType: + case kShort_GrSLType: + case kUShort_GrSLType: + case kInt_GrSLType: + case kUInt_GrSLType: + return 1; + + case kFloat2_GrSLType: + case kHalf2_GrSLType: + case kBool2_GrSLType: + case kShort2_GrSLType: + case kUShort2_GrSLType: + case kInt2_GrSLType: + case kUInt2_GrSLType: + return 2; + + case kFloat3_GrSLType: + case kHalf3_GrSLType: + case kBool3_GrSLType: + case kShort3_GrSLType: + case kUShort3_GrSLType: + case kInt3_GrSLType: + case kUInt3_GrSLType: + return 3; + + case kFloat4_GrSLType: + case kHalf4_GrSLType: + case kBool4_GrSLType: + case kShort4_GrSLType: + case kUShort4_GrSLType: + case kInt4_GrSLType: + case kUInt4_GrSLType: + return 4; + + case kFloat2x2_GrSLType: + case kFloat3x3_GrSLType: + case kFloat4x4_GrSLType: + case kHalf2x2_GrSLType: + case kHalf3x3_GrSLType: + case kHalf4x4_GrSLType: + case kVoid_GrSLType: + case kTexture2DSampler_GrSLType: + case kTextureExternalSampler_GrSLType: + case kTexture2DRectSampler_GrSLType: + case kTexture2D_GrSLType: + case kSampler_GrSLType: + case kInput_GrSLType: + return -1; + } + SkUNREACHABLE; +} + +static inline GrSLType GrSLCombinedSamplerTypeForTextureType(GrTextureType type) { + switch (type) { + case GrTextureType::k2D: + return kTexture2DSampler_GrSLType; + case GrTextureType::kRectangle: + return kTexture2DRectSampler_GrSLType; + case GrTextureType::kExternal: + return kTextureExternalSampler_GrSLType; + default: + SK_ABORT("Unexpected texture type"); + } +} + +/** Rectangle and external textures only support the clamp wrap mode and do not support + * MIP maps. 
+ */ +static inline bool GrTextureTypeHasRestrictedSampling(GrTextureType type) { + switch (type) { + case GrTextureType::k2D: + return false; + case GrTextureType::kRectangle: + return true; + case GrTextureType::kExternal: + return true; + default: + SK_ABORT("Unexpected texture type"); + } +} + +static constexpr bool GrSLTypeIsCombinedSamplerType(GrSLType type) { + switch (type) { + case kTexture2DSampler_GrSLType: + case kTextureExternalSampler_GrSLType: + case kTexture2DRectSampler_GrSLType: + return true; + + case kVoid_GrSLType: + case kFloat_GrSLType: + case kFloat2_GrSLType: + case kFloat3_GrSLType: + case kFloat4_GrSLType: + case kFloat2x2_GrSLType: + case kFloat3x3_GrSLType: + case kFloat4x4_GrSLType: + case kHalf_GrSLType: + case kHalf2_GrSLType: + case kHalf3_GrSLType: + case kHalf4_GrSLType: + case kHalf2x2_GrSLType: + case kHalf3x3_GrSLType: + case kHalf4x4_GrSLType: + case kInt_GrSLType: + case kInt2_GrSLType: + case kInt3_GrSLType: + case kInt4_GrSLType: + case kUInt_GrSLType: + case kUInt2_GrSLType: + case kUInt3_GrSLType: + case kUInt4_GrSLType: + case kBool_GrSLType: + case kBool2_GrSLType: + case kBool3_GrSLType: + case kBool4_GrSLType: + case kShort_GrSLType: + case kShort2_GrSLType: + case kShort3_GrSLType: + case kShort4_GrSLType: + case kUShort_GrSLType: + case kUShort2_GrSLType: + case kUShort3_GrSLType: + case kUShort4_GrSLType: + case kTexture2D_GrSLType: + case kSampler_GrSLType: + case kInput_GrSLType: + return false; + } + SkUNREACHABLE; +} + +////////////////////////////////////////////////////////////////////////////// + +/** + * Types used to describe format of vertices in arrays. + */ +enum GrVertexAttribType { + kFloat_GrVertexAttribType = 0, + kFloat2_GrVertexAttribType, + kFloat3_GrVertexAttribType, + kFloat4_GrVertexAttribType, + kHalf_GrVertexAttribType, + kHalf2_GrVertexAttribType, + kHalf4_GrVertexAttribType, + + kInt2_GrVertexAttribType, // vector of 2 32-bit ints + kInt3_GrVertexAttribType, // vector of 3 32-bit ints + kInt4_GrVertexAttribType, // vector of 4 32-bit ints + + + kByte_GrVertexAttribType, // signed byte + kByte2_GrVertexAttribType, // vector of 2 8-bit signed bytes + kByte4_GrVertexAttribType, // vector of 4 8-bit signed bytes + kUByte_GrVertexAttribType, // unsigned byte + kUByte2_GrVertexAttribType, // vector of 2 8-bit unsigned bytes + kUByte4_GrVertexAttribType, // vector of 4 8-bit unsigned bytes + + kUByte_norm_GrVertexAttribType, // unsigned byte, e.g. coverage, 0 -> 0.0f, 255 -> 1.0f. + kUByte4_norm_GrVertexAttribType, // vector of 4 unsigned bytes, e.g. colors, 0 -> 0.0f, + // 255 -> 1.0f. + + kShort2_GrVertexAttribType, // vector of 2 16-bit shorts. + kShort4_GrVertexAttribType, // vector of 4 16-bit shorts. + + kUShort2_GrVertexAttribType, // vector of 2 unsigned shorts. 0 -> 0, 65535 -> 65535. + kUShort2_norm_GrVertexAttribType, // vector of 2 unsigned shorts. 0 -> 0.0f, 65535 -> 1.0f. + + kInt_GrVertexAttribType, + kUInt_GrVertexAttribType, + + kUShort_norm_GrVertexAttribType, + + kUShort4_norm_GrVertexAttribType, // vector of 4 unsigned shorts. 0 -> 0.0f, 65535 -> 1.0f. + + kLast_GrVertexAttribType = kUShort4_norm_GrVertexAttribType +}; +static const int kGrVertexAttribTypeCount = kLast_GrVertexAttribType + 1; + +////////////////////////////////////////////////////////////////////////////// + +/** + * We have coverage effects that clip rendering to the edge of some geometric primitive. + * This enum specifies how that clipping is performed. 
Not all factories that take a + * GrClipEdgeType will succeed with all values and it is up to the caller to verify success. + */ +enum class GrClipEdgeType { + kFillBW, + kFillAA, + kInverseFillBW, + kInverseFillAA, + + kLast = kInverseFillAA +}; +static const int kGrClipEdgeTypeCnt = (int) GrClipEdgeType::kLast + 1; + +static constexpr bool GrClipEdgeTypeIsFill(const GrClipEdgeType edgeType) { + return (GrClipEdgeType::kFillAA == edgeType || GrClipEdgeType::kFillBW == edgeType); +} + +static constexpr bool GrClipEdgeTypeIsInverseFill(const GrClipEdgeType edgeType) { + return (GrClipEdgeType::kInverseFillAA == edgeType || + GrClipEdgeType::kInverseFillBW == edgeType); +} + +static constexpr bool GrClipEdgeTypeIsAA(const GrClipEdgeType edgeType) { + return (GrClipEdgeType::kFillBW != edgeType && + GrClipEdgeType::kInverseFillBW != edgeType); +} + +static inline GrClipEdgeType GrInvertClipEdgeType(const GrClipEdgeType edgeType) { + switch (edgeType) { + case GrClipEdgeType::kFillBW: + return GrClipEdgeType::kInverseFillBW; + case GrClipEdgeType::kFillAA: + return GrClipEdgeType::kInverseFillAA; + case GrClipEdgeType::kInverseFillBW: + return GrClipEdgeType::kFillBW; + case GrClipEdgeType::kInverseFillAA: + return GrClipEdgeType::kFillAA; + } + SkUNREACHABLE; +} + +/** + * Indicates the type of pending IO operations that can be recorded for gpu resources. + */ +enum GrIOType { + kRead_GrIOType, + kWrite_GrIOType, + kRW_GrIOType +}; + +/** + * Indicates the type of data that a GPU buffer will be used for. + */ +enum class GrGpuBufferType { + kVertex, + kIndex, + kDrawIndirect, + kXferCpuToGpu, + kXferGpuToCpu, + kUniform, +}; +static const int kGrGpuBufferTypeCount = static_cast<int>(GrGpuBufferType::kUniform) + 1; + +/** + * Provides a performance hint regarding the frequency at which a data store will be accessed. + */ +enum GrAccessPattern { + /** Data store will be respecified repeatedly and used many times. */ + kDynamic_GrAccessPattern, + /** Data store will be specified once and used many times. (Thus disqualified from caching.) */ + kStatic_GrAccessPattern, + /** Data store will be specified once and used at most a few times. (Also can't be cached.) */ + kStream_GrAccessPattern, + + kLast_GrAccessPattern = kStream_GrAccessPattern +}; + +// Flags shared between the GrSurface & GrSurfaceProxy class hierarchies +enum class GrInternalSurfaceFlags { + kNone = 0, + + // Texture-level + + // Means the pixels in the texture are read-only. Cannot also be a GrRenderTarget[Proxy]. + kReadOnly = 1 << 0, + + // RT-level + + // This flag is for use with GL only. It tells us that the internal render target wraps FBO 0. + kGLRTFBOIDIs0 = 1 << 1, + + // This means the render target is multisampled, and internally holds a non-msaa texture for + // resolving into. The render target resolves itself by blitting into this internal texture. + // (asTexture() might or might not return the internal texture, but if it does, we always + // resolve the render target before accessing this texture's data.) + kRequiresManualMSAAResolve = 1 << 2, + + // This means the pixels in the render target are write-only. This is used for Dawn and Metal + // swap chain targets which can be rendered to, but not read or copied. + kFramebufferOnly = 1 << 3, + + // This is a Vulkan only flag. If set the surface can be used as an input attachment in a + // shader. This is used for doing in shader blending where we want to sample from the same + // image we are drawing to. 
+ kVkRTSupportsInputAttachment = 1 << 4, +}; + +GR_MAKE_BITFIELD_CLASS_OPS(GrInternalSurfaceFlags) + +// 'GR_MAKE_BITFIELD_CLASS_OPS' defines the & operator on GrInternalSurfaceFlags to return bool. +// We want to find the bitwise & with these masks, so we declare them as ints. +constexpr static int kGrInternalTextureFlagsMask = static_cast<int>( + GrInternalSurfaceFlags::kReadOnly); + +// We don't include kVkRTSupportsInputAttachment in this mask since we check it manually. We don't +// require that both the surface and proxy have matching values for this flag. Instead we require +// if the proxy has it set then the surface must also have it set. All other flags listed here must +// match on the proxy and surface. +// TODO: Add back kFramebufferOnly flag here once we update SkSurfaceCharacterization to take it +// as a flag. skbug.com/10672 +constexpr static int kGrInternalRenderTargetFlagsMask = static_cast<int>( + GrInternalSurfaceFlags::kGLRTFBOIDIs0 | + GrInternalSurfaceFlags::kRequiresManualMSAAResolve/* | + GrInternalSurfaceFlags::kFramebufferOnly*/); + +constexpr static int kGrInternalTextureRenderTargetFlagsMask = + kGrInternalTextureFlagsMask | kGrInternalRenderTargetFlagsMask; + +#ifdef SK_DEBUG +// Takes a pointer to a GrCaps, and will suppress prints if required +#define GrCapsDebugf(caps, ...) if (!(caps)->suppressPrints()) SkDebugf(__VA_ARGS__) +#else +#define GrCapsDebugf(caps, ...) do {} while (0) +#endif + +/** + * Specifies if the holder owns the backend, OpenGL or Vulkan, object. + */ +enum class GrBackendObjectOwnership : bool { + /** Holder does not destroy the backend object. */ + kBorrowed = false, + /** Holder destroys the backend object. */ + kOwned = true +}; + +/* + * Object for CPU-GPU synchronization + */ +typedef uint64_t GrFence; + +/** + * Used to include or exclude specific GPU path renderers for testing purposes. + */ +enum class GpuPathRenderers { + kNone = 0, // Always use software masks and/or DefaultPathRenderer. + kDashLine = 1 << 0, + kAtlas = 1 << 1, + kTessellation = 1 << 2, + kCoverageCounting = 1 << 3, + kAAHairline = 1 << 4, + kAAConvex = 1 << 5, + kAALinearizing = 1 << 6, + kSmall = 1 << 7, + kTriangulating = 1 << 8, + kDefault = ((1 << 9) - 1) // All path renderers. +}; + +/** + * Used to describe the current state of Mips on a GrTexture + */ +enum class GrMipmapStatus { + kNotAllocated, // Mips have not been allocated + kDirty, // Mips are allocated but the full mip tree does not have valid data + kValid, // All levels fully allocated and have valid data in them +}; + +GR_MAKE_BITFIELD_CLASS_OPS(GpuPathRenderers) + +/** + * Like SkColorType this describes a layout of pixel data in CPU memory. It specifies the channels, + * their type, and width. This exists so that the GPU backend can have private types that have no + * analog in the public facing SkColorType enum and omit types not implemented in the GPU backend. + * It does not refer to a texture format and the mapping to texture formats may be many-to-many. + * It does not specify the sRGB encoding of the stored values. The components are listed in order of + * where they appear in memory. In other words the first component listed is in the low bits and + * the last component in the high bits. + */ +enum class GrColorType { + kUnknown, + kAlpha_8, + kBGR_565, + kABGR_4444, // This name differs from SkColorType. kARGB_4444_SkColorType is misnamed. 
+ kRGBA_8888, + kRGBA_8888_SRGB, + kRGB_888x, + kRG_88, + kBGRA_8888, + kRGBA_1010102, + kBGRA_1010102, + kGray_8, + kGrayAlpha_88, + kAlpha_F16, + kRGBA_F16, + kRGBA_F16_Clamped, + kRGBA_F32, + + kAlpha_16, + kRG_1616, + kRG_F16, + kRGBA_16161616, + + // Unusual types that come up after reading back in cases where we are reassigning the meaning + // of a texture format's channels to use for a particular color format but have to read back the + // data to a full RGBA quadruple. (e.g. using a R8 texture format as A8 color type but the API + // only supports reading to RGBA8.) None of these have SkColorType equivalents. + kAlpha_8xxx, + kAlpha_F32xxx, + kGray_8xxx, + + // Types used to initialize backend textures. + kRGB_888, + kR_8, + kR_16, + kR_F16, + kGray_F16, + kBGRA_4444, + kARGB_4444, + + kLast = kARGB_4444 +}; + +static const int kGrColorTypeCnt = static_cast<int>(GrColorType::kLast) + 1; + +static constexpr SkColorType GrColorTypeToSkColorType(GrColorType ct) { + switch (ct) { + case GrColorType::kUnknown: return kUnknown_SkColorType; + case GrColorType::kAlpha_8: return kAlpha_8_SkColorType; + case GrColorType::kBGR_565: return kRGB_565_SkColorType; + case GrColorType::kABGR_4444: return kARGB_4444_SkColorType; + case GrColorType::kRGBA_8888: return kRGBA_8888_SkColorType; + case GrColorType::kRGBA_8888_SRGB: return kSRGBA_8888_SkColorType; + case GrColorType::kRGB_888x: return kRGB_888x_SkColorType; + case GrColorType::kRG_88: return kR8G8_unorm_SkColorType; + case GrColorType::kBGRA_8888: return kBGRA_8888_SkColorType; + case GrColorType::kRGBA_1010102: return kRGBA_1010102_SkColorType; + case GrColorType::kBGRA_1010102: return kBGRA_1010102_SkColorType; + case GrColorType::kGray_8: return kGray_8_SkColorType; + case GrColorType::kGrayAlpha_88: return kUnknown_SkColorType; + case GrColorType::kAlpha_F16: return kA16_float_SkColorType; + case GrColorType::kRGBA_F16: return kRGBA_F16_SkColorType; + case GrColorType::kRGBA_F16_Clamped: return kRGBA_F16Norm_SkColorType; + case GrColorType::kRGBA_F32: return kRGBA_F32_SkColorType; + case GrColorType::kAlpha_8xxx: return kUnknown_SkColorType; + case GrColorType::kAlpha_F32xxx: return kUnknown_SkColorType; + case GrColorType::kGray_8xxx: return kUnknown_SkColorType; + case GrColorType::kAlpha_16: return kA16_unorm_SkColorType; + case GrColorType::kRG_1616: return kR16G16_unorm_SkColorType; + case GrColorType::kRGBA_16161616: return kR16G16B16A16_unorm_SkColorType; + case GrColorType::kRG_F16: return kR16G16_float_SkColorType; + case GrColorType::kRGB_888: return kUnknown_SkColorType; + case GrColorType::kR_8: return kUnknown_SkColorType; + case GrColorType::kR_16: return kUnknown_SkColorType; + case GrColorType::kR_F16: return kUnknown_SkColorType; + case GrColorType::kGray_F16: return kUnknown_SkColorType; + case GrColorType::kARGB_4444: return kUnknown_SkColorType; + case GrColorType::kBGRA_4444: return kUnknown_SkColorType; + } + SkUNREACHABLE; +} + +static constexpr GrColorType SkColorTypeToGrColorType(SkColorType ct) { + switch (ct) { + case kUnknown_SkColorType: return GrColorType::kUnknown; + case kAlpha_8_SkColorType: return GrColorType::kAlpha_8; + case kRGB_565_SkColorType: return GrColorType::kBGR_565; + case kARGB_4444_SkColorType: return GrColorType::kABGR_4444; + case kRGBA_8888_SkColorType: return GrColorType::kRGBA_8888; + case kSRGBA_8888_SkColorType: return GrColorType::kRGBA_8888_SRGB; + case kRGB_888x_SkColorType: return GrColorType::kRGB_888x; + case kBGRA_8888_SkColorType: return GrColorType::kBGRA_8888; + case 
kGray_8_SkColorType: return GrColorType::kGray_8; + case kRGBA_F16Norm_SkColorType: return GrColorType::kRGBA_F16_Clamped; + case kRGBA_F16_SkColorType: return GrColorType::kRGBA_F16; + case kRGBA_1010102_SkColorType: return GrColorType::kRGBA_1010102; + case kRGB_101010x_SkColorType: return GrColorType::kUnknown; + case kBGRA_1010102_SkColorType: return GrColorType::kBGRA_1010102; + case kBGR_101010x_SkColorType: return GrColorType::kUnknown; + case kRGBA_F32_SkColorType: return GrColorType::kRGBA_F32; + case kR8G8_unorm_SkColorType: return GrColorType::kRG_88; + case kA16_unorm_SkColorType: return GrColorType::kAlpha_16; + case kR16G16_unorm_SkColorType: return GrColorType::kRG_1616; + case kA16_float_SkColorType: return GrColorType::kAlpha_F16; + case kR16G16_float_SkColorType: return GrColorType::kRG_F16; + case kR16G16B16A16_unorm_SkColorType: return GrColorType::kRGBA_16161616; + } + SkUNREACHABLE; +} + +static constexpr uint32_t GrColorTypeChannelFlags(GrColorType ct) { + switch (ct) { + case GrColorType::kUnknown: return 0; + case GrColorType::kAlpha_8: return kAlpha_SkColorChannelFlag; + case GrColorType::kBGR_565: return kRGB_SkColorChannelFlags; + case GrColorType::kABGR_4444: return kRGBA_SkColorChannelFlags; + case GrColorType::kRGBA_8888: return kRGBA_SkColorChannelFlags; + case GrColorType::kRGBA_8888_SRGB: return kRGBA_SkColorChannelFlags; + case GrColorType::kRGB_888x: return kRGB_SkColorChannelFlags; + case GrColorType::kRG_88: return kRG_SkColorChannelFlags; + case GrColorType::kBGRA_8888: return kRGBA_SkColorChannelFlags; + case GrColorType::kRGBA_1010102: return kRGBA_SkColorChannelFlags; + case GrColorType::kBGRA_1010102: return kRGBA_SkColorChannelFlags; + case GrColorType::kGray_8: return kGray_SkColorChannelFlag; + case GrColorType::kGrayAlpha_88: return kGrayAlpha_SkColorChannelFlags; + case GrColorType::kAlpha_F16: return kAlpha_SkColorChannelFlag; + case GrColorType::kRGBA_F16: return kRGBA_SkColorChannelFlags; + case GrColorType::kRGBA_F16_Clamped: return kRGBA_SkColorChannelFlags; + case GrColorType::kRGBA_F32: return kRGBA_SkColorChannelFlags; + case GrColorType::kAlpha_8xxx: return kAlpha_SkColorChannelFlag; + case GrColorType::kAlpha_F32xxx: return kAlpha_SkColorChannelFlag; + case GrColorType::kGray_8xxx: return kGray_SkColorChannelFlag; + case GrColorType::kAlpha_16: return kAlpha_SkColorChannelFlag; + case GrColorType::kRG_1616: return kRG_SkColorChannelFlags; + case GrColorType::kRGBA_16161616: return kRGBA_SkColorChannelFlags; + case GrColorType::kRG_F16: return kRG_SkColorChannelFlags; + case GrColorType::kRGB_888: return kRGB_SkColorChannelFlags; + case GrColorType::kR_8: return kRed_SkColorChannelFlag; + case GrColorType::kR_16: return kRed_SkColorChannelFlag; + case GrColorType::kR_F16: return kRed_SkColorChannelFlag; + case GrColorType::kGray_F16: return kGray_SkColorChannelFlag; + case GrColorType::kARGB_4444: return kRGBA_SkColorChannelFlags; + case GrColorType::kBGRA_4444: return kRGBA_SkColorChannelFlags; + } + SkUNREACHABLE; +} + +/** + * Describes the encoding of channel data in a GrColorType. + */ +enum class GrColorTypeEncoding { + kUnorm, + kSRGBUnorm, + // kSnorm, + kFloat, + // kSint + // kUint +}; + +/** + * Describes a GrColorType by how many bits are used for each color component and how they are + * encoded. Currently all the non-zero channels share a single GrColorTypeEncoding. This could be + * expanded to store separate encodings and to indicate which bits belong to which components. 
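+ *
+ * For example (reading from GrGetColorTypeDesc below), GrColorType::kRGBA_1010102 is described
+ * as r = g = b = 10, a = 2 with a kUnorm encoding, while GrColorType::kAlpha_F16 is a = 16 with a
+ * kFloat encoding.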
+ */ +class GrColorFormatDesc { +public: + static constexpr GrColorFormatDesc MakeRGBA(int rgba, GrColorTypeEncoding e) { + return {rgba, rgba, rgba, rgba, 0, e}; + } + + static constexpr GrColorFormatDesc MakeRGBA(int rgb, int a, GrColorTypeEncoding e) { + return {rgb, rgb, rgb, a, 0, e}; + } + + static constexpr GrColorFormatDesc MakeRGB(int rgb, GrColorTypeEncoding e) { + return {rgb, rgb, rgb, 0, 0, e}; + } + + static constexpr GrColorFormatDesc MakeRGB(int r, int g, int b, GrColorTypeEncoding e) { + return {r, g, b, 0, 0, e}; + } + + static constexpr GrColorFormatDesc MakeAlpha(int a, GrColorTypeEncoding e) { + return {0, 0, 0, a, 0, e}; + } + + static constexpr GrColorFormatDesc MakeR(int r, GrColorTypeEncoding e) { + return {r, 0, 0, 0, 0, e}; + } + + static constexpr GrColorFormatDesc MakeRG(int rg, GrColorTypeEncoding e) { + return {rg, rg, 0, 0, 0, e}; + } + + static constexpr GrColorFormatDesc MakeGray(int grayBits, GrColorTypeEncoding e) { + return {0, 0, 0, 0, grayBits, e}; + } + + static constexpr GrColorFormatDesc MakeGrayAlpha(int grayAlpha, GrColorTypeEncoding e) { + return {0, 0, 0, 0, grayAlpha, e}; + } + + static constexpr GrColorFormatDesc MakeInvalid() { return {}; } + + constexpr int r() const { return fRBits; } + constexpr int g() const { return fGBits; } + constexpr int b() const { return fBBits; } + constexpr int a() const { return fABits; } + constexpr int operator[](int c) const { + switch (c) { + case 0: return this->r(); + case 1: return this->g(); + case 2: return this->b(); + case 3: return this->a(); + } + SkUNREACHABLE; + } + + constexpr int gray() const { return fGrayBits; } + + constexpr GrColorTypeEncoding encoding() const { return fEncoding; } + +private: + int fRBits = 0; + int fGBits = 0; + int fBBits = 0; + int fABits = 0; + int fGrayBits = 0; + GrColorTypeEncoding fEncoding = GrColorTypeEncoding::kUnorm; + + constexpr GrColorFormatDesc() = default; + + constexpr GrColorFormatDesc(int r, int g, int b, int a, int gray, GrColorTypeEncoding encoding) + : fRBits(r), fGBits(g), fBBits(b), fABits(a), fGrayBits(gray), fEncoding(encoding) { + SkASSERT(r >= 0 && g >= 0 && b >= 0 && a >= 0 && gray >= 0); + SkASSERT(!gray || (!r && !g && !b)); + SkASSERT(r || g || b || a || gray); + } +}; + +static constexpr GrColorFormatDesc GrGetColorTypeDesc(GrColorType ct) { + switch (ct) { + case GrColorType::kUnknown: + return GrColorFormatDesc::MakeInvalid(); + case GrColorType::kAlpha_8: + return GrColorFormatDesc::MakeAlpha(8, GrColorTypeEncoding::kUnorm); + case GrColorType::kBGR_565: + return GrColorFormatDesc::MakeRGB(5, 6, 5, GrColorTypeEncoding::kUnorm); + case GrColorType::kABGR_4444: + return GrColorFormatDesc::MakeRGBA(4, GrColorTypeEncoding::kUnorm); + case GrColorType::kRGBA_8888: + return GrColorFormatDesc::MakeRGBA(8, GrColorTypeEncoding::kUnorm); + case GrColorType::kRGBA_8888_SRGB: + return GrColorFormatDesc::MakeRGBA(8, GrColorTypeEncoding::kSRGBUnorm); + case GrColorType::kRGB_888x: + return GrColorFormatDesc::MakeRGB(8, GrColorTypeEncoding::kUnorm); + case GrColorType::kRG_88: + return GrColorFormatDesc::MakeRG(8, GrColorTypeEncoding::kUnorm); + case GrColorType::kBGRA_8888: + return GrColorFormatDesc::MakeRGBA(8, GrColorTypeEncoding::kUnorm); + case GrColorType::kRGBA_1010102: + return GrColorFormatDesc::MakeRGBA(10, 2, GrColorTypeEncoding::kUnorm); + case GrColorType::kBGRA_1010102: + return GrColorFormatDesc::MakeRGBA(10, 2, GrColorTypeEncoding::kUnorm); + case GrColorType::kGray_8: + return GrColorFormatDesc::MakeGray(8, 
GrColorTypeEncoding::kUnorm); + case GrColorType::kGrayAlpha_88: + return GrColorFormatDesc::MakeGrayAlpha(8, GrColorTypeEncoding::kUnorm); + case GrColorType::kAlpha_F16: + return GrColorFormatDesc::MakeAlpha(16, GrColorTypeEncoding::kFloat); + case GrColorType::kRGBA_F16: + return GrColorFormatDesc::MakeRGBA(16, GrColorTypeEncoding::kFloat); + case GrColorType::kRGBA_F16_Clamped: + return GrColorFormatDesc::MakeRGBA(16, GrColorTypeEncoding::kFloat); + case GrColorType::kRGBA_F32: + return GrColorFormatDesc::MakeRGBA(32, GrColorTypeEncoding::kFloat); + case GrColorType::kAlpha_8xxx: + return GrColorFormatDesc::MakeAlpha(8, GrColorTypeEncoding::kUnorm); + case GrColorType::kAlpha_F32xxx: + return GrColorFormatDesc::MakeAlpha(32, GrColorTypeEncoding::kFloat); + case GrColorType::kGray_8xxx: + return GrColorFormatDesc::MakeGray(8, GrColorTypeEncoding::kUnorm); + case GrColorType::kAlpha_16: + return GrColorFormatDesc::MakeAlpha(16, GrColorTypeEncoding::kUnorm); + case GrColorType::kRG_1616: + return GrColorFormatDesc::MakeRG(16, GrColorTypeEncoding::kUnorm); + case GrColorType::kRGBA_16161616: + return GrColorFormatDesc::MakeRGBA(16, GrColorTypeEncoding::kUnorm); + case GrColorType::kRG_F16: + return GrColorFormatDesc::MakeRG(16, GrColorTypeEncoding::kFloat); + case GrColorType::kRGB_888: + return GrColorFormatDesc::MakeRGB(8, GrColorTypeEncoding::kUnorm); + case GrColorType::kR_8: + return GrColorFormatDesc::MakeR(8, GrColorTypeEncoding::kUnorm); + case GrColorType::kR_16: + return GrColorFormatDesc::MakeR(16, GrColorTypeEncoding::kUnorm); + case GrColorType::kR_F16: + return GrColorFormatDesc::MakeR(16, GrColorTypeEncoding::kFloat); + case GrColorType::kGray_F16: + return GrColorFormatDesc::MakeGray(16, GrColorTypeEncoding::kFloat); + case GrColorType::kARGB_4444: + return GrColorFormatDesc::MakeRGBA(4, GrColorTypeEncoding::kUnorm); + case GrColorType::kBGRA_4444: + return GrColorFormatDesc::MakeRGBA(4, GrColorTypeEncoding::kUnorm); + } + SkUNREACHABLE; +} + +static constexpr GrClampType GrColorTypeClampType(GrColorType colorType) { + if (GrGetColorTypeDesc(colorType).encoding() == GrColorTypeEncoding::kUnorm || + GrGetColorTypeDesc(colorType).encoding() == GrColorTypeEncoding::kSRGBUnorm) { + return GrClampType::kAuto; + } + return GrColorType::kRGBA_F16_Clamped == colorType ? GrClampType::kManual : GrClampType::kNone; +} + +// Consider a color type "wider" than n if it has more than n bits for any its representable +// channels. 
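+// For example, GrColorType::kRGBA_1010102 is wider than 8 (its color channels use 10 bits each),
+// while GrColorType::kAlpha_8 is not (no channel exceeds 8 bits).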
+static constexpr bool GrColorTypeIsWiderThan(GrColorType colorType, int n) { + SkASSERT(n > 0); + auto desc = GrGetColorTypeDesc(colorType); + return (desc.r() && desc.r() > n )|| + (desc.g() && desc.g() > n) || + (desc.b() && desc.b() > n) || + (desc.a() && desc.a() > n) || + (desc.gray() && desc.gray() > n); +} + +static constexpr bool GrColorTypeIsAlphaOnly(GrColorType ct) { + return GrColorTypeChannelFlags(ct) == kAlpha_SkColorChannelFlag; +} + +static constexpr bool GrColorTypeHasAlpha(GrColorType ct) { + return GrColorTypeChannelFlags(ct) & kAlpha_SkColorChannelFlag; +} + +static constexpr size_t GrColorTypeBytesPerPixel(GrColorType ct) { + switch (ct) { + case GrColorType::kUnknown: return 0; + case GrColorType::kAlpha_8: return 1; + case GrColorType::kBGR_565: return 2; + case GrColorType::kABGR_4444: return 2; + case GrColorType::kRGBA_8888: return 4; + case GrColorType::kRGBA_8888_SRGB: return 4; + case GrColorType::kRGB_888x: return 4; + case GrColorType::kRG_88: return 2; + case GrColorType::kBGRA_8888: return 4; + case GrColorType::kRGBA_1010102: return 4; + case GrColorType::kBGRA_1010102: return 4; + case GrColorType::kGray_8: return 1; + case GrColorType::kGrayAlpha_88: return 2; + case GrColorType::kAlpha_F16: return 2; + case GrColorType::kRGBA_F16: return 8; + case GrColorType::kRGBA_F16_Clamped: return 8; + case GrColorType::kRGBA_F32: return 16; + case GrColorType::kAlpha_8xxx: return 4; + case GrColorType::kAlpha_F32xxx: return 16; + case GrColorType::kGray_8xxx: return 4; + case GrColorType::kAlpha_16: return 2; + case GrColorType::kRG_1616: return 4; + case GrColorType::kRGBA_16161616: return 8; + case GrColorType::kRG_F16: return 4; + case GrColorType::kRGB_888: return 3; + case GrColorType::kR_8: return 1; + case GrColorType::kR_16: return 2; + case GrColorType::kR_F16: return 2; + case GrColorType::kGray_F16: return 2; + case GrColorType::kARGB_4444: return 2; + case GrColorType::kBGRA_4444: return 2; + } + SkUNREACHABLE; +} + +// In general we try to not mix CompressionType and ColorType, but currently SkImage still requires +// an SkColorType even for CompressedTypes so we need some conversion. +static constexpr SkColorType GrCompressionTypeToSkColorType(SkImage::CompressionType compression) { + switch (compression) { + case SkImage::CompressionType::kNone: return kUnknown_SkColorType; + case SkImage::CompressionType::kETC2_RGB8_UNORM: return kRGB_888x_SkColorType; + case SkImage::CompressionType::kBC1_RGB8_UNORM: return kRGB_888x_SkColorType; + case SkImage::CompressionType::kBC1_RGBA8_UNORM: return kRGBA_8888_SkColorType; + } + + SkUNREACHABLE; +} + +static constexpr GrColorType GrMaskFormatToColorType(GrMaskFormat format) { + switch (format) { + case kA8_GrMaskFormat: + return GrColorType::kAlpha_8; + case kA565_GrMaskFormat: + return GrColorType::kBGR_565; + case kARGB_GrMaskFormat: + return GrColorType::kRGBA_8888; + } + SkUNREACHABLE; +} + +/** + * Ref-counted object that calls a callback from its destructor. 
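+ * Typically used to wrap a client-supplied release proc and its context so that the proc runs
+ * exactly once, when the last reference goes away.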
+ */ +class GrRefCntedCallback : public SkNVRefCnt<GrRefCntedCallback> { +public: + using Context = void*; + using Callback = void (*)(Context); + + static sk_sp<GrRefCntedCallback> Make(Callback proc, Context ctx) { + if (!proc) { + return nullptr; + } + return sk_sp<GrRefCntedCallback>(new GrRefCntedCallback(proc, ctx)); + } + + ~GrRefCntedCallback() { fReleaseProc(fReleaseCtx); } + + Context context() const { return fReleaseCtx; } + +private: + GrRefCntedCallback(Callback proc, Context ctx) : fReleaseProc(proc), fReleaseCtx(ctx) {} + GrRefCntedCallback(const GrRefCntedCallback&) = delete; + GrRefCntedCallback(GrRefCntedCallback&&) = delete; + GrRefCntedCallback& operator=(const GrRefCntedCallback&) = delete; + GrRefCntedCallback& operator=(GrRefCntedCallback&&) = delete; + + Callback fReleaseProc; + Context fReleaseCtx; +}; + +enum class GrDstSampleFlags { + kNone = 0, + kRequiresTextureBarrier = 1 << 0, + kAsInputAttachment = 1 << 1, +}; +GR_MAKE_BITFIELD_CLASS_OPS(GrDstSampleFlags) + +using GrVisitProxyFunc = std::function<void(GrSurfaceProxy*, GrMipmapped)>; + +#if defined(SK_DEBUG) || GR_TEST_UTILS || defined(SK_ENABLE_DUMP_GPU) +static constexpr const char* GrBackendApiToStr(GrBackendApi api) { + switch (api) { + case GrBackendApi::kOpenGL: return "OpenGL"; + case GrBackendApi::kVulkan: return "Vulkan"; + case GrBackendApi::kMetal: return "Metal"; + case GrBackendApi::kDirect3D: return "Direct3D"; + case GrBackendApi::kDawn: return "Dawn"; + case GrBackendApi::kMock: return "Mock"; + } + SkUNREACHABLE; +} + +static constexpr const char* GrColorTypeToStr(GrColorType ct) { + switch (ct) { + case GrColorType::kUnknown: return "kUnknown"; + case GrColorType::kAlpha_8: return "kAlpha_8"; + case GrColorType::kBGR_565: return "kRGB_565"; + case GrColorType::kABGR_4444: return "kABGR_4444"; + case GrColorType::kRGBA_8888: return "kRGBA_8888"; + case GrColorType::kRGBA_8888_SRGB: return "kRGBA_8888_SRGB"; + case GrColorType::kRGB_888x: return "kRGB_888x"; + case GrColorType::kRG_88: return "kRG_88"; + case GrColorType::kBGRA_8888: return "kBGRA_8888"; + case GrColorType::kRGBA_1010102: return "kRGBA_1010102"; + case GrColorType::kBGRA_1010102: return "kBGRA_1010102"; + case GrColorType::kGray_8: return "kGray_8"; + case GrColorType::kGrayAlpha_88: return "kGrayAlpha_88"; + case GrColorType::kAlpha_F16: return "kAlpha_F16"; + case GrColorType::kRGBA_F16: return "kRGBA_F16"; + case GrColorType::kRGBA_F16_Clamped: return "kRGBA_F16_Clamped"; + case GrColorType::kRGBA_F32: return "kRGBA_F32"; + case GrColorType::kAlpha_8xxx: return "kAlpha_8xxx"; + case GrColorType::kAlpha_F32xxx: return "kAlpha_F32xxx"; + case GrColorType::kGray_8xxx: return "kGray_8xxx"; + case GrColorType::kAlpha_16: return "kAlpha_16"; + case GrColorType::kRG_1616: return "kRG_1616"; + case GrColorType::kRGBA_16161616: return "kRGBA_16161616"; + case GrColorType::kRG_F16: return "kRG_F16"; + case GrColorType::kRGB_888: return "kRGB_888"; + case GrColorType::kR_8: return "kR_8"; + case GrColorType::kR_16: return "kR_16"; + case GrColorType::kR_F16: return "kR_F16"; + case GrColorType::kGray_F16: return "kGray_F16"; + case GrColorType::kARGB_4444: return "kARGB_4444"; + case GrColorType::kBGRA_4444: return "kBGRA_4444"; + } + SkUNREACHABLE; +} + +static constexpr const char* GrCompressionTypeToStr(SkImage::CompressionType compression) { + switch (compression) { + case SkImage::CompressionType::kNone: return "kNone"; + case SkImage::CompressionType::kETC2_RGB8_UNORM: return "kETC2_RGB8_UNORM"; + case 
 SkImage::CompressionType::kBC1_RGB8_UNORM: return "kBC1_RGB8_UNORM";
+        case SkImage::CompressionType::kBC1_RGBA8_UNORM: return "kBC1_RGBA8_UNORM";
+    }
+    SkUNREACHABLE;
+}
+#endif
+
+#endif
diff --git a/src/deps/skia/include/private/GrVkTypesPriv.h b/src/deps/skia/include/private/GrVkTypesPriv.h
new file mode 100644
index 000000000..cec98c404
--- /dev/null
+++ b/src/deps/skia/include/private/GrVkTypesPriv.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrVkTypesPriv_DEFINED
+#define GrVkTypesPriv_DEFINED
+
+#include "include/core/SkRefCnt.h"
+#include "include/gpu/vk/GrVkTypes.h"
+
+class GrBackendSurfaceMutableStateImpl;
+
+// This struct is used to store the actual information about the Vulkan backend image on the
+// GrBackendTexture and GrBackendRenderTarget. When a client calls getVkImageInfo on a
+// GrBackendTexture/RenderTarget, we use the GrVkBackendSurfaceInfo to create a snapshot
+// GrVkImageInfo object. Internally, this uses a ref-counted GrVkImageLayout object to track the
+// current VkImageLayout which can be shared with an internal GrVkImage so that layout updates can
+// be seen by all users of the image.
+struct GrVkBackendSurfaceInfo {
+    GrVkBackendSurfaceInfo(GrVkImageInfo info) : fImageInfo(info) {}
+
+    void cleanup();
+
+    GrVkBackendSurfaceInfo& operator=(const GrVkBackendSurfaceInfo&) = delete;
+
+    // Assigns the passed-in GrVkBackendSurfaceInfo to this object. If isValid is true we will also
+    // attempt to unref the old fLayout on this object.
+    void assign(const GrVkBackendSurfaceInfo&, bool isValid);
+
+    GrVkImageInfo snapImageInfo(const GrBackendSurfaceMutableStateImpl*) const;
+
+    bool isProtected() const { return fImageInfo.fProtected == GrProtected::kYes; }
+#if GR_TEST_UTILS
+    bool operator==(const GrVkBackendSurfaceInfo& that) const;
+#endif
+
+private:
+    GrVkImageInfo fImageInfo;
+};
+
+class GrVkSharedImageInfo {
+public:
+    GrVkSharedImageInfo(VkImageLayout layout, uint32_t queueFamilyIndex)
+            : fLayout(layout)
+            , fQueueFamilyIndex(queueFamilyIndex) {}
+
+    GrVkSharedImageInfo& operator=(const GrVkSharedImageInfo& that) {
+        fLayout = that.getImageLayout();
+        fQueueFamilyIndex = that.getQueueFamilyIndex();
+        return *this;
+    }
+
+    void setImageLayout(VkImageLayout layout) {
+        // Defaulting to use std::memory_order_seq_cst
+        fLayout.store(layout);
+    }
+
+    VkImageLayout getImageLayout() const {
+        // Defaulting to use std::memory_order_seq_cst
+        return fLayout.load();
+    }
+
+    void setQueueFamilyIndex(uint32_t queueFamilyIndex) {
+        // Defaulting to use std::memory_order_seq_cst
+        fQueueFamilyIndex.store(queueFamilyIndex);
+    }
+
+    uint32_t getQueueFamilyIndex() const {
+        // Defaulting to use std::memory_order_seq_cst
+        return fQueueFamilyIndex.load();
+    }
+
+private:
+    std::atomic<VkImageLayout> fLayout;
+    std::atomic<uint32_t> fQueueFamilyIndex;
+};
+
+struct GrVkImageSpec {
+    GrVkImageSpec()
+            : fImageTiling(VK_IMAGE_TILING_OPTIMAL)
+            , fFormat(VK_FORMAT_UNDEFINED)
+            , fImageUsageFlags(0)
+            , fSharingMode(VK_SHARING_MODE_EXCLUSIVE) {}
+
+    GrVkImageSpec(const GrVkSurfaceInfo& info)
+            : fImageTiling(info.fImageTiling)
+            , fFormat(info.fFormat)
+            , fImageUsageFlags(info.fImageUsageFlags)
+            , fYcbcrConversionInfo(info.fYcbcrConversionInfo)
+            , fSharingMode(info.fSharingMode) {}
+
+    VkImageTiling fImageTiling;
+    VkFormat fFormat;
+    VkImageUsageFlags fImageUsageFlags;
+    GrVkYcbcrConversionInfo fYcbcrConversionInfo;
+    VkSharingMode
fSharingMode; +}; + +GrVkSurfaceInfo GrVkImageSpecToSurfaceInfo(const GrVkImageSpec& vkSpec, + uint32_t sampleCount, + uint32_t levelCount, + GrProtected isProtected); + +#endif diff --git a/src/deps/skia/include/private/OWNERS b/src/deps/skia/include/private/OWNERS new file mode 100644 index 000000000..7cf12a2a7 --- /dev/null +++ b/src/deps/skia/include/private/OWNERS @@ -0,0 +1,4 @@ +# include/ has a restricted set of reviewers (to limit changes to public API) +# Files in this directory follow the same rules as the rest of Skia, though: + +file:../../OWNERS diff --git a/src/deps/skia/include/private/SkBitmaskEnum.h b/src/deps/skia/include/private/SkBitmaskEnum.h new file mode 100644 index 000000000..b25045359 --- /dev/null +++ b/src/deps/skia/include/private/SkBitmaskEnum.h @@ -0,0 +1,59 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ +#ifndef SkEnumOperators_DEFINED +#define SkEnumOperators_DEFINED + +#include <type_traits> + +namespace sknonstd { +template <typename T> struct is_bitmask_enum : std::false_type {}; + +template <typename E> +std::enable_if_t<sknonstd::is_bitmask_enum<E>::value, bool> constexpr Any(E e) { + return static_cast<std::underlying_type_t<E>>(e) != 0; +} +} // namespace sknonstd + +template <typename E> +std::enable_if_t<sknonstd::is_bitmask_enum<E>::value, E> constexpr operator|(E l, E r) { + using U = std::underlying_type_t<E>; + return static_cast<E>(static_cast<U>(l) | static_cast<U>(r)); +} + +template <typename E> +std::enable_if_t<sknonstd::is_bitmask_enum<E>::value, E&> constexpr operator|=(E& l, E r) { + return l = l | r; +} + +template <typename E> +std::enable_if_t<sknonstd::is_bitmask_enum<E>::value, E> constexpr operator&(E l, E r) { + using U = std::underlying_type_t<E>; + return static_cast<E>(static_cast<U>(l) & static_cast<U>(r)); +} + +template <typename E> +std::enable_if_t<sknonstd::is_bitmask_enum<E>::value, E&> constexpr operator&=(E& l, E r) { + return l = l & r; +} + +template <typename E> +std::enable_if_t<sknonstd::is_bitmask_enum<E>::value, E> constexpr operator^(E l, E r) { + using U = std::underlying_type_t<E>; + return static_cast<E>(static_cast<U>(l) ^ static_cast<U>(r)); +} + +template <typename E> +std::enable_if_t<sknonstd::is_bitmask_enum<E>::value, E&> constexpr operator^=(E& l, E r) { + return l = l ^ r; +} + +template <typename E> +std::enable_if_t<sknonstd::is_bitmask_enum<E>::value, E> constexpr operator~(E e) { + return static_cast<E>(~static_cast<std::underlying_type_t<E>>(e)); +} + +#endif // SkEnumOperators_DEFINED diff --git a/src/deps/skia/include/private/SkChecksum.h b/src/deps/skia/include/private/SkChecksum.h new file mode 100644 index 000000000..6339239d6 --- /dev/null +++ b/src/deps/skia/include/private/SkChecksum.h @@ -0,0 +1,66 @@ +/* + * Copyright 2012 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkChecksum_DEFINED +#define SkChecksum_DEFINED + +#include "include/core/SkString.h" +#include "include/core/SkTypes.h" +#include "include/private/SkNoncopyable.h" +#include "include/private/SkOpts_spi.h" +#include "include/private/SkTLogic.h" + +class SkChecksum : SkNoncopyable { +public: + /** + * uint32_t -> uint32_t hash, useful for when you're about to trucate this hash but you + * suspect its low bits aren't well mixed. + * + * This is the Murmur3 finalizer. 
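+     *  (This is fmix32: alternating xor-shifts and multiplies chosen so that every input bit can
+     *  affect every output bit.)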
+ */ + static uint32_t Mix(uint32_t hash) { + hash ^= hash >> 16; + hash *= 0x85ebca6b; + hash ^= hash >> 13; + hash *= 0xc2b2ae35; + hash ^= hash >> 16; + return hash; + } + + /** + * uint32_t -> uint32_t hash, useful for when you're about to trucate this hash but you + * suspect its low bits aren't well mixed. + * + * This version is 2-lines cheaper than Mix, but seems to be sufficient for the font cache. + */ + static uint32_t CheapMix(uint32_t hash) { + hash ^= hash >> 16; + hash *= 0x85ebca6b; + hash ^= hash >> 16; + return hash; + } +}; + +// SkGoodHash should usually be your first choice in hashing data. +// It should be both reasonably fast and high quality. +struct SkGoodHash { + template <typename K> + std::enable_if_t<sizeof(K) == 4, uint32_t> operator()(const K& k) const { + return SkChecksum::Mix(*(const uint32_t*)&k); + } + + template <typename K> + std::enable_if_t<sizeof(K) != 4, uint32_t> operator()(const K& k) const { + return SkOpts::hash_fn(&k, sizeof(K), 0); + } + + uint32_t operator()(const SkString& k) const { + return SkOpts::hash_fn(k.c_str(), k.size(), 0); + } +}; + +#endif diff --git a/src/deps/skia/include/private/SkColorData.h b/src/deps/skia/include/private/SkColorData.h new file mode 100644 index 000000000..a59e7b044 --- /dev/null +++ b/src/deps/skia/include/private/SkColorData.h @@ -0,0 +1,441 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkColorData_DEFINED +#define SkColorData_DEFINED + +#include "include/core/SkColor.h" +#include "include/core/SkColorPriv.h" +#include "include/private/SkNx.h" +#include "include/private/SkTo.h" + +//////////////////////////////////////////////////////////////////////////////////////////// +// Convert a 16bit pixel to a 32bit pixel + +#define SK_R16_BITS 5 +#define SK_G16_BITS 6 +#define SK_B16_BITS 5 + +#define SK_R16_SHIFT (SK_B16_BITS + SK_G16_BITS) +#define SK_G16_SHIFT (SK_B16_BITS) +#define SK_B16_SHIFT 0 + +#define SK_R16_MASK ((1 << SK_R16_BITS) - 1) +#define SK_G16_MASK ((1 << SK_G16_BITS) - 1) +#define SK_B16_MASK ((1 << SK_B16_BITS) - 1) + +#define SkGetPackedR16(color) (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK) +#define SkGetPackedG16(color) (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK) +#define SkGetPackedB16(color) (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK) + +static inline unsigned SkR16ToR32(unsigned r) { + return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8)); +} + +static inline unsigned SkG16ToG32(unsigned g) { + return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8)); +} + +static inline unsigned SkB16ToB32(unsigned b) { + return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8)); +} + +#define SkPacked16ToR32(c) SkR16ToR32(SkGetPackedR16(c)) +#define SkPacked16ToG32(c) SkG16ToG32(SkGetPackedG16(c)) +#define SkPacked16ToB32(c) SkB16ToB32(SkGetPackedB16(c)) + +////////////////////////////////////////////////////////////////////////////// + +#define SkASSERT_IS_BYTE(x) SkASSERT(0 == ((x) & ~0xFFu)) + +// Reverse the bytes coorsponding to RED and BLUE in a packed pixels. Note the +// pair of them are in the same 2 slots in both RGBA and BGRA, thus there is +// no need to pass in the colortype to this function. 
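+// In effect this converts a packed RGBA pixel to BGRA (and vice versa); the alpha and green bytes
+// are left untouched.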
+static inline uint32_t SkSwizzle_RB(uint32_t c) { + static const uint32_t kRBMask = (0xFF << SK_R32_SHIFT) | (0xFF << SK_B32_SHIFT); + + unsigned c0 = (c >> SK_R32_SHIFT) & 0xFF; + unsigned c1 = (c >> SK_B32_SHIFT) & 0xFF; + return (c & ~kRBMask) | (c0 << SK_B32_SHIFT) | (c1 << SK_R32_SHIFT); +} + +static inline uint32_t SkPackARGB_as_RGBA(U8CPU a, U8CPU r, U8CPU g, U8CPU b) { + SkASSERT_IS_BYTE(a); + SkASSERT_IS_BYTE(r); + SkASSERT_IS_BYTE(g); + SkASSERT_IS_BYTE(b); + return (a << SK_RGBA_A32_SHIFT) | (r << SK_RGBA_R32_SHIFT) | + (g << SK_RGBA_G32_SHIFT) | (b << SK_RGBA_B32_SHIFT); +} + +static inline uint32_t SkPackARGB_as_BGRA(U8CPU a, U8CPU r, U8CPU g, U8CPU b) { + SkASSERT_IS_BYTE(a); + SkASSERT_IS_BYTE(r); + SkASSERT_IS_BYTE(g); + SkASSERT_IS_BYTE(b); + return (a << SK_BGRA_A32_SHIFT) | (r << SK_BGRA_R32_SHIFT) | + (g << SK_BGRA_G32_SHIFT) | (b << SK_BGRA_B32_SHIFT); +} + +static inline SkPMColor SkSwizzle_RGBA_to_PMColor(uint32_t c) { +#ifdef SK_PMCOLOR_IS_RGBA + return c; +#else + return SkSwizzle_RB(c); +#endif +} + +static inline SkPMColor SkSwizzle_BGRA_to_PMColor(uint32_t c) { +#ifdef SK_PMCOLOR_IS_BGRA + return c; +#else + return SkSwizzle_RB(c); +#endif +} + +////////////////////////////////////////////////////////////////////////////// + +///@{ +/** See ITU-R Recommendation BT.709 at http://www.itu.int/rec/R-REC-BT.709/ .*/ +#define SK_ITU_BT709_LUM_COEFF_R (0.2126f) +#define SK_ITU_BT709_LUM_COEFF_G (0.7152f) +#define SK_ITU_BT709_LUM_COEFF_B (0.0722f) +///@} + +///@{ +/** A float value which specifies this channel's contribution to luminance. */ +#define SK_LUM_COEFF_R SK_ITU_BT709_LUM_COEFF_R +#define SK_LUM_COEFF_G SK_ITU_BT709_LUM_COEFF_G +#define SK_LUM_COEFF_B SK_ITU_BT709_LUM_COEFF_B +///@} + +/** Computes the luminance from the given r, g, and b in accordance with + SK_LUM_COEFF_X. For correct results, r, g, and b should be in linear space. +*/ +static inline U8CPU SkComputeLuminance(U8CPU r, U8CPU g, U8CPU b) { + //The following is + //r * SK_LUM_COEFF_R + g * SK_LUM_COEFF_G + b * SK_LUM_COEFF_B + //with SK_LUM_COEFF_X in 1.8 fixed point (rounding adjusted to sum to 256). + return (r * 54 + g * 183 + b * 19) >> 8; +} + +/** Calculates 256 - (value * alpha256) / 255 in range [0,256], + * for [0,255] value and [0,256] alpha256. + */ +static inline U16CPU SkAlphaMulInv256(U16CPU value, U16CPU alpha256) { + unsigned prod = 0xFFFF - value * alpha256; + return (prod + (prod >> 8)) >> 8; +} + +// The caller may want negative values, so keep all params signed (int) +// so we don't accidentally slip into unsigned math and lose the sign +// extension when we shift (in SkAlphaMul) +static inline int SkAlphaBlend(int src, int dst, int scale256) { + SkASSERT((unsigned)scale256 <= 256); + return dst + SkAlphaMul(src - dst, scale256); +} + +static inline uint16_t SkPackRGB16(unsigned r, unsigned g, unsigned b) { + SkASSERT(r <= SK_R16_MASK); + SkASSERT(g <= SK_G16_MASK); + SkASSERT(b <= SK_B16_MASK); + + return SkToU16((r << SK_R16_SHIFT) | (g << SK_G16_SHIFT) | (b << SK_B16_SHIFT)); +} + +#define SK_R16_MASK_IN_PLACE (SK_R16_MASK << SK_R16_SHIFT) +#define SK_G16_MASK_IN_PLACE (SK_G16_MASK << SK_G16_SHIFT) +#define SK_B16_MASK_IN_PLACE (SK_B16_MASK << SK_B16_SHIFT) + +/////////////////////////////////////////////////////////////////////////////// + +/** + * Abstract 4-byte interpolation, implemented on top of SkPMColor + * utility functions. 
Third parameter controls blending of the first two: + * (src, dst, 0) returns dst + * (src, dst, 0xFF) returns src + * scale is [0..256], unlike SkFourByteInterp which takes [0..255] + */ +static inline SkPMColor SkFourByteInterp256(SkPMColor src, SkPMColor dst, int scale) { + unsigned a = SkTo<uint8_t>(SkAlphaBlend(SkGetPackedA32(src), SkGetPackedA32(dst), scale)); + unsigned r = SkTo<uint8_t>(SkAlphaBlend(SkGetPackedR32(src), SkGetPackedR32(dst), scale)); + unsigned g = SkTo<uint8_t>(SkAlphaBlend(SkGetPackedG32(src), SkGetPackedG32(dst), scale)); + unsigned b = SkTo<uint8_t>(SkAlphaBlend(SkGetPackedB32(src), SkGetPackedB32(dst), scale)); + + return SkPackARGB32(a, r, g, b); +} + +/** + * Abstract 4-byte interpolation, implemented on top of SkPMColor + * utility functions. Third parameter controls blending of the first two: + * (src, dst, 0) returns dst + * (src, dst, 0xFF) returns src + */ +static inline SkPMColor SkFourByteInterp(SkPMColor src, SkPMColor dst, U8CPU srcWeight) { + int scale = (int)SkAlpha255To256(srcWeight); + return SkFourByteInterp256(src, dst, scale); +} + +/** + * 0xAARRGGBB -> 0x00AA00GG, 0x00RR00BB + */ +static inline void SkSplay(uint32_t color, uint32_t* ag, uint32_t* rb) { + const uint32_t mask = 0x00FF00FF; + *ag = (color >> 8) & mask; + *rb = color & mask; +} + +/** + * 0xAARRGGBB -> 0x00AA00GG00RR00BB + * (note, ARGB -> AGRB) + */ +static inline uint64_t SkSplay(uint32_t color) { + const uint32_t mask = 0x00FF00FF; + uint64_t agrb = (color >> 8) & mask; // 0x0000000000AA00GG + agrb <<= 32; // 0x00AA00GG00000000 + agrb |= color & mask; // 0x00AA00GG00RR00BB + return agrb; +} + +/** + * 0xAAxxGGxx, 0xRRxxBBxx-> 0xAARRGGBB + */ +static inline uint32_t SkUnsplay(uint32_t ag, uint32_t rb) { + const uint32_t mask = 0xFF00FF00; + return (ag & mask) | ((rb & mask) >> 8); +} + +/** + * 0xAAxxGGxxRRxxBBxx -> 0xAARRGGBB + * (note, AGRB -> ARGB) + */ +static inline uint32_t SkUnsplay(uint64_t agrb) { + const uint32_t mask = 0xFF00FF00; + return SkPMColor( + ((agrb & mask) >> 8) | // 0x00RR00BB + ((agrb >> 32) & mask)); // 0xAARRGGBB +} + +static inline SkPMColor SkFastFourByteInterp256_32(SkPMColor src, SkPMColor dst, unsigned scale) { + SkASSERT(scale <= 256); + + // Two 8-bit blends per two 32-bit registers, with space to make sure the math doesn't collide. + uint32_t src_ag, src_rb, dst_ag, dst_rb; + SkSplay(src, &src_ag, &src_rb); + SkSplay(dst, &dst_ag, &dst_rb); + + const uint32_t ret_ag = src_ag * scale + (256 - scale) * dst_ag; + const uint32_t ret_rb = src_rb * scale + (256 - scale) * dst_rb; + + return SkUnsplay(ret_ag, ret_rb); +} + +static inline SkPMColor SkFastFourByteInterp256_64(SkPMColor src, SkPMColor dst, unsigned scale) { + SkASSERT(scale <= 256); + // Four 8-bit blends in one 64-bit register, with space to make sure the math doesn't collide. + return SkUnsplay(SkSplay(src) * scale + (256-scale) * SkSplay(dst)); +} + +// TODO(mtklein): Replace slow versions with fast versions, using scale + (scale>>7) everywhere. + +/** + * Same as SkFourByteInterp256, but faster. + */ +static inline SkPMColor SkFastFourByteInterp256(SkPMColor src, SkPMColor dst, unsigned scale) { + // On a 64-bit machine, _64 is about 10% faster than _32, but ~40% slower on a 32-bit machine. + if (sizeof(void*) == 4) { + return SkFastFourByteInterp256_32(src, dst, scale); + } else { + return SkFastFourByteInterp256_64(src, dst, scale); + } +} + +/** + * Nearly the same as SkFourByteInterp, but faster and a touch more accurate, due to better + * srcWeight scaling to [0, 256]. 
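+ * (srcWeight + (srcWeight >> 7) maps 255 to exactly 256, so a fully weighted src is returned
+ * unchanged.)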
+ */ +static inline SkPMColor SkFastFourByteInterp(SkPMColor src, SkPMColor dst, U8CPU srcWeight) { + SkASSERT(srcWeight <= 255); + // scale = srcWeight + (srcWeight >> 7) is more accurate than + // scale = srcWeight + 1, but 7% slower + return SkFastFourByteInterp256(src, dst, srcWeight + (srcWeight >> 7)); +} + +/** + * Interpolates between colors src and dst using [0,256] scale. + */ +static inline SkPMColor SkPMLerp(SkPMColor src, SkPMColor dst, unsigned scale) { + return SkFastFourByteInterp256(src, dst, scale); +} + +static inline SkPMColor SkBlendARGB32(SkPMColor src, SkPMColor dst, U8CPU aa) { + SkASSERT((unsigned)aa <= 255); + + unsigned src_scale = SkAlpha255To256(aa); + unsigned dst_scale = SkAlphaMulInv256(SkGetPackedA32(src), src_scale); + + const uint32_t mask = 0xFF00FF; + + uint32_t src_rb = (src & mask) * src_scale; + uint32_t src_ag = ((src >> 8) & mask) * src_scale; + + uint32_t dst_rb = (dst & mask) * dst_scale; + uint32_t dst_ag = ((dst >> 8) & mask) * dst_scale; + + return (((src_rb + dst_rb) >> 8) & mask) | ((src_ag + dst_ag) & ~mask); +} + +//////////////////////////////////////////////////////////////////////////////////////////// +// Convert a 32bit pixel to a 16bit pixel (no dither) + +#define SkR32ToR16_MACRO(r) ((unsigned)(r) >> (SK_R32_BITS - SK_R16_BITS)) +#define SkG32ToG16_MACRO(g) ((unsigned)(g) >> (SK_G32_BITS - SK_G16_BITS)) +#define SkB32ToB16_MACRO(b) ((unsigned)(b) >> (SK_B32_BITS - SK_B16_BITS)) + +#ifdef SK_DEBUG + static inline unsigned SkR32ToR16(unsigned r) { + SkR32Assert(r); + return SkR32ToR16_MACRO(r); + } + static inline unsigned SkG32ToG16(unsigned g) { + SkG32Assert(g); + return SkG32ToG16_MACRO(g); + } + static inline unsigned SkB32ToB16(unsigned b) { + SkB32Assert(b); + return SkB32ToB16_MACRO(b); + } +#else + #define SkR32ToR16(r) SkR32ToR16_MACRO(r) + #define SkG32ToG16(g) SkG32ToG16_MACRO(g) + #define SkB32ToB16(b) SkB32ToB16_MACRO(b) +#endif + +static inline U16CPU SkPixel32ToPixel16(SkPMColor c) { + unsigned r = ((c >> (SK_R32_SHIFT + (8 - SK_R16_BITS))) & SK_R16_MASK) << SK_R16_SHIFT; + unsigned g = ((c >> (SK_G32_SHIFT + (8 - SK_G16_BITS))) & SK_G16_MASK) << SK_G16_SHIFT; + unsigned b = ((c >> (SK_B32_SHIFT + (8 - SK_B16_BITS))) & SK_B16_MASK) << SK_B16_SHIFT; + return r | g | b; +} + +static inline U16CPU SkPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) { + return (SkR32ToR16(r) << SK_R16_SHIFT) | + (SkG32ToG16(g) << SK_G16_SHIFT) | + (SkB32ToB16(b) << SK_B16_SHIFT); +} + +///////////////////////////////////////////////////////////////////////////////////////// + +/* SrcOver the 32bit src color with the 16bit dst, returning a 16bit value + (with dirt in the high 16bits, so caller beware). 
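+    Per channel this is roughly result = src + dst * (255 - srcAlpha) / 255, computed at about
+    8-bit precision and then truncated back down to the 565 channel widths.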
+*/ +static inline U16CPU SkSrcOver32To16(SkPMColor src, uint16_t dst) { + unsigned sr = SkGetPackedR32(src); + unsigned sg = SkGetPackedG32(src); + unsigned sb = SkGetPackedB32(src); + + unsigned dr = SkGetPackedR16(dst); + unsigned dg = SkGetPackedG16(dst); + unsigned db = SkGetPackedB16(dst); + + unsigned isa = 255 - SkGetPackedA32(src); + + dr = (sr + SkMul16ShiftRound(dr, isa, SK_R16_BITS)) >> (8 - SK_R16_BITS); + dg = (sg + SkMul16ShiftRound(dg, isa, SK_G16_BITS)) >> (8 - SK_G16_BITS); + db = (sb + SkMul16ShiftRound(db, isa, SK_B16_BITS)) >> (8 - SK_B16_BITS); + + return SkPackRGB16(dr, dg, db); +} + +static inline SkColor SkPixel16ToColor(U16CPU src) { + SkASSERT(src == SkToU16(src)); + + unsigned r = SkPacked16ToR32(src); + unsigned g = SkPacked16ToG32(src); + unsigned b = SkPacked16ToB32(src); + + SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src)); + SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src)); + SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src)); + + return SkColorSetRGB(r, g, b); +} + +/////////////////////////////////////////////////////////////////////////////// + +typedef uint16_t SkPMColor16; + +// Put in OpenGL order (r g b a) +#define SK_A4444_SHIFT 0 +#define SK_R4444_SHIFT 12 +#define SK_G4444_SHIFT 8 +#define SK_B4444_SHIFT 4 + +static inline U8CPU SkReplicateNibble(unsigned nib) { + SkASSERT(nib <= 0xF); + return (nib << 4) | nib; +} + +#define SkGetPackedA4444(c) (((unsigned)(c) >> SK_A4444_SHIFT) & 0xF) +#define SkGetPackedR4444(c) (((unsigned)(c) >> SK_R4444_SHIFT) & 0xF) +#define SkGetPackedG4444(c) (((unsigned)(c) >> SK_G4444_SHIFT) & 0xF) +#define SkGetPackedB4444(c) (((unsigned)(c) >> SK_B4444_SHIFT) & 0xF) + +#define SkPacked4444ToA32(c) SkReplicateNibble(SkGetPackedA4444(c)) + +static inline SkPMColor SkPixel4444ToPixel32(U16CPU c) { + uint32_t d = (SkGetPackedA4444(c) << SK_A32_SHIFT) | + (SkGetPackedR4444(c) << SK_R32_SHIFT) | + (SkGetPackedG4444(c) << SK_G32_SHIFT) | + (SkGetPackedB4444(c) << SK_B32_SHIFT); + return d | (d << 4); +} + +static inline Sk4f swizzle_rb(const Sk4f& x) { + return SkNx_shuffle<2, 1, 0, 3>(x); +} + +static inline Sk4f swizzle_rb_if_bgra(const Sk4f& x) { +#ifdef SK_PMCOLOR_IS_BGRA + return swizzle_rb(x); +#else + return x; +#endif +} + +static inline Sk4f Sk4f_fromL32(uint32_t px) { + return SkNx_cast<float>(Sk4b::Load(&px)) * (1 / 255.0f); +} + +static inline uint32_t Sk4f_toL32(const Sk4f& px) { + Sk4f v = px; + +#if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 + // SkNx_cast<uint8_t, int32_t>() pins, and we don't anticipate giant floats +#elif !defined(SKNX_NO_SIMD) && defined(SK_ARM_HAS_NEON) + // SkNx_cast<uint8_t, int32_t>() pins, and so does Sk4f_round(). +#else + // No guarantee of a pin. 
+ v = Sk4f::Max(0, Sk4f::Min(v, 1)); +#endif + + uint32_t l32; + SkNx_cast<uint8_t>(Sk4f_round(v * 255.0f)).store(&l32); + return l32; +} + +using SkPMColor4f = SkRGBA4f<kPremul_SkAlphaType>; + +constexpr SkPMColor4f SK_PMColor4fTRANSPARENT = { 0, 0, 0, 0 }; +constexpr SkPMColor4f SK_PMColor4fBLACK = { 0, 0, 0, 1 }; +constexpr SkPMColor4f SK_PMColor4fWHITE = { 1, 1, 1, 1 }; +constexpr SkPMColor4f SK_PMColor4fILLEGAL = { SK_FloatNegativeInfinity, + SK_FloatNegativeInfinity, + SK_FloatNegativeInfinity, + SK_FloatNegativeInfinity }; + +#endif diff --git a/src/deps/skia/include/private/SkDeque.h b/src/deps/skia/include/private/SkDeque.h new file mode 100644 index 000000000..8adc39c1c --- /dev/null +++ b/src/deps/skia/include/private/SkDeque.h @@ -0,0 +1,141 @@ + +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + + +#ifndef SkDeque_DEFINED +#define SkDeque_DEFINED + +#include "include/core/SkTypes.h" + +/* + * The deque class works by blindly creating memory space of a specified element + * size. It manages the memory as a doubly linked list of blocks each of which + * can contain multiple elements. Pushes and pops add/remove blocks from the + * beginning/end of the list as necessary while each block tracks the used + * portion of its memory. + * One behavior to be aware of is that the pops do not immediately remove an + * empty block from the beginning/end of the list (Presumably so push/pop pairs + * on the block boundaries don't cause thrashing). This can result in the first/ + * last element not residing in the first/last block. + */ +class SK_API SkDeque { +public: + /** + * elemSize specifies the size of each individual element in the deque + * allocCount specifies how many elements are to be allocated as a block + */ + explicit SkDeque(size_t elemSize, int allocCount = 1); + SkDeque(size_t elemSize, void* storage, size_t storageSize, int allocCount = 1); + ~SkDeque(); + + bool empty() const { return 0 == fCount; } + int count() const { return fCount; } + size_t elemSize() const { return fElemSize; } + + const void* front() const { return fFront; } + const void* back() const { return fBack; } + + void* front() { + return (void*)((const SkDeque*)this)->front(); + } + + void* back() { + return (void*)((const SkDeque*)this)->back(); + } + + /** + * push_front and push_back return a pointer to the memory space + * for the new element + */ + void* push_front(); + void* push_back(); + + void pop_front(); + void pop_back(); + +private: + struct Block; + +public: + class Iter { + public: + enum IterStart { + kFront_IterStart, + kBack_IterStart, + }; + + /** + * Creates an uninitialized iterator. 
Must be reset() + */ + Iter(); + + Iter(const SkDeque& d, IterStart startLoc); + void* next(); + void* prev(); + + void reset(const SkDeque& d, IterStart startLoc); + + private: + SkDeque::Block* fCurBlock; + char* fPos; + size_t fElemSize; + }; + + // Inherit privately from Iter to prevent access to reverse iteration + class F2BIter : private Iter { + public: + F2BIter() {} + + /** + * Wrap Iter's 2 parameter ctor to force initialization to the + * beginning of the deque + */ + F2BIter(const SkDeque& d) : INHERITED(d, kFront_IterStart) {} + + using Iter::next; + + /** + * Wrap Iter::reset to force initialization to the beginning of the + * deque + */ + void reset(const SkDeque& d) { + this->INHERITED::reset(d, kFront_IterStart); + } + + private: + using INHERITED = Iter; + }; + +private: + // allow unit test to call numBlocksAllocated + friend class DequeUnitTestHelper; + + void* fFront; + void* fBack; + + Block* fFrontBlock; + Block* fBackBlock; + size_t fElemSize; + void* fInitialStorage; + int fCount; // number of elements in the deque + int fAllocCount; // number of elements to allocate per block + + Block* allocateBlock(int allocCount); + void freeBlock(Block* block); + + /** + * This returns the number of chunk blocks allocated by the deque. It + * can be used to gauge the effectiveness of the selected allocCount. + */ + int numBlocksAllocated() const; + + SkDeque(const SkDeque&) = delete; + SkDeque& operator=(const SkDeque&) = delete; +}; + +#endif diff --git a/src/deps/skia/include/private/SkEncodedInfo.h b/src/deps/skia/include/private/SkEncodedInfo.h new file mode 100644 index 000000000..92400d956 --- /dev/null +++ b/src/deps/skia/include/private/SkEncodedInfo.h @@ -0,0 +1,249 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkEncodedInfo_DEFINED +#define SkEncodedInfo_DEFINED + +#include <memory> + +#include "include/core/SkData.h" +#include "include/core/SkImageInfo.h" +#include "include/third_party/skcms/skcms.h" + +struct SkEncodedInfo { +public: + class ICCProfile { + public: + static std::unique_ptr<ICCProfile> Make(sk_sp<SkData>); + static std::unique_ptr<ICCProfile> Make(const skcms_ICCProfile&); + + const skcms_ICCProfile* profile() const { return &fProfile; } + private: + ICCProfile(const skcms_ICCProfile&, sk_sp<SkData> = nullptr); + + skcms_ICCProfile fProfile; + sk_sp<SkData> fData; + }; + + enum Alpha { + kOpaque_Alpha, + kUnpremul_Alpha, + + // Each pixel is either fully opaque or fully transparent. + // There is no difference between requesting kPremul or kUnpremul. + kBinary_Alpha, + }; + + /* + * We strive to make the number of components per pixel obvious through + * our naming conventions. + * Ex: kRGB has 3 components. kRGBA has 4 components. + * + * This sometimes results in redundant Alpha and Color information. + * Ex: kRGB images must also be kOpaque. + */ + enum Color { + // PNG, WBMP + kGray_Color, + + // PNG + kGrayAlpha_Color, + + // PNG with Skia-specific sBIT + // Like kGrayAlpha, except this expects to be treated as + // kAlpha_8_SkColorType, which ignores the gray component. If + // decoded to full color (e.g. kN32), the gray component is respected + // (so it can share code with kGrayAlpha). + kXAlpha_Color, + + // PNG + // 565 images may be encoded to PNG by specifying the number of + // significant bits for each channel. This is a strange 565 + // representation because the image is still encoded with 8 bits per + // component. 
+ k565_Color, + + // PNG, GIF, BMP + kPalette_Color, + + // PNG, RAW + kRGB_Color, + kRGBA_Color, + + // BMP + kBGR_Color, + kBGRX_Color, + kBGRA_Color, + + // JPEG, WEBP + kYUV_Color, + + // WEBP + kYUVA_Color, + + // JPEG + // Photoshop actually writes inverted CMYK data into JPEGs, where zero + // represents 100% ink coverage. For this reason, we treat CMYK JPEGs + // as having inverted CMYK. libjpeg-turbo warns that this may break + // other applications, but the CMYK JPEGs we see on the web expect to + // be treated as inverted CMYK. + kInvertedCMYK_Color, + kYCCK_Color, + }; + + static SkEncodedInfo Make(int width, int height, Color color, Alpha alpha, + int bitsPerComponent) { + return Make(width, height, color, alpha, bitsPerComponent, nullptr); + } + + static SkEncodedInfo Make(int width, int height, Color color, Alpha alpha, + int bitsPerComponent, std::unique_ptr<ICCProfile> profile) { + SkASSERT(1 == bitsPerComponent || + 2 == bitsPerComponent || + 4 == bitsPerComponent || + 8 == bitsPerComponent || + 16 == bitsPerComponent); + + switch (color) { + case kGray_Color: + SkASSERT(kOpaque_Alpha == alpha); + break; + case kGrayAlpha_Color: + SkASSERT(kOpaque_Alpha != alpha); + break; + case kPalette_Color: + SkASSERT(16 != bitsPerComponent); + break; + case kRGB_Color: + case kBGR_Color: + case kBGRX_Color: + SkASSERT(kOpaque_Alpha == alpha); + SkASSERT(bitsPerComponent >= 8); + break; + case kYUV_Color: + case kInvertedCMYK_Color: + case kYCCK_Color: + SkASSERT(kOpaque_Alpha == alpha); + SkASSERT(8 == bitsPerComponent); + break; + case kRGBA_Color: + SkASSERT(bitsPerComponent >= 8); + break; + case kBGRA_Color: + case kYUVA_Color: + SkASSERT(8 == bitsPerComponent); + break; + case kXAlpha_Color: + SkASSERT(kUnpremul_Alpha == alpha); + SkASSERT(8 == bitsPerComponent); + break; + case k565_Color: + SkASSERT(kOpaque_Alpha == alpha); + SkASSERT(8 == bitsPerComponent); + break; + default: + SkASSERT(false); + break; + } + + return SkEncodedInfo(width, height, color, alpha, bitsPerComponent, std::move(profile)); + } + + /* + * Returns a recommended SkImageInfo. + * + * TODO: Leave this up to the client. + */ + SkImageInfo makeImageInfo() const { + auto ct = kGray_Color == fColor ? kGray_8_SkColorType : + kXAlpha_Color == fColor ? kAlpha_8_SkColorType : + k565_Color == fColor ? kRGB_565_SkColorType : + kN32_SkColorType ; + auto alpha = kOpaque_Alpha == fAlpha ? kOpaque_SkAlphaType + : kUnpremul_SkAlphaType; + sk_sp<SkColorSpace> cs = fProfile ? 
SkColorSpace::Make(*fProfile->profile()) + : nullptr; + if (!cs) { + cs = SkColorSpace::MakeSRGB(); + } + return SkImageInfo::Make(fWidth, fHeight, ct, alpha, std::move(cs)); + } + + int width() const { return fWidth; } + int height() const { return fHeight; } + Color color() const { return fColor; } + Alpha alpha() const { return fAlpha; } + bool opaque() const { return fAlpha == kOpaque_Alpha; } + const skcms_ICCProfile* profile() const { + if (!fProfile) return nullptr; + return fProfile->profile(); + } + + uint8_t bitsPerComponent() const { return fBitsPerComponent; } + + uint8_t bitsPerPixel() const { + switch (fColor) { + case kGray_Color: + return fBitsPerComponent; + case kXAlpha_Color: + case kGrayAlpha_Color: + return 2 * fBitsPerComponent; + case kPalette_Color: + return fBitsPerComponent; + case kRGB_Color: + case kBGR_Color: + case kYUV_Color: + case k565_Color: + return 3 * fBitsPerComponent; + case kRGBA_Color: + case kBGRA_Color: + case kBGRX_Color: + case kYUVA_Color: + case kInvertedCMYK_Color: + case kYCCK_Color: + return 4 * fBitsPerComponent; + default: + SkASSERT(false); + return 0; + } + } + + SkEncodedInfo(const SkEncodedInfo& orig) = delete; + SkEncodedInfo& operator=(const SkEncodedInfo&) = delete; + + SkEncodedInfo(SkEncodedInfo&& orig) = default; + SkEncodedInfo& operator=(SkEncodedInfo&&) = default; + + // Explicit copy method, to avoid accidental copying. + SkEncodedInfo copy() const { + auto copy = SkEncodedInfo::Make(fWidth, fHeight, fColor, fAlpha, fBitsPerComponent); + if (fProfile) { + copy.fProfile = std::make_unique<ICCProfile>(*fProfile); + } + return copy; + } + +private: + SkEncodedInfo(int width, int height, Color color, Alpha alpha, + uint8_t bitsPerComponent, std::unique_ptr<ICCProfile> profile) + : fWidth(width) + , fHeight(height) + , fColor(color) + , fAlpha(alpha) + , fBitsPerComponent(bitsPerComponent) + , fProfile(std::move(profile)) + {} + + int fWidth; + int fHeight; + Color fColor; + Alpha fAlpha; + uint8_t fBitsPerComponent; + std::unique_ptr<ICCProfile> fProfile; +}; + +#endif diff --git a/src/deps/skia/include/private/SkFixed.h b/src/deps/skia/include/private/SkFixed.h new file mode 100644 index 000000000..e34c19f2a --- /dev/null +++ b/src/deps/skia/include/private/SkFixed.h @@ -0,0 +1,141 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkFixed_DEFINED +#define SkFixed_DEFINED + +#include "include/core/SkScalar.h" +#include "include/core/SkTypes.h" +#include "include/private/SkSafe_math.h" +#include "include/private/SkTPin.h" +#include "include/private/SkTo.h" + +/** \file SkFixed.h + + Types and macros for 16.16 fixed point +*/ + +/** 32 bit signed integer used to represent fractions values with 16 bits to the right of the decimal point +*/ +typedef int32_t SkFixed; +#define SK_Fixed1 (1 << 16) +#define SK_FixedHalf (1 << 15) +#define SK_FixedQuarter (1 << 14) +#define SK_FixedMax (0x7FFFFFFF) +#define SK_FixedMin (-SK_FixedMax) +#define SK_FixedPI (0x3243F) +#define SK_FixedSqrt2 (92682) +#define SK_FixedTanPIOver8 (0x6A0A) +#define SK_FixedRoot2Over2 (0xB505) + +// NOTE: SkFixedToFloat is exact. SkFloatToFixed seems to lack a rounding step. For all fixed-point +// values, this version is as accurate as possible for (fixed -> float -> fixed). 
Rounding reduces +// accuracy if the intermediate floats are in the range that only holds integers (adding 0.5f to an +// odd integer then snaps to nearest even). Using double for the rounding math gives maximum +// accuracy for (float -> fixed -> float), but that's usually overkill. +#define SkFixedToFloat(x) ((x) * 1.52587890625e-5f) +#define SkFloatToFixed(x) sk_float_saturate2int((x) * SK_Fixed1) + +#ifdef SK_DEBUG + static inline SkFixed SkFloatToFixed_Check(float x) { + int64_t n64 = (int64_t)(x * SK_Fixed1); + SkFixed n32 = (SkFixed)n64; + SkASSERT(n64 == n32); + return n32; + } +#else + #define SkFloatToFixed_Check(x) SkFloatToFixed(x) +#endif + +#define SkFixedToDouble(x) ((x) * 1.52587890625e-5) +#define SkDoubleToFixed(x) ((SkFixed)((x) * SK_Fixed1)) + +/** Converts an integer to a SkFixed, asserting that the result does not overflow + a 32 bit signed integer +*/ +#ifdef SK_DEBUG + inline SkFixed SkIntToFixed(int n) + { + SkASSERT(n >= -32768 && n <= 32767); + // Left shifting a negative value has undefined behavior in C, so we cast to unsigned before + // shifting. + return (SkFixed)( (unsigned)n << 16 ); + } +#else + // Left shifting a negative value has undefined behavior in C, so we cast to unsigned before + // shifting. Then we force the cast to SkFixed to ensure that the answer is signed (like the + // debug version). + #define SkIntToFixed(n) (SkFixed)((unsigned)(n) << 16) +#endif + +#define SkFixedRoundToInt(x) (((x) + SK_FixedHalf) >> 16) +#define SkFixedCeilToInt(x) (((x) + SK_Fixed1 - 1) >> 16) +#define SkFixedFloorToInt(x) ((x) >> 16) + +static inline SkFixed SkFixedRoundToFixed(SkFixed x) { + return (SkFixed)( (uint32_t)(x + SK_FixedHalf) & 0xFFFF0000 ); +} +static inline SkFixed SkFixedCeilToFixed(SkFixed x) { + return (SkFixed)( (uint32_t)(x + SK_Fixed1 - 1) & 0xFFFF0000 ); +} +static inline SkFixed SkFixedFloorToFixed(SkFixed x) { + return (SkFixed)( (uint32_t)x & 0xFFFF0000 ); +} + +#define SkFixedAbs(x) SkAbs32(x) +#define SkFixedAve(a, b) (((a) + (b)) >> 1) + +// The divide may exceed 32 bits. Clamp to a signed 32 bit result. +#define SkFixedDiv(numer, denom) \ + SkToS32(SkTPin<int64_t>((SkLeftShift((int64_t)(numer), 16) / (denom)), SK_MinS32, SK_MaxS32)) + +static inline SkFixed SkFixedMul(SkFixed a, SkFixed b) { + return (SkFixed)((int64_t)a * b >> 16); +} + +/////////////////////////////////////////////////////////////////////////////// +// Platform-specific alternatives to our portable versions. + +// The VCVT float-to-fixed instruction is part of the VFPv3 instruction set. +#if defined(__ARM_VFPV3__) + /* This does not handle NaN or other obscurities, but is faster than + than (int)(x*65536). When built on Android with -Os, needs forcing + to inline or we lose the speed benefit. 
+ */ + SK_ALWAYS_INLINE SkFixed SkFloatToFixed_arm(float x) + { + int32_t y; + asm("vcvt.s32.f32 %0, %0, #16": "+w"(x)); + memcpy(&y, &x, sizeof(y)); + return y; + } + #undef SkFloatToFixed + #define SkFloatToFixed(x) SkFloatToFixed_arm(x) +#endif + +/////////////////////////////////////////////////////////////////////////////// + +#define SkFixedToScalar(x) SkFixedToFloat(x) +#define SkScalarToFixed(x) SkFloatToFixed(x) + +/////////////////////////////////////////////////////////////////////////////// + +typedef int64_t SkFixed3232; // 32.32 + +#define SkFixed3232Max SK_MaxS64 +#define SkFixed3232Min (-SkFixed3232Max) + +#define SkIntToFixed3232(x) (SkLeftShift((SkFixed3232)(x), 32)) +#define SkFixed3232ToInt(x) ((int)((x) >> 32)) +#define SkFixedToFixed3232(x) (SkLeftShift((SkFixed3232)(x), 16)) +#define SkFixed3232ToFixed(x) ((SkFixed)((x) >> 16)) +#define SkFloatToFixed3232(x) sk_float_saturate2int64((x) * (65536.0f * 65536.0f)) +#define SkFixed3232ToFloat(x) (x * (1 / (65536.0f * 65536.0f))) + +#define SkScalarToFixed3232(x) SkFloatToFixed3232(x) + +#endif diff --git a/src/deps/skia/include/private/SkFloatBits.h b/src/deps/skia/include/private/SkFloatBits.h new file mode 100644 index 000000000..89eea4b9e --- /dev/null +++ b/src/deps/skia/include/private/SkFloatBits.h @@ -0,0 +1,91 @@ +/* + * Copyright 2008 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkFloatBits_DEFINED +#define SkFloatBits_DEFINED + +#include "include/core/SkTypes.h" +#include "include/private/SkSafe_math.h" + +#include <float.h> + +/** Convert a sign-bit int (i.e. float interpreted as int) into a 2s compliement + int. This also converts -0 (0x80000000) to 0. Doing this to a float allows + it to be compared using normal C operators (<, <=, etc.) +*/ +static inline int32_t SkSignBitTo2sCompliment(int32_t x) { + if (x < 0) { + x &= 0x7FFFFFFF; + x = -x; + } + return x; +} + +/** Convert a 2s compliment int to a sign-bit (i.e. int interpreted as float). + This undoes the result of SkSignBitTo2sCompliment(). + */ +static inline int32_t Sk2sComplimentToSignBit(int32_t x) { + int sign = x >> 31; + // make x positive + x = (x ^ sign) - sign; + // set the sign bit as needed + x |= SkLeftShift(sign, 31); + return x; +} + +union SkFloatIntUnion { + float fFloat; + int32_t fSignBitInt; +}; + +// Helper to see a float as its bit pattern (w/o aliasing warnings) +static inline int32_t SkFloat2Bits(float x) { + SkFloatIntUnion data; + data.fFloat = x; + return data.fSignBitInt; +} + +// Helper to see a bit pattern as a float (w/o aliasing warnings) +static inline float SkBits2Float(int32_t floatAsBits) { + SkFloatIntUnion data; + data.fSignBitInt = floatAsBits; + return data.fFloat; +} + +constexpr int32_t gFloatBits_exponent_mask = 0x7F800000; +constexpr int32_t gFloatBits_matissa_mask = 0x007FFFFF; + +static inline bool SkFloatBits_IsFinite(int32_t bits) { + return (bits & gFloatBits_exponent_mask) != gFloatBits_exponent_mask; +} + +static inline bool SkFloatBits_IsInf(int32_t bits) { + return ((bits & gFloatBits_exponent_mask) == gFloatBits_exponent_mask) && + (bits & gFloatBits_matissa_mask) == 0; +} + +/** Return the float as a 2s compliment int. Just to be used to compare floats + to each other or against positive float-bit-constants (like 0). This does + not return the int equivalent of the float, just something cheaper for + compares-only. 
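+    Because IEEE floats are stored sign-magnitude, remapping negative values to two's complement
+    makes plain integer comparisons order the values the same way float comparisons would
+    (NaNs excepted).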
+ */ +static inline int32_t SkFloatAs2sCompliment(float x) { + return SkSignBitTo2sCompliment(SkFloat2Bits(x)); +} + +/** Return the 2s compliment int as a float. This undos the result of + SkFloatAs2sCompliment + */ +static inline float Sk2sComplimentAsFloat(int32_t x) { + return SkBits2Float(Sk2sComplimentToSignBit(x)); +} + +// Scalar wrappers for float-bit routines + +#define SkScalarAs2sCompliment(x) SkFloatAs2sCompliment(x) + +#endif diff --git a/src/deps/skia/include/private/SkFloatingPoint.h b/src/deps/skia/include/private/SkFloatingPoint.h new file mode 100644 index 000000000..fbabd0ebc --- /dev/null +++ b/src/deps/skia/include/private/SkFloatingPoint.h @@ -0,0 +1,272 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkFloatingPoint_DEFINED +#define SkFloatingPoint_DEFINED + +#include "include/core/SkTypes.h" +#include "include/private/SkFloatBits.h" +#include "include/private/SkSafe_math.h" +#include <float.h> +#include <math.h> +#include <cmath> +#include <cstring> +#include <limits> + + +#if defined(SK_LEGACY_FLOAT_RSQRT) +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1 + #include <xmmintrin.h> +#elif defined(SK_ARM_HAS_NEON) + #include <arm_neon.h> +#endif +#endif + +constexpr float SK_FloatSqrt2 = 1.41421356f; +constexpr float SK_FloatPI = 3.14159265f; +constexpr double SK_DoublePI = 3.14159265358979323846264338327950288; + +// C++98 cmath std::pow seems to be the earliest portable way to get float pow. +// However, on Linux including cmath undefines isfinite. +// http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14608 +static inline float sk_float_pow(float base, float exp) { + return powf(base, exp); +} + +#define sk_float_sqrt(x) sqrtf(x) +#define sk_float_sin(x) sinf(x) +#define sk_float_cos(x) cosf(x) +#define sk_float_tan(x) tanf(x) +#define sk_float_floor(x) floorf(x) +#define sk_float_ceil(x) ceilf(x) +#define sk_float_trunc(x) truncf(x) +#ifdef SK_BUILD_FOR_MAC +# define sk_float_acos(x) static_cast<float>(acos(x)) +# define sk_float_asin(x) static_cast<float>(asin(x)) +#else +# define sk_float_acos(x) acosf(x) +# define sk_float_asin(x) asinf(x) +#endif +#define sk_float_atan2(y,x) atan2f(y,x) +#define sk_float_abs(x) fabsf(x) +#define sk_float_copysign(x, y) copysignf(x, y) +#define sk_float_mod(x,y) fmodf(x,y) +#define sk_float_exp(x) expf(x) +#define sk_float_log(x) logf(x) + +constexpr float sk_float_degrees_to_radians(float degrees) { + return degrees * (SK_FloatPI / 180); +} + +constexpr float sk_float_radians_to_degrees(float radians) { + return radians * (180 / SK_FloatPI); +} + +#define sk_float_round(x) sk_float_floor((x) + 0.5f) + +// can't find log2f on android, but maybe that just a tool bug? 
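+// Illustrative note: the fallback below relies on the identity log2(x) = ln(x) / ln(2),
+// so it scales log(x) by 1/ln(2) ~ 1.44269504088896 to recover the base-2 logarithm.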
+#ifdef SK_BUILD_FOR_ANDROID + static inline float sk_float_log2(float x) { + const double inv_ln_2 = 1.44269504088896; + return (float)(log(x) * inv_ln_2); + } +#else + #define sk_float_log2(x) log2f(x) +#endif + +static inline bool sk_float_isfinite(float x) { + return SkFloatBits_IsFinite(SkFloat2Bits(x)); +} + +static inline bool sk_floats_are_finite(float a, float b) { + return sk_float_isfinite(a) && sk_float_isfinite(b); +} + +static inline bool sk_floats_are_finite(const float array[], int count) { + float prod = 0; + for (int i = 0; i < count; ++i) { + prod *= array[i]; + } + // At this point, prod will either be NaN or 0 + return prod == 0; // if prod is NaN, this check will return false +} + +static inline bool sk_float_isinf(float x) { + return SkFloatBits_IsInf(SkFloat2Bits(x)); +} + +static inline bool sk_float_isnan(float x) { + return !(x == x); +} + +#define sk_double_isnan(a) sk_float_isnan(a) + +#define SK_MaxS32FitsInFloat 2147483520 +#define SK_MinS32FitsInFloat -SK_MaxS32FitsInFloat + +#define SK_MaxS64FitsInFloat (SK_MaxS64 >> (63-24) << (63-24)) // 0x7fffff8000000000 +#define SK_MinS64FitsInFloat -SK_MaxS64FitsInFloat + +/** + * Return the closest int for the given float. Returns SK_MaxS32FitsInFloat for NaN. + */ +static inline int sk_float_saturate2int(float x) { + x = x < SK_MaxS32FitsInFloat ? x : SK_MaxS32FitsInFloat; + x = x > SK_MinS32FitsInFloat ? x : SK_MinS32FitsInFloat; + return (int)x; +} + +/** + * Return the closest int for the given double. Returns SK_MaxS32 for NaN. + */ +static inline int sk_double_saturate2int(double x) { + x = x < SK_MaxS32 ? x : SK_MaxS32; + x = x > SK_MinS32 ? x : SK_MinS32; + return (int)x; +} + +/** + * Return the closest int64_t for the given float. Returns SK_MaxS64FitsInFloat for NaN. + */ +static inline int64_t sk_float_saturate2int64(float x) { + x = x < SK_MaxS64FitsInFloat ? x : SK_MaxS64FitsInFloat; + x = x > SK_MinS64FitsInFloat ? x : SK_MinS64FitsInFloat; + return (int64_t)x; +} + +#define sk_float_floor2int(x) sk_float_saturate2int(sk_float_floor(x)) +#define sk_float_round2int(x) sk_float_saturate2int(sk_float_floor((x) + 0.5f)) +#define sk_float_ceil2int(x) sk_float_saturate2int(sk_float_ceil(x)) + +#define sk_float_floor2int_no_saturate(x) (int)sk_float_floor(x) +#define sk_float_round2int_no_saturate(x) (int)sk_float_floor((x) + 0.5f) +#define sk_float_ceil2int_no_saturate(x) (int)sk_float_ceil(x) + +#define sk_double_floor(x) floor(x) +#define sk_double_round(x) floor((x) + 0.5) +#define sk_double_ceil(x) ceil(x) +#define sk_double_floor2int(x) (int)floor(x) +#define sk_double_round2int(x) (int)floor((x) + 0.5) +#define sk_double_ceil2int(x) (int)ceil(x) + +// Cast double to float, ignoring any warning about too-large finite values being cast to float. +// Clang thinks this is undefined, but it's actually implementation defined to return either +// the largest float or infinity (one of the two bracketing representable floats). Good enough! 
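+// Illustrative sketch (comment only, not part of the original header) of how the saturating
+// conversions above behave at the extremes; inputs are clamped to the widest integers exactly
+// representable as float before the cast, so no undefined float->int overflow occurs:
+//
+//     int a = sk_float_round2int(2.25f);             // 2
+//     int b = sk_float_saturate2int(3e9f);           // SK_MaxS32FitsInFloat (2147483520)
+//     int64_t c = sk_float_saturate2int64(-1e30f);   // SK_MinS64FitsInFloat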
+SK_ATTRIBUTE(no_sanitize("float-cast-overflow")) +static inline float sk_double_to_float(double x) { + return static_cast<float>(x); +} + +#define SK_FloatNaN std::numeric_limits<float>::quiet_NaN() +#define SK_FloatInfinity (+std::numeric_limits<float>::infinity()) +#define SK_FloatNegativeInfinity (-std::numeric_limits<float>::infinity()) + +#define SK_DoubleNaN std::numeric_limits<double>::quiet_NaN() + +// Returns false if any of the floats are outside of [0...1] +// Returns true if count is 0 +bool sk_floats_are_unit(const float array[], size_t count); + +#if defined(SK_LEGACY_FLOAT_RSQRT) +static inline float sk_float_rsqrt_portable(float x) { + // Get initial estimate. + int i; + memcpy(&i, &x, 4); + i = 0x5F1FFFF9 - (i>>1); + float estimate; + memcpy(&estimate, &i, 4); + + // One step of Newton's method to refine. + const float estimate_sq = estimate*estimate; + estimate *= 0.703952253f*(2.38924456f-x*estimate_sq); + return estimate; +} + +// Fast, approximate inverse square root. +// Compare to name-brand "1.0f / sk_float_sqrt(x)". Should be around 10x faster on SSE, 2x on NEON. +static inline float sk_float_rsqrt(float x) { +// We want all this inlined, so we'll inline SIMD and just take the hit when we don't know we've got +// it at compile time. This is going to be too fast to productively hide behind a function pointer. +// +// We do one step of Newton's method to refine the estimates in the NEON and portable paths. No +// refinement is faster, but very innacurate. Two steps is more accurate, but slower than 1/sqrt. +// +// Optimized constants in the portable path courtesy of http://rrrola.wz.cz/inv_sqrt.html +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1 + return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(x))); +#elif defined(SK_ARM_HAS_NEON) + // Get initial estimate. + const float32x2_t xx = vdup_n_f32(x); // Clever readers will note we're doing everything 2x. + float32x2_t estimate = vrsqrte_f32(xx); + + // One step of Newton's method to refine. + const float32x2_t estimate_sq = vmul_f32(estimate, estimate); + estimate = vmul_f32(estimate, vrsqrts_f32(xx, estimate_sq)); + return vget_lane_f32(estimate, 0); // 1 will work fine too; the answer's in both places. +#else + return sk_float_rsqrt_portable(x); +#endif +} +#else + +static inline float sk_float_rsqrt_portable(float x) { return 1.0f / sk_float_sqrt(x); } +static inline float sk_float_rsqrt (float x) { return 1.0f / sk_float_sqrt(x); } + +#endif + +// Returns the log2 of the provided value, were that value to be rounded up to the next power of 2. +// Returns 0 if value <= 0: +// Never returns a negative number, even if value is NaN. +// +// sk_float_nextlog2((-inf..1]) -> 0 +// sk_float_nextlog2((1..2]) -> 1 +// sk_float_nextlog2((2..4]) -> 2 +// sk_float_nextlog2((4..8]) -> 3 +// ... +static inline int sk_float_nextlog2(float x) { + uint32_t bits = (uint32_t)SkFloat2Bits(x); + bits += (1u << 23) - 1u; // Increment the exponent for non-powers-of-2. + int exp = ((int32_t)bits >> 23) - 127; + return exp & ~(exp >> 31); // Return 0 for negative or denormalized floats, and exponents < 0. +} + +// This is the number of significant digits we can print in a string such that when we read that +// string back we get the floating point number we expect. 
The minimum value C requires is 6, but +// most compilers support 9 +#ifdef FLT_DECIMAL_DIG +#define SK_FLT_DECIMAL_DIG FLT_DECIMAL_DIG +#else +#define SK_FLT_DECIMAL_DIG 9 +#endif + +// IEEE defines how float divide behaves for non-finite values and zero-denoms, but C does not +// so we have a helper that suppresses the possible undefined-behavior warnings. + +SK_ATTRIBUTE(no_sanitize("float-divide-by-zero")) +static inline float sk_ieee_float_divide(float numer, float denom) { + return numer / denom; +} + +SK_ATTRIBUTE(no_sanitize("float-divide-by-zero")) +static inline double sk_ieee_double_divide(double numer, double denom) { + return numer / denom; +} + +// While we clean up divide by zero, we'll replace places that do divide by zero with this TODO. +static inline float sk_ieee_float_divide_TODO_IS_DIVIDE_BY_ZERO_SAFE_HERE(float n, float d) { + return sk_ieee_float_divide(n,d); +} + +static inline float sk_fmaf(float f, float m, float a) { +#if defined(FP_FAST_FMA) + return std::fmaf(f,m,a); +#else + return f*m+a; +#endif +} + +#endif diff --git a/src/deps/skia/include/private/SkHalf.h b/src/deps/skia/include/private/SkHalf.h new file mode 100644 index 000000000..d95189131 --- /dev/null +++ b/src/deps/skia/include/private/SkHalf.h @@ -0,0 +1,85 @@ +/* + * Copyright 2014 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkHalf_DEFINED +#define SkHalf_DEFINED + +#include "include/core/SkTypes.h" +#include "include/private/SkNx.h" + +// 16-bit floating point value +// format is 1 bit sign, 5 bits exponent, 10 bits mantissa +// only used for storage +typedef uint16_t SkHalf; + +static constexpr uint16_t SK_HalfMin = 0x0400; // 2^-14 (minimum positive normal value) +static constexpr uint16_t SK_HalfMax = 0x7bff; // 65504 +static constexpr uint16_t SK_HalfEpsilon = 0x1400; // 2^-10 +static constexpr uint16_t SK_Half1 = 0x3C00; // 1 + +// convert between half and single precision floating point +float SkHalfToFloat(SkHalf h); +SkHalf SkFloatToHalf(float f); + +// Convert between half and single precision floating point, +// assuming inputs and outputs are both finite, and may +// flush values which would be denormal half floats to zero. +static inline Sk4f SkHalfToFloat_finite_ftz(uint64_t); +static inline Sk4h SkFloatToHalf_finite_ftz(const Sk4f&); + +// ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ // + +// Like the serial versions in SkHalf.cpp, these are based on +// https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ + +// GCC 4.9 lacks the intrinsics to use ARMv8 f16<->f32 instructions, so we use inline assembly. + +static inline Sk4f SkHalfToFloat_finite_ftz(uint64_t rgba) { + Sk4h hs = Sk4h::Load(&rgba); +#if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64) + float32x4_t fs; + asm ("fcvtl %[fs].4s, %[hs].4h \n" // vcvt_f32_f16(...) + : [fs] "=w" (fs) // =w: write-only NEON register + : [hs] "w" (hs.fVec)); // w: read-only NEON register + return fs; +#else + Sk4i bits = SkNx_cast<int>(hs), // Expand to 32 bit. + sign = bits & 0x00008000, // Save the sign bit for later... + positive = bits ^ sign, // ...but strip it off for now. + is_norm = 0x03ff < positive; // Exponent > 0? + + // For normal half floats, extend the mantissa by 13 zero bits, + // then adjust the exponent from 15 bias to 127 bias. 
+ Sk4i norm = (positive << 13) + ((127 - 15) << 23); + + Sk4i merged = (sign << 16) | (norm & is_norm); + return Sk4f::Load(&merged); +#endif +} + +static inline Sk4h SkFloatToHalf_finite_ftz(const Sk4f& fs) { +#if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64) + float32x4_t vec = fs.fVec; + asm ("fcvtn %[vec].4h, %[vec].4s \n" // vcvt_f16_f32(vec) + : [vec] "+w" (vec)); // +w: read-write NEON register + return vreinterpret_u16_f32(vget_low_f32(vec)); +#else + Sk4i bits = Sk4i::Load(&fs), + sign = bits & 0x80000000, // Save the sign bit for later... + positive = bits ^ sign, // ...but strip it off for now. + will_be_norm = 0x387fdfff < positive; // greater than largest denorm half? + + // For normal half floats, adjust the exponent from 127 bias to 15 bias, + // then drop the bottom 13 mantissa bits. + Sk4i norm = (positive - ((127 - 15) << 23)) >> 13; + + Sk4i merged = (sign >> 16) | (will_be_norm & norm); + return SkNx_cast<uint16_t>(merged); +#endif +} + +#endif diff --git a/src/deps/skia/include/private/SkIDChangeListener.h b/src/deps/skia/include/private/SkIDChangeListener.h new file mode 100644 index 000000000..f7a5900e0 --- /dev/null +++ b/src/deps/skia/include/private/SkIDChangeListener.h @@ -0,0 +1,75 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkIDChangeListener_DEFINED +#define SkIDChangeListener_DEFINED + +#include "include/core/SkRefCnt.h" +#include "include/private/SkMutex.h" +#include "include/private/SkTDArray.h" + +#include <atomic> + +/** + * Used to be notified when a gen/unique ID is invalidated, typically to preemptively purge + * associated items from a cache that are no longer reachable. The listener can + * be marked for deregistration if the cached item is remove before the listener is + * triggered. This prevents unbounded listener growth when cache items are routinely + * removed before the gen ID/unique ID is invalidated. + */ +class SkIDChangeListener : public SkRefCnt { +public: + SkIDChangeListener(); + + ~SkIDChangeListener() override; + + virtual void changed() = 0; + + /** + * Mark the listener is no longer needed. It should be removed and changed() should not be + * called. + */ + void markShouldDeregister() { fShouldDeregister.store(true, std::memory_order_relaxed); } + + /** Indicates whether markShouldDeregister was called. */ + bool shouldDeregister() { return fShouldDeregister.load(std::memory_order_acquire); } + + /** Manages a list of SkIDChangeListeners. */ + class List { + public: + List(); + + ~List(); + + /** + * Add a new listener to the list. It must not already be deregistered. Also clears out + * previously deregistered listeners. + */ + void add(sk_sp<SkIDChangeListener> listener) SK_EXCLUDES(fMutex); + + /** + * The number of registered listeners (including deregisterd listeners that are yet-to-be + * removed. + */ + int count() const SK_EXCLUDES(fMutex); + + /** Calls changed() on all listeners that haven't been deregistered and resets the list. */ + void changed() SK_EXCLUDES(fMutex); + + /** Resets without calling changed() on the listeners. 
*/ + void reset() SK_EXCLUDES(fMutex); + + private: + mutable SkMutex fMutex; + SkTDArray<SkIDChangeListener*> fListeners SK_GUARDED_BY(fMutex); // pointers are reffed + }; + +private: + std::atomic<bool> fShouldDeregister; +}; + +#endif diff --git a/src/deps/skia/include/private/SkImageInfoPriv.h b/src/deps/skia/include/private/SkImageInfoPriv.h new file mode 100644 index 000000000..5e4abb82c --- /dev/null +++ b/src/deps/skia/include/private/SkImageInfoPriv.h @@ -0,0 +1,193 @@ +/* + * Copyright 2017 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkImageInfoPriv_DEFINED +#define SkImageInfoPriv_DEFINED + +#include "include/core/SkColor.h" +#include "include/core/SkImageInfo.h" + +static inline uint32_t SkColorTypeChannelFlags(SkColorType ct) { + switch (ct) { + case kUnknown_SkColorType: return 0; + case kAlpha_8_SkColorType: return kAlpha_SkColorChannelFlag; + case kRGB_565_SkColorType: return kRGB_SkColorChannelFlags; + case kARGB_4444_SkColorType: return kRGBA_SkColorChannelFlags; + case kRGBA_8888_SkColorType: return kRGBA_SkColorChannelFlags; + case kRGB_888x_SkColorType: return kRGB_SkColorChannelFlags; + case kBGRA_8888_SkColorType: return kRGBA_SkColorChannelFlags; + case kRGBA_1010102_SkColorType: return kRGBA_SkColorChannelFlags; + case kRGB_101010x_SkColorType: return kRGB_SkColorChannelFlags; + case kBGRA_1010102_SkColorType: return kRGBA_SkColorChannelFlags; + case kBGR_101010x_SkColorType: return kRGB_SkColorChannelFlags; + case kGray_8_SkColorType: return kGray_SkColorChannelFlag; + case kRGBA_F16Norm_SkColorType: return kRGBA_SkColorChannelFlags; + case kRGBA_F16_SkColorType: return kRGBA_SkColorChannelFlags; + case kRGBA_F32_SkColorType: return kRGBA_SkColorChannelFlags; + case kR8G8_unorm_SkColorType: return kRG_SkColorChannelFlags; + case kA16_unorm_SkColorType: return kAlpha_SkColorChannelFlag; + case kR16G16_unorm_SkColorType: return kRG_SkColorChannelFlags; + case kA16_float_SkColorType: return kAlpha_SkColorChannelFlag; + case kR16G16_float_SkColorType: return kRG_SkColorChannelFlags; + case kR16G16B16A16_unorm_SkColorType: return kRGBA_SkColorChannelFlags; + case kSRGBA_8888_SkColorType: return kRGBA_SkColorChannelFlags; + } + SkUNREACHABLE; +} + +static inline bool SkColorTypeIsAlphaOnly(SkColorType ct) { + return SkColorTypeChannelFlags(ct) == kAlpha_SkColorChannelFlag; +} + +static inline bool SkAlphaTypeIsValid(unsigned value) { + return value <= kLastEnum_SkAlphaType; +} + +static int SkColorTypeShiftPerPixel(SkColorType ct) { + switch (ct) { + case kUnknown_SkColorType: return 0; + case kAlpha_8_SkColorType: return 0; + case kRGB_565_SkColorType: return 1; + case kARGB_4444_SkColorType: return 1; + case kRGBA_8888_SkColorType: return 2; + case kRGB_888x_SkColorType: return 2; + case kBGRA_8888_SkColorType: return 2; + case kRGBA_1010102_SkColorType: return 2; + case kRGB_101010x_SkColorType: return 2; + case kBGRA_1010102_SkColorType: return 2; + case kBGR_101010x_SkColorType: return 2; + case kGray_8_SkColorType: return 0; + case kRGBA_F16Norm_SkColorType: return 3; + case kRGBA_F16_SkColorType: return 3; + case kRGBA_F32_SkColorType: return 4; + case kR8G8_unorm_SkColorType: return 1; + case kA16_unorm_SkColorType: return 1; + case kR16G16_unorm_SkColorType: return 2; + case kA16_float_SkColorType: return 1; + case kR16G16_float_SkColorType: return 2; + case kR16G16B16A16_unorm_SkColorType: return 3; + case kSRGBA_8888_SkColorType: return 2; + } + SkUNREACHABLE; +} + 
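+// Illustrative sketch, not part of the original header: bytes-per-pixel is simply
+// 1 << SkColorTypeShiftPerPixel(ct), which is the relationship the row-bytes and
+// offset helpers below depend on. The helper name here is hypothetical.
+static inline size_t SkColorTypeBytesPerPixelFromShift(SkColorType ct) {
+    // e.g. kRGBA_8888_SkColorType has a shift of 2, so 1 << 2 == 4 bytes per pixel,
+    // and kRGBA_F32_SkColorType has a shift of 4, so 16 bytes per pixel.
+    return size_t(1) << SkColorTypeShiftPerPixel(ct);
+}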
+static inline size_t SkColorTypeMinRowBytes(SkColorType ct, int width) { + return (size_t)(width * SkColorTypeBytesPerPixel(ct)); +} + +static inline bool SkColorTypeIsValid(unsigned value) { + return value <= kLastEnum_SkColorType; +} + +static inline size_t SkColorTypeComputeOffset(SkColorType ct, int x, int y, size_t rowBytes) { + if (kUnknown_SkColorType == ct) { + return 0; + } + return (size_t)y * rowBytes + ((size_t)x << SkColorTypeShiftPerPixel(ct)); +} + +static inline bool SkColorTypeIsNormalized(SkColorType ct) { + switch (ct) { + case kUnknown_SkColorType: + case kAlpha_8_SkColorType: + case kRGB_565_SkColorType: + case kARGB_4444_SkColorType: + case kRGBA_8888_SkColorType: + case kRGB_888x_SkColorType: + case kBGRA_8888_SkColorType: + case kRGBA_1010102_SkColorType: + case kRGB_101010x_SkColorType: + case kBGRA_1010102_SkColorType: + case kBGR_101010x_SkColorType: + case kGray_8_SkColorType: + case kRGBA_F16Norm_SkColorType: + case kR8G8_unorm_SkColorType: + case kA16_unorm_SkColorType: + case kA16_float_SkColorType: /*subtle... alpha is always [0,1]*/ + case kR16G16_unorm_SkColorType: + case kR16G16B16A16_unorm_SkColorType: + case kSRGBA_8888_SkColorType: return true; + + case kRGBA_F16_SkColorType: + case kRGBA_F32_SkColorType: + case kR16G16_float_SkColorType: return false; + } + SkUNREACHABLE; +} + +static inline int SkColorTypeMaxBitsPerChannel(SkColorType ct) { + switch (ct) { + case kUnknown_SkColorType: + return 0; + + case kARGB_4444_SkColorType: + return 4; + + case kRGB_565_SkColorType: + return 6; + + case kAlpha_8_SkColorType: + case kRGBA_8888_SkColorType: + case kRGB_888x_SkColorType: + case kBGRA_8888_SkColorType: + case kGray_8_SkColorType: + case kR8G8_unorm_SkColorType: + case kSRGBA_8888_SkColorType: + return 8; + + case kRGBA_1010102_SkColorType: + case kRGB_101010x_SkColorType: + case kBGRA_1010102_SkColorType: + case kBGR_101010x_SkColorType: + return 10; + + case kRGBA_F16Norm_SkColorType: + case kA16_unorm_SkColorType: + case kA16_float_SkColorType: + case kR16G16_unorm_SkColorType: + case kR16G16B16A16_unorm_SkColorType: + case kRGBA_F16_SkColorType: + case kR16G16_float_SkColorType: + return 16; + + case kRGBA_F32_SkColorType: + return 32; + } + SkUNREACHABLE; +} + +/** + * Returns true if |info| contains a valid colorType and alphaType. + */ +static inline bool SkColorInfoIsValid(const SkColorInfo& info) { + return info.colorType() != kUnknown_SkColorType && info.alphaType() != kUnknown_SkAlphaType; +} + +/** + * Returns true if |info| contains a valid combination of width, height and colorInfo. + */ +static inline bool SkImageInfoIsValid(const SkImageInfo& info) { + if (info.width() <= 0 || info.height() <= 0) { + return false; + } + + const int kMaxDimension = SK_MaxS32 >> 2; + if (info.width() > kMaxDimension || info.height() > kMaxDimension) { + return false; + } + + return SkColorInfoIsValid(info.colorInfo()); +} + +/** + * Returns true if Skia has defined a pixel conversion from the |src| to the |dst|. + * Returns false otherwise. + */ +static inline bool SkImageInfoValidConversion(const SkImageInfo& dst, const SkImageInfo& src) { + return SkImageInfoIsValid(dst) && SkImageInfoIsValid(src); +} +#endif // SkImageInfoPriv_DEFINED diff --git a/src/deps/skia/include/private/SkMacros.h b/src/deps/skia/include/private/SkMacros.h new file mode 100644 index 000000000..7732d44d7 --- /dev/null +++ b/src/deps/skia/include/private/SkMacros.h @@ -0,0 +1,84 @@ +/* + * Copyright 2018 Google Inc. 
+ * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ +#ifndef SkMacros_DEFINED +#define SkMacros_DEFINED + +/* + * Usage: SK_MACRO_CONCAT(a, b) to construct the symbol ab + * + * SK_MACRO_CONCAT_IMPL_PRIV just exists to make this work. Do not use directly + * + */ +#define SK_MACRO_CONCAT(X, Y) SK_MACRO_CONCAT_IMPL_PRIV(X, Y) +#define SK_MACRO_CONCAT_IMPL_PRIV(X, Y) X ## Y + +/* + * Usage: SK_MACRO_APPEND_LINE(foo) to make foo123, where 123 is the current + * line number. Easy way to construct + * unique names for local functions or + * variables. + */ +#define SK_MACRO_APPEND_LINE(name) SK_MACRO_CONCAT(name, __LINE__) + +#define SK_MACRO_APPEND_COUNTER(name) SK_MACRO_CONCAT(name, __COUNTER__) + +//////////////////////////////////////////////////////////////////////////////// + +// Can be used to bracket data types that must be dense, e.g. hash keys. +#if defined(__clang__) // This should work on GCC too, but GCC diagnostic pop didn't seem to work! + #define SK_BEGIN_REQUIRE_DENSE _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic error \"-Wpadded\"") + #define SK_END_REQUIRE_DENSE _Pragma("GCC diagnostic pop") +#else + #define SK_BEGIN_REQUIRE_DENSE + #define SK_END_REQUIRE_DENSE +#endif + +#define SK_INIT_TO_AVOID_WARNING = 0 + +//////////////////////////////////////////////////////////////////////////////// + +/** + * Defines overloaded bitwise operators to make it easier to use an enum as a + * bitfield. + */ +#define SK_MAKE_BITFIELD_OPS(X) \ + inline X operator |(X a, X b) { \ + return (X) (+a | +b); \ + } \ + inline X& operator |=(X& a, X b) { \ + return (a = a | b); \ + } \ + inline X operator &(X a, X b) { \ + return (X) (+a & +b); \ + } \ + inline X& operator &=(X& a, X b) { \ + return (a = a & b); \ + } \ + template <typename T> \ + inline X operator &(T a, X b) { \ + return (X) (+a & +b); \ + } \ + template <typename T> \ + inline X operator &(X a, T b) { \ + return (X) (+a & +b); \ + } \ + +#define SK_DECL_BITFIELD_OPS_FRIENDS(X) \ + friend X operator |(X a, X b); \ + friend X& operator |=(X& a, X b); \ + \ + friend X operator &(X a, X b); \ + friend X& operator &=(X& a, X b); \ + \ + template <typename T> \ + friend X operator &(T a, X b); \ + \ + template <typename T> \ + friend X operator &(X a, T b); \ + +#endif // SkMacros_DEFINED diff --git a/src/deps/skia/include/private/SkMalloc.h b/src/deps/skia/include/private/SkMalloc.h new file mode 100644 index 000000000..033294cf8 --- /dev/null +++ b/src/deps/skia/include/private/SkMalloc.h @@ -0,0 +1,143 @@ +/* + * Copyright 2017 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkMalloc_DEFINED +#define SkMalloc_DEFINED + +#include <cstddef> +#include <cstring> + +#include "include/core/SkTypes.h" + +/* + memory wrappers to be implemented by the porting layer (platform) +*/ + + +/** Free memory returned by sk_malloc(). It is safe to pass null. */ +SK_API extern void sk_free(void*); + +/** + * Called internally if we run out of memory. The platform implementation must + * not return, but should either throw an exception or otherwise exit. + */ +SK_API extern void sk_out_of_memory(void); + +enum { + /** + * If this bit is set, the returned buffer must be zero-initialized. If this bit is not set + * the buffer can be uninitialized. 
+ */ + SK_MALLOC_ZERO_INITIALIZE = 1 << 0, + + /** + * If this bit is set, the implementation must throw/crash/quit if the request cannot + * be fulfilled. If this bit is not set, then it should return nullptr on failure. + */ + SK_MALLOC_THROW = 1 << 1, +}; +/** + * Return a block of memory (at least 4-byte aligned) of at least the specified size. + * If the requested memory cannot be returned, either return nullptr or throw/exit, depending + * on the SK_MALLOC_THROW bit. If the allocation succeeds, the memory will be zero-initialized + * if the SK_MALLOC_ZERO_INITIALIZE bit was set. + * + * To free the memory, call sk_free() + */ +SK_API extern void* sk_malloc_flags(size_t size, unsigned flags); + +/** Same as standard realloc(), but this one never returns null on failure. It will throw + * an exception if it fails. + */ +SK_API extern void* sk_realloc_throw(void* buffer, size_t size); + +static inline void* sk_malloc_throw(size_t size) { + return sk_malloc_flags(size, SK_MALLOC_THROW); +} + +static inline void* sk_calloc_throw(size_t size) { + return sk_malloc_flags(size, SK_MALLOC_THROW | SK_MALLOC_ZERO_INITIALIZE); +} + +static inline void* sk_calloc_canfail(size_t size) { +#if defined(SK_BUILD_FOR_FUZZER) + // To reduce the chance of OOM, pretend we can't allocate more than 200kb. + if (size > 200000) { + return nullptr; + } +#endif + return sk_malloc_flags(size, SK_MALLOC_ZERO_INITIALIZE); +} + +// Performs a safe multiply count * elemSize, checking for overflow +SK_API extern void* sk_calloc_throw(size_t count, size_t elemSize); +SK_API extern void* sk_malloc_throw(size_t count, size_t elemSize); +SK_API extern void* sk_realloc_throw(void* buffer, size_t count, size_t elemSize); + +/** + * These variants return nullptr on failure + */ +static inline void* sk_malloc_canfail(size_t size) { +#if defined(SK_BUILD_FOR_FUZZER) + // To reduce the chance of OOM, pretend we can't allocate more than 200kb. + if (size > 200000) { + return nullptr; + } +#endif + return sk_malloc_flags(size, 0); +} +SK_API extern void* sk_malloc_canfail(size_t count, size_t elemSize); + +// bzero is safer than memset, but we can't rely on it, so... sk_bzero() +static inline void sk_bzero(void* buffer, size_t size) { + // Please c.f. sk_careful_memcpy. It's undefined behavior to call memset(null, 0, 0). + if (size) { + memset(buffer, 0, size); + } +} + +/** + * sk_careful_memcpy() is just like memcpy(), but guards against undefined behavior. + * + * It is undefined behavior to call memcpy() with null dst or src, even if len is 0. + * If an optimizer is "smart" enough, it can exploit this to do unexpected things. + * memcpy(dst, src, 0); + * if (src) { + * printf("%x\n", *src); + * } + * In this code the compiler can assume src is not null and omit the if (src) {...} check, + * unconditionally running the printf, crashing the program if src really is null. + * Of the compilers we pay attention to only GCC performs this optimization in practice. + */ +static inline void* sk_careful_memcpy(void* dst, const void* src, size_t len) { + // When we pass >0 len we had better already be passing valid pointers. + // So we just need to skip calling memcpy when len == 0. + if (len) { + memcpy(dst,src,len); + } + return dst; +} + +static inline void* sk_careful_memmove(void* dst, const void* src, size_t len) { + // When we pass >0 len we had better already be passing valid pointers. + // So we just need to skip calling memcpy when len == 0. 
+ if (len) { + memmove(dst,src,len); + } + return dst; +} + +static inline int sk_careful_memcmp(const void* a, const void* b, size_t len) { + // When we pass >0 len we had better already be passing valid pointers. + // So we just need to skip calling memcmp when len == 0. + if (len == 0) { + return 0; // we treat zero-length buffers as "equal" + } + return memcmp(a, b, len); +} + +#endif // SkMalloc_DEFINED diff --git a/src/deps/skia/include/private/SkMutex.h b/src/deps/skia/include/private/SkMutex.h new file mode 100644 index 000000000..096f3ebc9 --- /dev/null +++ b/src/deps/skia/include/private/SkMutex.h @@ -0,0 +1,56 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkMutex_DEFINED +#define SkMutex_DEFINED + +#include "include/core/SkTypes.h" +#include "include/private/SkMacros.h" +#include "include/private/SkSemaphore.h" +#include "include/private/SkThreadAnnotations.h" +#include "include/private/SkThreadID.h" + +class SK_CAPABILITY("mutex") SkMutex { +public: + constexpr SkMutex() = default; + + void acquire() SK_ACQUIRE() { + fSemaphore.wait(); + SkDEBUGCODE(fOwner = SkGetThreadID();) + } + + void release() SK_RELEASE_CAPABILITY() { + this->assertHeld(); + SkDEBUGCODE(fOwner = kIllegalThreadID;) + fSemaphore.signal(); + } + + void assertHeld() SK_ASSERT_CAPABILITY(this) { + SkASSERT(fOwner == SkGetThreadID()); + } + +private: + SkSemaphore fSemaphore{1}; + SkDEBUGCODE(SkThreadID fOwner{kIllegalThreadID};) +}; + +class SK_SCOPED_CAPABILITY SkAutoMutexExclusive { +public: + SkAutoMutexExclusive(SkMutex& mutex) SK_ACQUIRE(mutex) : fMutex(mutex) { fMutex.acquire(); } + ~SkAutoMutexExclusive() SK_RELEASE_CAPABILITY() { fMutex.release(); } + + SkAutoMutexExclusive(const SkAutoMutexExclusive&) = delete; + SkAutoMutexExclusive(SkAutoMutexExclusive&&) = delete; + + SkAutoMutexExclusive& operator=(const SkAutoMutexExclusive&) = delete; + SkAutoMutexExclusive& operator=(SkAutoMutexExclusive&&) = delete; + +private: + SkMutex& fMutex; +}; + +#endif // SkMutex_DEFINED diff --git a/src/deps/skia/include/private/SkNoncopyable.h b/src/deps/skia/include/private/SkNoncopyable.h new file mode 100644 index 000000000..bda5d50bb --- /dev/null +++ b/src/deps/skia/include/private/SkNoncopyable.h @@ -0,0 +1,30 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkNoncopyable_DEFINED +#define SkNoncopyable_DEFINED + +#include "include/core/SkTypes.h" + +/** \class SkNoncopyable + + SkNoncopyable is the base class for objects that do not want to + be copied. It hides its copy-constructor and its assignment-operator. +*/ +class SK_API SkNoncopyable { +public: + SkNoncopyable() = default; + + SkNoncopyable(SkNoncopyable&&) = default; + SkNoncopyable& operator =(SkNoncopyable&&) = default; + +private: + SkNoncopyable(const SkNoncopyable&) = delete; + SkNoncopyable& operator=(const SkNoncopyable&) = delete; +}; + +#endif diff --git a/src/deps/skia/include/private/SkNx.h b/src/deps/skia/include/private/SkNx.h new file mode 100644 index 000000000..cf41bb0c9 --- /dev/null +++ b/src/deps/skia/include/private/SkNx.h @@ -0,0 +1,430 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#ifndef SkNx_DEFINED +#define SkNx_DEFINED + +#include "include/core/SkScalar.h" +#include "include/core/SkTypes.h" +#include "include/private/SkSafe_math.h" + +#include <algorithm> +#include <limits> +#include <type_traits> + +// Every single SkNx method wants to be fully inlined. (We know better than MSVC). +#define AI SK_ALWAYS_INLINE + +namespace { // NOLINT(google-build-namespaces) + +// The default SkNx<N,T> just proxies down to a pair of SkNx<N/2, T>. +template <int N, typename T> +struct SkNx { + typedef SkNx<N/2, T> Half; + + Half fLo, fHi; + + AI SkNx() = default; + AI SkNx(const Half& lo, const Half& hi) : fLo(lo), fHi(hi) {} + + AI SkNx(T v) : fLo(v), fHi(v) {} + + AI SkNx(T a, T b) : fLo(a) , fHi(b) { static_assert(N==2, ""); } + AI SkNx(T a, T b, T c, T d) : fLo(a,b), fHi(c,d) { static_assert(N==4, ""); } + AI SkNx(T a, T b, T c, T d, T e, T f, T g, T h) : fLo(a,b,c,d), fHi(e,f,g,h) { + static_assert(N==8, ""); + } + AI SkNx(T a, T b, T c, T d, T e, T f, T g, T h, + T i, T j, T k, T l, T m, T n, T o, T p) + : fLo(a,b,c,d, e,f,g,h), fHi(i,j,k,l, m,n,o,p) { + static_assert(N==16, ""); + } + + AI T operator[](int k) const { + SkASSERT(0 <= k && k < N); + return k < N/2 ? fLo[k] : fHi[k-N/2]; + } + + AI static SkNx Load(const void* vptr) { + auto ptr = (const char*)vptr; + return { Half::Load(ptr), Half::Load(ptr + N/2*sizeof(T)) }; + } + AI void store(void* vptr) const { + auto ptr = (char*)vptr; + fLo.store(ptr); + fHi.store(ptr + N/2*sizeof(T)); + } + + AI static void Load4(const void* vptr, SkNx* a, SkNx* b, SkNx* c, SkNx* d) { + auto ptr = (const char*)vptr; + Half al, bl, cl, dl, + ah, bh, ch, dh; + Half::Load4(ptr , &al, &bl, &cl, &dl); + Half::Load4(ptr + 4*N/2*sizeof(T), &ah, &bh, &ch, &dh); + *a = SkNx{al, ah}; + *b = SkNx{bl, bh}; + *c = SkNx{cl, ch}; + *d = SkNx{dl, dh}; + } + AI static void Load3(const void* vptr, SkNx* a, SkNx* b, SkNx* c) { + auto ptr = (const char*)vptr; + Half al, bl, cl, + ah, bh, ch; + Half::Load3(ptr , &al, &bl, &cl); + Half::Load3(ptr + 3*N/2*sizeof(T), &ah, &bh, &ch); + *a = SkNx{al, ah}; + *b = SkNx{bl, bh}; + *c = SkNx{cl, ch}; + } + AI static void Load2(const void* vptr, SkNx* a, SkNx* b) { + auto ptr = (const char*)vptr; + Half al, bl, + ah, bh; + Half::Load2(ptr , &al, &bl); + Half::Load2(ptr + 2*N/2*sizeof(T), &ah, &bh); + *a = SkNx{al, ah}; + *b = SkNx{bl, bh}; + } + AI static void Store4(void* vptr, const SkNx& a, const SkNx& b, const SkNx& c, const SkNx& d) { + auto ptr = (char*)vptr; + Half::Store4(ptr, a.fLo, b.fLo, c.fLo, d.fLo); + Half::Store4(ptr + 4*N/2*sizeof(T), a.fHi, b.fHi, c.fHi, d.fHi); + } + AI static void Store3(void* vptr, const SkNx& a, const SkNx& b, const SkNx& c) { + auto ptr = (char*)vptr; + Half::Store3(ptr, a.fLo, b.fLo, c.fLo); + Half::Store3(ptr + 3*N/2*sizeof(T), a.fHi, b.fHi, c.fHi); + } + AI static void Store2(void* vptr, const SkNx& a, const SkNx& b) { + auto ptr = (char*)vptr; + Half::Store2(ptr, a.fLo, b.fLo); + Half::Store2(ptr + 2*N/2*sizeof(T), a.fHi, b.fHi); + } + + AI T min() const { return std::min(fLo.min(), fHi.min()); } + AI T max() const { return std::max(fLo.max(), fHi.max()); } + AI bool anyTrue() const { return fLo.anyTrue() || fHi.anyTrue(); } + AI bool allTrue() const { return fLo.allTrue() && fHi.allTrue(); } + + AI SkNx abs() const { return { fLo. abs(), fHi. abs() }; } + AI SkNx sqrt() const { return { fLo. sqrt(), fHi. sqrt() }; } + AI SkNx floor() const { return { fLo. floor(), fHi. 
floor() }; } + + AI SkNx operator!() const { return { !fLo, !fHi }; } + AI SkNx operator-() const { return { -fLo, -fHi }; } + AI SkNx operator~() const { return { ~fLo, ~fHi }; } + + AI SkNx operator<<(int bits) const { return { fLo << bits, fHi << bits }; } + AI SkNx operator>>(int bits) const { return { fLo >> bits, fHi >> bits }; } + + AI SkNx operator+(const SkNx& y) const { return { fLo + y.fLo, fHi + y.fHi }; } + AI SkNx operator-(const SkNx& y) const { return { fLo - y.fLo, fHi - y.fHi }; } + AI SkNx operator*(const SkNx& y) const { return { fLo * y.fLo, fHi * y.fHi }; } + AI SkNx operator/(const SkNx& y) const { return { fLo / y.fLo, fHi / y.fHi }; } + + AI SkNx operator&(const SkNx& y) const { return { fLo & y.fLo, fHi & y.fHi }; } + AI SkNx operator|(const SkNx& y) const { return { fLo | y.fLo, fHi | y.fHi }; } + AI SkNx operator^(const SkNx& y) const { return { fLo ^ y.fLo, fHi ^ y.fHi }; } + + AI SkNx operator==(const SkNx& y) const { return { fLo == y.fLo, fHi == y.fHi }; } + AI SkNx operator!=(const SkNx& y) const { return { fLo != y.fLo, fHi != y.fHi }; } + AI SkNx operator<=(const SkNx& y) const { return { fLo <= y.fLo, fHi <= y.fHi }; } + AI SkNx operator>=(const SkNx& y) const { return { fLo >= y.fLo, fHi >= y.fHi }; } + AI SkNx operator< (const SkNx& y) const { return { fLo < y.fLo, fHi < y.fHi }; } + AI SkNx operator> (const SkNx& y) const { return { fLo > y.fLo, fHi > y.fHi }; } + + AI SkNx saturatedAdd(const SkNx& y) const { + return { fLo.saturatedAdd(y.fLo), fHi.saturatedAdd(y.fHi) }; + } + + AI SkNx mulHi(const SkNx& m) const { + return { fLo.mulHi(m.fLo), fHi.mulHi(m.fHi) }; + } + AI SkNx thenElse(const SkNx& t, const SkNx& e) const { + return { fLo.thenElse(t.fLo, e.fLo), fHi.thenElse(t.fHi, e.fHi) }; + } + AI static SkNx Min(const SkNx& x, const SkNx& y) { + return { Half::Min(x.fLo, y.fLo), Half::Min(x.fHi, y.fHi) }; + } + AI static SkNx Max(const SkNx& x, const SkNx& y) { + return { Half::Max(x.fLo, y.fLo), Half::Max(x.fHi, y.fHi) }; + } +}; + +// The N -> N/2 recursion bottoms out at N == 1, a scalar value. 
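+// Illustrative note: for example, SkNx<4,float> is stored as two SkNx<2,float> halves, each of
+// which is two SkNx<1,float> scalars, so every operator above ultimately recurses into the
+// scalar specialization defined next.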
+template <typename T> +struct SkNx<1,T> { + T fVal; + + AI SkNx() = default; + AI SkNx(T v) : fVal(v) {} + + // Android complains against unused parameters, so we guard it + AI T operator[](int SkDEBUGCODE(k)) const { + SkASSERT(k == 0); + return fVal; + } + + AI static SkNx Load(const void* ptr) { + SkNx v; + memcpy(&v, ptr, sizeof(T)); + return v; + } + AI void store(void* ptr) const { memcpy(ptr, &fVal, sizeof(T)); } + + AI static void Load4(const void* vptr, SkNx* a, SkNx* b, SkNx* c, SkNx* d) { + auto ptr = (const char*)vptr; + *a = Load(ptr + 0*sizeof(T)); + *b = Load(ptr + 1*sizeof(T)); + *c = Load(ptr + 2*sizeof(T)); + *d = Load(ptr + 3*sizeof(T)); + } + AI static void Load3(const void* vptr, SkNx* a, SkNx* b, SkNx* c) { + auto ptr = (const char*)vptr; + *a = Load(ptr + 0*sizeof(T)); + *b = Load(ptr + 1*sizeof(T)); + *c = Load(ptr + 2*sizeof(T)); + } + AI static void Load2(const void* vptr, SkNx* a, SkNx* b) { + auto ptr = (const char*)vptr; + *a = Load(ptr + 0*sizeof(T)); + *b = Load(ptr + 1*sizeof(T)); + } + AI static void Store4(void* vptr, const SkNx& a, const SkNx& b, const SkNx& c, const SkNx& d) { + auto ptr = (char*)vptr; + a.store(ptr + 0*sizeof(T)); + b.store(ptr + 1*sizeof(T)); + c.store(ptr + 2*sizeof(T)); + d.store(ptr + 3*sizeof(T)); + } + AI static void Store3(void* vptr, const SkNx& a, const SkNx& b, const SkNx& c) { + auto ptr = (char*)vptr; + a.store(ptr + 0*sizeof(T)); + b.store(ptr + 1*sizeof(T)); + c.store(ptr + 2*sizeof(T)); + } + AI static void Store2(void* vptr, const SkNx& a, const SkNx& b) { + auto ptr = (char*)vptr; + a.store(ptr + 0*sizeof(T)); + b.store(ptr + 1*sizeof(T)); + } + + AI T min() const { return fVal; } + AI T max() const { return fVal; } + AI bool anyTrue() const { return fVal != 0; } + AI bool allTrue() const { return fVal != 0; } + + AI SkNx abs() const { return Abs(fVal); } + AI SkNx sqrt() const { return Sqrt(fVal); } + AI SkNx floor() const { return Floor(fVal); } + + AI SkNx operator!() const { return !fVal; } + AI SkNx operator-() const { return -fVal; } + AI SkNx operator~() const { return FromBits(~ToBits(fVal)); } + + AI SkNx operator<<(int bits) const { return fVal << bits; } + AI SkNx operator>>(int bits) const { return fVal >> bits; } + + AI SkNx operator+(const SkNx& y) const { return fVal + y.fVal; } + AI SkNx operator-(const SkNx& y) const { return fVal - y.fVal; } + AI SkNx operator*(const SkNx& y) const { return fVal * y.fVal; } + AI SkNx operator/(const SkNx& y) const { return fVal / y.fVal; } + + AI SkNx operator&(const SkNx& y) const { return FromBits(ToBits(fVal) & ToBits(y.fVal)); } + AI SkNx operator|(const SkNx& y) const { return FromBits(ToBits(fVal) | ToBits(y.fVal)); } + AI SkNx operator^(const SkNx& y) const { return FromBits(ToBits(fVal) ^ ToBits(y.fVal)); } + + AI SkNx operator==(const SkNx& y) const { return FromBits(fVal == y.fVal ? ~0 : 0); } + AI SkNx operator!=(const SkNx& y) const { return FromBits(fVal != y.fVal ? ~0 : 0); } + AI SkNx operator<=(const SkNx& y) const { return FromBits(fVal <= y.fVal ? ~0 : 0); } + AI SkNx operator>=(const SkNx& y) const { return FromBits(fVal >= y.fVal ? ~0 : 0); } + AI SkNx operator< (const SkNx& y) const { return FromBits(fVal < y.fVal ? ~0 : 0); } + AI SkNx operator> (const SkNx& y) const { return FromBits(fVal > y.fVal ? ~0 : 0); } + + AI static SkNx Min(const SkNx& x, const SkNx& y) { return x.fVal < y.fVal ? x : y; } + AI static SkNx Max(const SkNx& x, const SkNx& y) { return x.fVal > y.fVal ? 
x : y; } + + AI SkNx saturatedAdd(const SkNx& y) const { + static_assert(std::is_unsigned<T>::value, ""); + T sum = fVal + y.fVal; + return sum < fVal ? std::numeric_limits<T>::max() : sum; + } + + AI SkNx mulHi(const SkNx& m) const { + static_assert(std::is_unsigned<T>::value, ""); + static_assert(sizeof(T) <= 4, ""); + return static_cast<T>((static_cast<uint64_t>(fVal) * m.fVal) >> (sizeof(T)*8)); + } + + AI SkNx thenElse(const SkNx& t, const SkNx& e) const { return fVal != 0 ? t : e; } + +private: + // Helper functions to choose the right float/double methods. (In <cmath> madness lies...) + AI static int Abs(int val) { return val < 0 ? -val : val; } + + AI static float Abs(float val) { return ::fabsf(val); } + AI static float Sqrt(float val) { return ::sqrtf(val); } + AI static float Floor(float val) { return ::floorf(val); } + + AI static double Abs(double val) { return ::fabs(val); } + AI static double Sqrt(double val) { return ::sqrt(val); } + AI static double Floor(double val) { return ::floor(val); } + + // Helper functions for working with floats/doubles as bit patterns. + template <typename U> + AI static U ToBits(U v) { return v; } + AI static int32_t ToBits(float v) { int32_t bits; memcpy(&bits, &v, sizeof(v)); return bits; } + AI static int64_t ToBits(double v) { int64_t bits; memcpy(&bits, &v, sizeof(v)); return bits; } + + template <typename Bits> + AI static T FromBits(Bits bits) { + static_assert(std::is_pod<T >::value && + std::is_pod<Bits>::value && + sizeof(T) <= sizeof(Bits), ""); + T val; + memcpy(&val, &bits, sizeof(T)); + return val; + } +}; + +// Allow scalars on the left or right of binary operators, and things like +=, &=, etc. +#define V template <int N, typename T> AI static SkNx<N,T> + V operator+ (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) + y; } + V operator- (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) - y; } + V operator* (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) * y; } + V operator/ (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) / y; } + V operator& (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) & y; } + V operator| (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) | y; } + V operator^ (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) ^ y; } + V operator==(T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) == y; } + V operator!=(T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) != y; } + V operator<=(T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) <= y; } + V operator>=(T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) >= y; } + V operator< (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) < y; } + V operator> (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) > y; } + + V operator+ (const SkNx<N,T>& x, T y) { return x + SkNx<N,T>(y); } + V operator- (const SkNx<N,T>& x, T y) { return x - SkNx<N,T>(y); } + V operator* (const SkNx<N,T>& x, T y) { return x * SkNx<N,T>(y); } + V operator/ (const SkNx<N,T>& x, T y) { return x / SkNx<N,T>(y); } + V operator& (const SkNx<N,T>& x, T y) { return x & SkNx<N,T>(y); } + V operator| (const SkNx<N,T>& x, T y) { return x | SkNx<N,T>(y); } + V operator^ (const SkNx<N,T>& x, T y) { return x ^ SkNx<N,T>(y); } + V operator==(const SkNx<N,T>& x, T y) { return x == SkNx<N,T>(y); } + V operator!=(const SkNx<N,T>& x, T y) { return x != SkNx<N,T>(y); } + V operator<=(const SkNx<N,T>& x, T y) { return x <= SkNx<N,T>(y); } + V operator>=(const SkNx<N,T>& x, T y) { return x >= SkNx<N,T>(y); } + V operator< (const SkNx<N,T>& x, T y) { return x < SkNx<N,T>(y); } + V operator> (const SkNx<N,T>& x, T y) { return x > SkNx<N,T>(y); } + + V& 
operator<<=(SkNx<N,T>& x, int bits) { return (x = x << bits); } + V& operator>>=(SkNx<N,T>& x, int bits) { return (x = x >> bits); } + + V& operator +=(SkNx<N,T>& x, const SkNx<N,T>& y) { return (x = x + y); } + V& operator -=(SkNx<N,T>& x, const SkNx<N,T>& y) { return (x = x - y); } + V& operator *=(SkNx<N,T>& x, const SkNx<N,T>& y) { return (x = x * y); } + V& operator /=(SkNx<N,T>& x, const SkNx<N,T>& y) { return (x = x / y); } + V& operator &=(SkNx<N,T>& x, const SkNx<N,T>& y) { return (x = x & y); } + V& operator |=(SkNx<N,T>& x, const SkNx<N,T>& y) { return (x = x | y); } + V& operator ^=(SkNx<N,T>& x, const SkNx<N,T>& y) { return (x = x ^ y); } + + V& operator +=(SkNx<N,T>& x, T y) { return (x = x + SkNx<N,T>(y)); } + V& operator -=(SkNx<N,T>& x, T y) { return (x = x - SkNx<N,T>(y)); } + V& operator *=(SkNx<N,T>& x, T y) { return (x = x * SkNx<N,T>(y)); } + V& operator /=(SkNx<N,T>& x, T y) { return (x = x / SkNx<N,T>(y)); } + V& operator &=(SkNx<N,T>& x, T y) { return (x = x & SkNx<N,T>(y)); } + V& operator |=(SkNx<N,T>& x, T y) { return (x = x | SkNx<N,T>(y)); } + V& operator ^=(SkNx<N,T>& x, T y) { return (x = x ^ SkNx<N,T>(y)); } +#undef V + +// SkNx<N,T> ~~> SkNx<N/2,T> + SkNx<N/2,T> +template <int N, typename T> +AI static void SkNx_split(const SkNx<N,T>& v, SkNx<N/2,T>* lo, SkNx<N/2,T>* hi) { + *lo = v.fLo; + *hi = v.fHi; +} + +// SkNx<N/2,T> + SkNx<N/2,T> ~~> SkNx<N,T> +template <int N, typename T> +AI static SkNx<N*2,T> SkNx_join(const SkNx<N,T>& lo, const SkNx<N,T>& hi) { + return { lo, hi }; +} + +// A very generic shuffle. Can reorder, duplicate, contract, expand... +// Sk4f v = { R,G,B,A }; +// SkNx_shuffle<2,1,0,3>(v) ~~> {B,G,R,A} +// SkNx_shuffle<2,1>(v) ~~> {B,G} +// SkNx_shuffle<2,1,2,1,2,1,2,1>(v) ~~> {B,G,B,G,B,G,B,G} +// SkNx_shuffle<3,3,3,3>(v) ~~> {A,A,A,A} +template <int... Ix, int N, typename T> +AI static SkNx<sizeof...(Ix),T> SkNx_shuffle(const SkNx<N,T>& v) { + return { v[Ix]... }; +} + +// Cast from SkNx<N, Src> to SkNx<N, Dst>, as if you called static_cast<Dst>(Src). +template <typename Dst, typename Src, int N> +AI static SkNx<N,Dst> SkNx_cast(const SkNx<N,Src>& v) { + return { SkNx_cast<Dst>(v.fLo), SkNx_cast<Dst>(v.fHi) }; +} +template <typename Dst, typename Src> +AI static SkNx<1,Dst> SkNx_cast(const SkNx<1,Src>& v) { + return static_cast<Dst>(v.fVal); +} + +template <int N, typename T> +AI static SkNx<N,T> SkNx_fma(const SkNx<N,T>& f, const SkNx<N,T>& m, const SkNx<N,T>& a) { + return f*m+a; +} + +} // namespace + +typedef SkNx<2, float> Sk2f; +typedef SkNx<4, float> Sk4f; +typedef SkNx<8, float> Sk8f; +typedef SkNx<16, float> Sk16f; + +typedef SkNx<2, SkScalar> Sk2s; +typedef SkNx<4, SkScalar> Sk4s; +typedef SkNx<8, SkScalar> Sk8s; +typedef SkNx<16, SkScalar> Sk16s; + +typedef SkNx<4, uint8_t> Sk4b; +typedef SkNx<8, uint8_t> Sk8b; +typedef SkNx<16, uint8_t> Sk16b; + +typedef SkNx<4, uint16_t> Sk4h; +typedef SkNx<8, uint16_t> Sk8h; +typedef SkNx<16, uint16_t> Sk16h; + +typedef SkNx<4, int32_t> Sk4i; +typedef SkNx<8, int32_t> Sk8i; +typedef SkNx<4, uint32_t> Sk4u; + +// Include platform specific specializations if available. 
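+// Illustrative sketch (comment only, since the platform specializations are not included yet
+// at this point): typical usage of the typedefs above is the same whether the portable structs
+// or the SSE/NEON specializations below end up being selected, e.g.
+//
+//     Sk4f px = Sk4f::Load(rgba);                  // load four floats
+//     px = Sk4f::Min(px * 2.0f, Sk4f(1.0f));       // scale and clamp each lane
+//     Sk4f bgra = SkNx_shuffle<2,1,0,3>(px);       // swizzle channels
+//     bgra.store(rgba);                            // store four floats back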
+#if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 + #include "include/private/SkNx_sse.h" +#elif !defined(SKNX_NO_SIMD) && defined(SK_ARM_HAS_NEON) + #include "include/private/SkNx_neon.h" +#else + +AI static Sk4i Sk4f_round(const Sk4f& x) { + return { (int) lrintf (x[0]), + (int) lrintf (x[1]), + (int) lrintf (x[2]), + (int) lrintf (x[3]), }; +} + +#endif + +AI static void Sk4f_ToBytes(uint8_t p[16], + const Sk4f& a, const Sk4f& b, const Sk4f& c, const Sk4f& d) { + SkNx_cast<uint8_t>(SkNx_join(SkNx_join(a,b), SkNx_join(c,d))).store(p); +} + +#undef AI + +#endif//SkNx_DEFINED diff --git a/src/deps/skia/include/private/SkNx_neon.h b/src/deps/skia/include/private/SkNx_neon.h new file mode 100644 index 000000000..a5e2e0109 --- /dev/null +++ b/src/deps/skia/include/private/SkNx_neon.h @@ -0,0 +1,713 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkNx_neon_DEFINED +#define SkNx_neon_DEFINED + +#include <arm_neon.h> + +namespace { // NOLINT(google-build-namespaces) + +// ARMv8 has vrndm(q)_f32 to floor floats. Here we emulate it: +// - roundtrip through integers via truncation +// - subtract 1 if that's too big (possible for negative values). +// This restricts the domain of our inputs to a maximum somehwere around 2^31. Seems plenty big. +AI static float32x4_t emulate_vrndmq_f32(float32x4_t v) { + auto roundtrip = vcvtq_f32_s32(vcvtq_s32_f32(v)); + auto too_big = vcgtq_f32(roundtrip, v); + return vsubq_f32(roundtrip, (float32x4_t)vandq_u32(too_big, (uint32x4_t)vdupq_n_f32(1))); +} +AI static float32x2_t emulate_vrndm_f32(float32x2_t v) { + auto roundtrip = vcvt_f32_s32(vcvt_s32_f32(v)); + auto too_big = vcgt_f32(roundtrip, v); + return vsub_f32(roundtrip, (float32x2_t)vand_u32(too_big, (uint32x2_t)vdup_n_f32(1))); +} + +template <> +class SkNx<2, float> { +public: + AI SkNx(float32x2_t vec) : fVec(vec) {} + + AI SkNx() {} + AI SkNx(float val) : fVec(vdup_n_f32(val)) {} + AI SkNx(float a, float b) { fVec = (float32x2_t) { a, b }; } + + AI static SkNx Load(const void* ptr) { return vld1_f32((const float*)ptr); } + AI void store(void* ptr) const { vst1_f32((float*)ptr, fVec); } + + AI static void Load2(const void* ptr, SkNx* x, SkNx* y) { + float32x2x2_t xy = vld2_f32((const float*) ptr); + *x = xy.val[0]; + *y = xy.val[1]; + } + + AI static void Store2(void* dst, const SkNx& a, const SkNx& b) { + float32x2x2_t ab = {{ + a.fVec, + b.fVec, + }}; + vst2_f32((float*) dst, ab); + } + + AI static void Store3(void* dst, const SkNx& a, const SkNx& b, const SkNx& c) { + float32x2x3_t abc = {{ + a.fVec, + b.fVec, + c.fVec, + }}; + vst3_f32((float*) dst, abc); + } + + AI static void Store4(void* dst, const SkNx& a, const SkNx& b, const SkNx& c, const SkNx& d) { + float32x2x4_t abcd = {{ + a.fVec, + b.fVec, + c.fVec, + d.fVec, + }}; + vst4_f32((float*) dst, abcd); + } + + AI SkNx operator - () const { return vneg_f32(fVec); } + + AI SkNx operator + (const SkNx& o) const { return vadd_f32(fVec, o.fVec); } + AI SkNx operator - (const SkNx& o) const { return vsub_f32(fVec, o.fVec); } + AI SkNx operator * (const SkNx& o) const { return vmul_f32(fVec, o.fVec); } + AI SkNx operator / (const SkNx& o) const { + #if defined(SK_CPU_ARM64) + return vdiv_f32(fVec, o.fVec); + #else + float32x2_t est0 = vrecpe_f32(o.fVec), + est1 = vmul_f32(vrecps_f32(est0, o.fVec), est0), + est2 = vmul_f32(vrecps_f32(est1, o.fVec), est1); + return vmul_f32(fVec, est2); + #endif + } + + AI SkNx 
operator==(const SkNx& o) const { return vreinterpret_f32_u32(vceq_f32(fVec, o.fVec)); } + AI SkNx operator <(const SkNx& o) const { return vreinterpret_f32_u32(vclt_f32(fVec, o.fVec)); } + AI SkNx operator >(const SkNx& o) const { return vreinterpret_f32_u32(vcgt_f32(fVec, o.fVec)); } + AI SkNx operator<=(const SkNx& o) const { return vreinterpret_f32_u32(vcle_f32(fVec, o.fVec)); } + AI SkNx operator>=(const SkNx& o) const { return vreinterpret_f32_u32(vcge_f32(fVec, o.fVec)); } + AI SkNx operator!=(const SkNx& o) const { + return vreinterpret_f32_u32(vmvn_u32(vceq_f32(fVec, o.fVec))); + } + + AI static SkNx Min(const SkNx& l, const SkNx& r) { return vmin_f32(l.fVec, r.fVec); } + AI static SkNx Max(const SkNx& l, const SkNx& r) { return vmax_f32(l.fVec, r.fVec); } + + AI SkNx abs() const { return vabs_f32(fVec); } + AI SkNx floor() const { + #if defined(SK_CPU_ARM64) + return vrndm_f32(fVec); + #else + return emulate_vrndm_f32(fVec); + #endif + } + + AI SkNx sqrt() const { + #if defined(SK_CPU_ARM64) + return vsqrt_f32(fVec); + #else + float32x2_t est0 = vrsqrte_f32(fVec), + est1 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0), + est2 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1); + return vmul_f32(fVec, est2); + #endif + } + + AI float operator[](int k) const { + SkASSERT(0 <= k && k < 2); + union { float32x2_t v; float fs[2]; } pun = {fVec}; + return pun.fs[k&1]; + } + + AI bool allTrue() const { + #if defined(SK_CPU_ARM64) + return 0 != vminv_u32(vreinterpret_u32_f32(fVec)); + #else + auto v = vreinterpret_u32_f32(fVec); + return vget_lane_u32(v,0) && vget_lane_u32(v,1); + #endif + } + AI bool anyTrue() const { + #if defined(SK_CPU_ARM64) + return 0 != vmaxv_u32(vreinterpret_u32_f32(fVec)); + #else + auto v = vreinterpret_u32_f32(fVec); + return vget_lane_u32(v,0) || vget_lane_u32(v,1); + #endif + } + + AI SkNx thenElse(const SkNx& t, const SkNx& e) const { + return vbsl_f32(vreinterpret_u32_f32(fVec), t.fVec, e.fVec); + } + + float32x2_t fVec; +}; + +template <> +class SkNx<4, float> { +public: + AI SkNx(float32x4_t vec) : fVec(vec) {} + + AI SkNx() {} + AI SkNx(float val) : fVec(vdupq_n_f32(val)) {} + AI SkNx(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; } + + AI static SkNx Load(const void* ptr) { return vld1q_f32((const float*)ptr); } + AI void store(void* ptr) const { vst1q_f32((float*)ptr, fVec); } + + AI static void Load2(const void* ptr, SkNx* x, SkNx* y) { + float32x4x2_t xy = vld2q_f32((const float*) ptr); + *x = xy.val[0]; + *y = xy.val[1]; + } + + AI static void Load4(const void* ptr, SkNx* r, SkNx* g, SkNx* b, SkNx* a) { + float32x4x4_t rgba = vld4q_f32((const float*) ptr); + *r = rgba.val[0]; + *g = rgba.val[1]; + *b = rgba.val[2]; + *a = rgba.val[3]; + } + AI static void Store4(void* dst, const SkNx& r, const SkNx& g, const SkNx& b, const SkNx& a) { + float32x4x4_t rgba = {{ + r.fVec, + g.fVec, + b.fVec, + a.fVec, + }}; + vst4q_f32((float*) dst, rgba); + } + + AI SkNx operator - () const { return vnegq_f32(fVec); } + + AI SkNx operator + (const SkNx& o) const { return vaddq_f32(fVec, o.fVec); } + AI SkNx operator - (const SkNx& o) const { return vsubq_f32(fVec, o.fVec); } + AI SkNx operator * (const SkNx& o) const { return vmulq_f32(fVec, o.fVec); } + AI SkNx operator / (const SkNx& o) const { + #if defined(SK_CPU_ARM64) + return vdivq_f32(fVec, o.fVec); + #else + float32x4_t est0 = vrecpeq_f32(o.fVec), + est1 = vmulq_f32(vrecpsq_f32(est0, o.fVec), est0), + est2 = vmulq_f32(vrecpsq_f32(est1, o.fVec), est1); + return 
vmulq_f32(fVec, est2); + #endif + } + + AI SkNx operator==(const SkNx& o) const {return vreinterpretq_f32_u32(vceqq_f32(fVec, o.fVec));} + AI SkNx operator <(const SkNx& o) const {return vreinterpretq_f32_u32(vcltq_f32(fVec, o.fVec));} + AI SkNx operator >(const SkNx& o) const {return vreinterpretq_f32_u32(vcgtq_f32(fVec, o.fVec));} + AI SkNx operator<=(const SkNx& o) const {return vreinterpretq_f32_u32(vcleq_f32(fVec, o.fVec));} + AI SkNx operator>=(const SkNx& o) const {return vreinterpretq_f32_u32(vcgeq_f32(fVec, o.fVec));} + AI SkNx operator!=(const SkNx& o) const { + return vreinterpretq_f32_u32(vmvnq_u32(vceqq_f32(fVec, o.fVec))); + } + + AI static SkNx Min(const SkNx& l, const SkNx& r) { return vminq_f32(l.fVec, r.fVec); } + AI static SkNx Max(const SkNx& l, const SkNx& r) { return vmaxq_f32(l.fVec, r.fVec); } + + AI SkNx abs() const { return vabsq_f32(fVec); } + AI SkNx floor() const { + #if defined(SK_CPU_ARM64) + return vrndmq_f32(fVec); + #else + return emulate_vrndmq_f32(fVec); + #endif + } + + + AI SkNx sqrt() const { + #if defined(SK_CPU_ARM64) + return vsqrtq_f32(fVec); + #else + float32x4_t est0 = vrsqrteq_f32(fVec), + est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0), + est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1); + return vmulq_f32(fVec, est2); + #endif + } + + AI float operator[](int k) const { + SkASSERT(0 <= k && k < 4); + union { float32x4_t v; float fs[4]; } pun = {fVec}; + return pun.fs[k&3]; + } + + AI float min() const { + #if defined(SK_CPU_ARM64) + return vminvq_f32(fVec); + #else + SkNx min = Min(*this, vrev64q_f32(fVec)); + return std::min(min[0], min[2]); + #endif + } + + AI float max() const { + #if defined(SK_CPU_ARM64) + return vmaxvq_f32(fVec); + #else + SkNx max = Max(*this, vrev64q_f32(fVec)); + return std::max(max[0], max[2]); + #endif + } + + AI bool allTrue() const { + #if defined(SK_CPU_ARM64) + return 0 != vminvq_u32(vreinterpretq_u32_f32(fVec)); + #else + auto v = vreinterpretq_u32_f32(fVec); + return vgetq_lane_u32(v,0) && vgetq_lane_u32(v,1) + && vgetq_lane_u32(v,2) && vgetq_lane_u32(v,3); + #endif + } + AI bool anyTrue() const { + #if defined(SK_CPU_ARM64) + return 0 != vmaxvq_u32(vreinterpretq_u32_f32(fVec)); + #else + auto v = vreinterpretq_u32_f32(fVec); + return vgetq_lane_u32(v,0) || vgetq_lane_u32(v,1) + || vgetq_lane_u32(v,2) || vgetq_lane_u32(v,3); + #endif + } + + AI SkNx thenElse(const SkNx& t, const SkNx& e) const { + return vbslq_f32(vreinterpretq_u32_f32(fVec), t.fVec, e.fVec); + } + + float32x4_t fVec; +}; + +#if defined(SK_CPU_ARM64) + AI static Sk4f SkNx_fma(const Sk4f& f, const Sk4f& m, const Sk4f& a) { + return vfmaq_f32(a.fVec, f.fVec, m.fVec); + } +#endif + +// It's possible that for our current use cases, representing this as +// half a uint16x8_t might be better than representing it as a uint16x4_t. +// It'd make conversion to Sk4b one step simpler. 
+template <> +class SkNx<4, uint16_t> { +public: + AI SkNx(const uint16x4_t& vec) : fVec(vec) {} + + AI SkNx() {} + AI SkNx(uint16_t val) : fVec(vdup_n_u16(val)) {} + AI SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d) { + fVec = (uint16x4_t) { a,b,c,d }; + } + + AI static SkNx Load(const void* ptr) { return vld1_u16((const uint16_t*)ptr); } + AI void store(void* ptr) const { vst1_u16((uint16_t*)ptr, fVec); } + + AI static void Load4(const void* ptr, SkNx* r, SkNx* g, SkNx* b, SkNx* a) { + uint16x4x4_t rgba = vld4_u16((const uint16_t*)ptr); + *r = rgba.val[0]; + *g = rgba.val[1]; + *b = rgba.val[2]; + *a = rgba.val[3]; + } + AI static void Load3(const void* ptr, SkNx* r, SkNx* g, SkNx* b) { + uint16x4x3_t rgba = vld3_u16((const uint16_t*)ptr); + *r = rgba.val[0]; + *g = rgba.val[1]; + *b = rgba.val[2]; + } + AI static void Store4(void* dst, const SkNx& r, const SkNx& g, const SkNx& b, const SkNx& a) { + uint16x4x4_t rgba = {{ + r.fVec, + g.fVec, + b.fVec, + a.fVec, + }}; + vst4_u16((uint16_t*) dst, rgba); + } + + AI SkNx operator + (const SkNx& o) const { return vadd_u16(fVec, o.fVec); } + AI SkNx operator - (const SkNx& o) const { return vsub_u16(fVec, o.fVec); } + AI SkNx operator * (const SkNx& o) const { return vmul_u16(fVec, o.fVec); } + AI SkNx operator & (const SkNx& o) const { return vand_u16(fVec, o.fVec); } + AI SkNx operator | (const SkNx& o) const { return vorr_u16(fVec, o.fVec); } + + AI SkNx operator << (int bits) const { return fVec << SkNx(bits).fVec; } + AI SkNx operator >> (int bits) const { return fVec >> SkNx(bits).fVec; } + + AI static SkNx Min(const SkNx& a, const SkNx& b) { return vmin_u16(a.fVec, b.fVec); } + + AI uint16_t operator[](int k) const { + SkASSERT(0 <= k && k < 4); + union { uint16x4_t v; uint16_t us[4]; } pun = {fVec}; + return pun.us[k&3]; + } + + AI SkNx thenElse(const SkNx& t, const SkNx& e) const { + return vbsl_u16(fVec, t.fVec, e.fVec); + } + + uint16x4_t fVec; +}; + +template <> +class SkNx<8, uint16_t> { +public: + AI SkNx(const uint16x8_t& vec) : fVec(vec) {} + + AI SkNx() {} + AI SkNx(uint16_t val) : fVec(vdupq_n_u16(val)) {} + AI static SkNx Load(const void* ptr) { return vld1q_u16((const uint16_t*)ptr); } + + AI SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d, + uint16_t e, uint16_t f, uint16_t g, uint16_t h) { + fVec = (uint16x8_t) { a,b,c,d, e,f,g,h }; + } + + AI void store(void* ptr) const { vst1q_u16((uint16_t*)ptr, fVec); } + + AI SkNx operator + (const SkNx& o) const { return vaddq_u16(fVec, o.fVec); } + AI SkNx operator - (const SkNx& o) const { return vsubq_u16(fVec, o.fVec); } + AI SkNx operator * (const SkNx& o) const { return vmulq_u16(fVec, o.fVec); } + AI SkNx operator & (const SkNx& o) const { return vandq_u16(fVec, o.fVec); } + AI SkNx operator | (const SkNx& o) const { return vorrq_u16(fVec, o.fVec); } + + AI SkNx operator << (int bits) const { return fVec << SkNx(bits).fVec; } + AI SkNx operator >> (int bits) const { return fVec >> SkNx(bits).fVec; } + + AI static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_u16(a.fVec, b.fVec); } + + AI uint16_t operator[](int k) const { + SkASSERT(0 <= k && k < 8); + union { uint16x8_t v; uint16_t us[8]; } pun = {fVec}; + return pun.us[k&7]; + } + + AI SkNx mulHi(const SkNx& m) const { + uint32x4_t hi = vmull_u16(vget_high_u16(fVec), vget_high_u16(m.fVec)); + uint32x4_t lo = vmull_u16( vget_low_u16(fVec), vget_low_u16(m.fVec)); + + return { vcombine_u16(vshrn_n_u32(lo,16), vshrn_n_u32(hi,16)) }; + } + + AI SkNx thenElse(const SkNx& t, const SkNx& e) const { + return 
vbslq_u16(fVec, t.fVec, e.fVec); + } + + uint16x8_t fVec; +}; + +template <> +class SkNx<4, uint8_t> { +public: + typedef uint32_t __attribute__((aligned(1))) unaligned_uint32_t; + + AI SkNx(const uint8x8_t& vec) : fVec(vec) {} + + AI SkNx() {} + AI SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d) { + fVec = (uint8x8_t){a,b,c,d, 0,0,0,0}; + } + AI static SkNx Load(const void* ptr) { + return (uint8x8_t)vld1_dup_u32((const unaligned_uint32_t*)ptr); + } + AI void store(void* ptr) const { + return vst1_lane_u32((unaligned_uint32_t*)ptr, (uint32x2_t)fVec, 0); + } + AI uint8_t operator[](int k) const { + SkASSERT(0 <= k && k < 4); + union { uint8x8_t v; uint8_t us[8]; } pun = {fVec}; + return pun.us[k&3]; + } + + // TODO as needed + + uint8x8_t fVec; +}; + +template <> +class SkNx<8, uint8_t> { +public: + AI SkNx(const uint8x8_t& vec) : fVec(vec) {} + + AI SkNx() {} + AI SkNx(uint8_t val) : fVec(vdup_n_u8(val)) {} + AI SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d, + uint8_t e, uint8_t f, uint8_t g, uint8_t h) { + fVec = (uint8x8_t) { a,b,c,d, e,f,g,h }; + } + + AI static SkNx Load(const void* ptr) { return vld1_u8((const uint8_t*)ptr); } + AI void store(void* ptr) const { vst1_u8((uint8_t*)ptr, fVec); } + + AI uint8_t operator[](int k) const { + SkASSERT(0 <= k && k < 8); + union { uint8x8_t v; uint8_t us[8]; } pun = {fVec}; + return pun.us[k&7]; + } + + uint8x8_t fVec; +}; + +template <> +class SkNx<16, uint8_t> { +public: + AI SkNx(const uint8x16_t& vec) : fVec(vec) {} + + AI SkNx() {} + AI SkNx(uint8_t val) : fVec(vdupq_n_u8(val)) {} + AI SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d, + uint8_t e, uint8_t f, uint8_t g, uint8_t h, + uint8_t i, uint8_t j, uint8_t k, uint8_t l, + uint8_t m, uint8_t n, uint8_t o, uint8_t p) { + fVec = (uint8x16_t) { a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p }; + } + + AI static SkNx Load(const void* ptr) { return vld1q_u8((const uint8_t*)ptr); } + AI void store(void* ptr) const { vst1q_u8((uint8_t*)ptr, fVec); } + + AI SkNx saturatedAdd(const SkNx& o) const { return vqaddq_u8(fVec, o.fVec); } + + AI SkNx operator + (const SkNx& o) const { return vaddq_u8(fVec, o.fVec); } + AI SkNx operator - (const SkNx& o) const { return vsubq_u8(fVec, o.fVec); } + AI SkNx operator & (const SkNx& o) const { return vandq_u8(fVec, o.fVec); } + + AI static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_u8(a.fVec, b.fVec); } + AI SkNx operator < (const SkNx& o) const { return vcltq_u8(fVec, o.fVec); } + + AI uint8_t operator[](int k) const { + SkASSERT(0 <= k && k < 16); + union { uint8x16_t v; uint8_t us[16]; } pun = {fVec}; + return pun.us[k&15]; + } + + AI SkNx thenElse(const SkNx& t, const SkNx& e) const { + return vbslq_u8(fVec, t.fVec, e.fVec); + } + + uint8x16_t fVec; +}; + +template <> +class SkNx<4, int32_t> { +public: + AI SkNx(const int32x4_t& vec) : fVec(vec) {} + + AI SkNx() {} + AI SkNx(int32_t v) { + fVec = vdupq_n_s32(v); + } + AI SkNx(int32_t a, int32_t b, int32_t c, int32_t d) { + fVec = (int32x4_t){a,b,c,d}; + } + AI static SkNx Load(const void* ptr) { + return vld1q_s32((const int32_t*)ptr); + } + AI void store(void* ptr) const { + return vst1q_s32((int32_t*)ptr, fVec); + } + AI int32_t operator[](int k) const { + SkASSERT(0 <= k && k < 4); + union { int32x4_t v; int32_t is[4]; } pun = {fVec}; + return pun.is[k&3]; + } + + AI SkNx operator + (const SkNx& o) const { return vaddq_s32(fVec, o.fVec); } + AI SkNx operator - (const SkNx& o) const { return vsubq_s32(fVec, o.fVec); } + AI SkNx operator * (const SkNx& o) const { return vmulq_s32(fVec, o.fVec); } + + 
AI SkNx operator & (const SkNx& o) const { return vandq_s32(fVec, o.fVec); } + AI SkNx operator | (const SkNx& o) const { return vorrq_s32(fVec, o.fVec); } + AI SkNx operator ^ (const SkNx& o) const { return veorq_s32(fVec, o.fVec); } + + AI SkNx operator << (int bits) const { return fVec << SkNx(bits).fVec; } + AI SkNx operator >> (int bits) const { return fVec >> SkNx(bits).fVec; } + + AI SkNx operator == (const SkNx& o) const { + return vreinterpretq_s32_u32(vceqq_s32(fVec, o.fVec)); + } + AI SkNx operator < (const SkNx& o) const { + return vreinterpretq_s32_u32(vcltq_s32(fVec, o.fVec)); + } + AI SkNx operator > (const SkNx& o) const { + return vreinterpretq_s32_u32(vcgtq_s32(fVec, o.fVec)); + } + + AI static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_s32(a.fVec, b.fVec); } + AI static SkNx Max(const SkNx& a, const SkNx& b) { return vmaxq_s32(a.fVec, b.fVec); } + // TODO as needed + + AI SkNx thenElse(const SkNx& t, const SkNx& e) const { + return vbslq_s32(vreinterpretq_u32_s32(fVec), t.fVec, e.fVec); + } + + AI SkNx abs() const { return vabsq_s32(fVec); } + + int32x4_t fVec; +}; + +template <> +class SkNx<4, uint32_t> { +public: + AI SkNx(const uint32x4_t& vec) : fVec(vec) {} + + AI SkNx() {} + AI SkNx(uint32_t v) { + fVec = vdupq_n_u32(v); + } + AI SkNx(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + fVec = (uint32x4_t){a,b,c,d}; + } + AI static SkNx Load(const void* ptr) { + return vld1q_u32((const uint32_t*)ptr); + } + AI void store(void* ptr) const { + return vst1q_u32((uint32_t*)ptr, fVec); + } + AI uint32_t operator[](int k) const { + SkASSERT(0 <= k && k < 4); + union { uint32x4_t v; uint32_t us[4]; } pun = {fVec}; + return pun.us[k&3]; + } + + AI SkNx operator + (const SkNx& o) const { return vaddq_u32(fVec, o.fVec); } + AI SkNx operator - (const SkNx& o) const { return vsubq_u32(fVec, o.fVec); } + AI SkNx operator * (const SkNx& o) const { return vmulq_u32(fVec, o.fVec); } + + AI SkNx operator & (const SkNx& o) const { return vandq_u32(fVec, o.fVec); } + AI SkNx operator | (const SkNx& o) const { return vorrq_u32(fVec, o.fVec); } + AI SkNx operator ^ (const SkNx& o) const { return veorq_u32(fVec, o.fVec); } + + AI SkNx operator << (int bits) const { return fVec << SkNx(bits).fVec; } + AI SkNx operator >> (int bits) const { return fVec >> SkNx(bits).fVec; } + + AI SkNx operator == (const SkNx& o) const { return vceqq_u32(fVec, o.fVec); } + AI SkNx operator < (const SkNx& o) const { return vcltq_u32(fVec, o.fVec); } + AI SkNx operator > (const SkNx& o) const { return vcgtq_u32(fVec, o.fVec); } + + AI static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_u32(a.fVec, b.fVec); } + // TODO as needed + + AI SkNx mulHi(const SkNx& m) const { + uint64x2_t hi = vmull_u32(vget_high_u32(fVec), vget_high_u32(m.fVec)); + uint64x2_t lo = vmull_u32( vget_low_u32(fVec), vget_low_u32(m.fVec)); + + return { vcombine_u32(vshrn_n_u64(lo,32), vshrn_n_u64(hi,32)) }; + } + + AI SkNx thenElse(const SkNx& t, const SkNx& e) const { + return vbslq_u32(fVec, t.fVec, e.fVec); + } + + uint32x4_t fVec; +}; + +template<> AI /*static*/ Sk4i SkNx_cast<int32_t, float>(const Sk4f& src) { + return vcvtq_s32_f32(src.fVec); + +} +template<> AI /*static*/ Sk4f SkNx_cast<float, int32_t>(const Sk4i& src) { + return vcvtq_f32_s32(src.fVec); +} +template<> AI /*static*/ Sk4f SkNx_cast<float, uint32_t>(const Sk4u& src) { + return SkNx_cast<float>(Sk4i::Load(&src)); +} + +template<> AI /*static*/ Sk4h SkNx_cast<uint16_t, float>(const Sk4f& src) { + return vqmovn_u32(vcvtq_u32_f32(src.fVec)); +} + 
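mulHi() on the 32-bit lanes above keeps only the top 32 bits of each widened 64-bit product, which is the heart of fixed-point scaling: multiplying by q/2^32 is a full multiply by q followed by taking the high half. A small sketch of that pattern, assuming the usual Sk4u alias for SkNx<4, uint32_t> and num < den so the factor fits in 32 bits (illustrative only):

    // Scale every lane of v by the fraction num/den, rounding down.
    static inline Sk4u scale_lanes(const Sk4u& v, uint32_t num, uint32_t den) {
        // factor = floor(num * 2^32 / den), so v.mulHi(factor) is approximately v * num / den.
        uint32_t factor = (uint32_t)(((uint64_t)num << 32) / den);
        return v.mulHi(Sk4u(factor));
    }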
+template<> AI /*static*/ Sk4f SkNx_cast<float, uint16_t>(const Sk4h& src) { + return vcvtq_f32_u32(vmovl_u16(src.fVec)); +} + +template<> AI /*static*/ Sk4b SkNx_cast<uint8_t, float>(const Sk4f& src) { + uint32x4_t _32 = vcvtq_u32_f32(src.fVec); + uint16x4_t _16 = vqmovn_u32(_32); + return vqmovn_u16(vcombine_u16(_16, _16)); +} + +template<> AI /*static*/ Sk4u SkNx_cast<uint32_t, uint8_t>(const Sk4b& src) { + uint16x8_t _16 = vmovl_u8(src.fVec); + return vmovl_u16(vget_low_u16(_16)); +} + +template<> AI /*static*/ Sk4i SkNx_cast<int32_t, uint8_t>(const Sk4b& src) { + return vreinterpretq_s32_u32(SkNx_cast<uint32_t>(src).fVec); +} + +template<> AI /*static*/ Sk4f SkNx_cast<float, uint8_t>(const Sk4b& src) { + return vcvtq_f32_s32(SkNx_cast<int32_t>(src).fVec); +} + +template<> AI /*static*/ Sk16b SkNx_cast<uint8_t, float>(const Sk16f& src) { + Sk8f ab, cd; + SkNx_split(src, &ab, &cd); + + Sk4f a,b,c,d; + SkNx_split(ab, &a, &b); + SkNx_split(cd, &c, &d); + return vuzpq_u8(vuzpq_u8((uint8x16_t)vcvtq_u32_f32(a.fVec), + (uint8x16_t)vcvtq_u32_f32(b.fVec)).val[0], + vuzpq_u8((uint8x16_t)vcvtq_u32_f32(c.fVec), + (uint8x16_t)vcvtq_u32_f32(d.fVec)).val[0]).val[0]; +} + +template<> AI /*static*/ Sk8b SkNx_cast<uint8_t, int32_t>(const Sk8i& src) { + Sk4i a, b; + SkNx_split(src, &a, &b); + uint16x4_t a16 = vqmovun_s32(a.fVec); + uint16x4_t b16 = vqmovun_s32(b.fVec); + + return vqmovn_u16(vcombine_u16(a16, b16)); +} + +template<> AI /*static*/ Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) { + return vget_low_u16(vmovl_u8(src.fVec)); +} + +template<> AI /*static*/ Sk8h SkNx_cast<uint16_t, uint8_t>(const Sk8b& src) { + return vmovl_u8(src.fVec); +} + +template<> AI /*static*/ Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) { + return vmovn_u16(vcombine_u16(src.fVec, src.fVec)); +} + +template<> AI /*static*/ Sk8b SkNx_cast<uint8_t, uint16_t>(const Sk8h& src) { + return vqmovn_u16(src.fVec); +} + +template<> AI /*static*/ Sk4b SkNx_cast<uint8_t, int32_t>(const Sk4i& src) { + uint16x4_t _16 = vqmovun_s32(src.fVec); + return vqmovn_u16(vcombine_u16(_16, _16)); +} + +template<> AI /*static*/ Sk4b SkNx_cast<uint8_t, uint32_t>(const Sk4u& src) { + uint16x4_t _16 = vqmovn_u32(src.fVec); + return vqmovn_u16(vcombine_u16(_16, _16)); +} + +template<> AI /*static*/ Sk4i SkNx_cast<int32_t, uint16_t>(const Sk4h& src) { + return vreinterpretq_s32_u32(vmovl_u16(src.fVec)); +} + +template<> AI /*static*/ Sk4h SkNx_cast<uint16_t, int32_t>(const Sk4i& src) { + return vmovn_u32(vreinterpretq_u32_s32(src.fVec)); +} + +template<> AI /*static*/ Sk4i SkNx_cast<int32_t, uint32_t>(const Sk4u& src) { + return vreinterpretq_s32_u32(src.fVec); +} + +AI static Sk4i Sk4f_round(const Sk4f& x) { + return vcvtq_s32_f32((x + 0.5f).fVec); +} + +} // namespace + +#endif//SkNx_neon_DEFINED diff --git a/src/deps/skia/include/private/SkNx_sse.h b/src/deps/skia/include/private/SkNx_sse.h new file mode 100644 index 000000000..e07f780e5 --- /dev/null +++ b/src/deps/skia/include/private/SkNx_sse.h @@ -0,0 +1,823 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkNx_sse_DEFINED +#define SkNx_sse_DEFINED + +#include "include/core/SkTypes.h" + +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 + #include <smmintrin.h> +#elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 + #include <tmmintrin.h> +#else + #include <emmintrin.h> +#endif + +// This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything more recent. 
+// If you do, make sure this is in a static inline function... anywhere else risks violating ODR. + +namespace { // NOLINT(google-build-namespaces) + +// Emulate _mm_floor_ps() with SSE2: +// - roundtrip through integers via truncation +// - subtract 1 if that's too big (possible for negative values). +// This restricts the domain of our inputs to a maximum somehwere around 2^31. +// Seems plenty big. +AI static __m128 emulate_mm_floor_ps(__m128 v) { + __m128 roundtrip = _mm_cvtepi32_ps(_mm_cvttps_epi32(v)); + __m128 too_big = _mm_cmpgt_ps(roundtrip, v); + return _mm_sub_ps(roundtrip, _mm_and_ps(too_big, _mm_set1_ps(1.0f))); +} + +template <> +class SkNx<2, float> { +public: + AI SkNx(const __m128& vec) : fVec(vec) {} + + AI SkNx() {} + AI SkNx(float val) : fVec(_mm_set1_ps(val)) {} + AI static SkNx Load(const void* ptr) { + return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)ptr)); + } + AI SkNx(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {} + + AI void store(void* ptr) const { _mm_storel_pi((__m64*)ptr, fVec); } + + AI static void Load2(const void* ptr, SkNx* x, SkNx* y) { + const float* m = (const float*)ptr; + *x = SkNx{m[0], m[2]}; + *y = SkNx{m[1], m[3]}; + } + + AI static void Store2(void* dst, const SkNx& a, const SkNx& b) { + auto vals = _mm_unpacklo_ps(a.fVec, b.fVec); + _mm_storeu_ps((float*)dst, vals); + } + + AI static void Store3(void* dst, const SkNx& a, const SkNx& b, const SkNx& c) { + auto lo = _mm_setr_ps(a[0], b[0], c[0], a[1]), + hi = _mm_setr_ps(b[1], c[1], 0, 0); + _mm_storeu_ps((float*)dst, lo); + _mm_storel_pi(((__m64*)dst) + 2, hi); + } + + AI static void Store4(void* dst, const SkNx& a, const SkNx& b, const SkNx& c, const SkNx& d) { + auto lo = _mm_setr_ps(a[0], b[0], c[0], d[0]), + hi = _mm_setr_ps(a[1], b[1], c[1], d[1]); + _mm_storeu_ps((float*)dst, lo); + _mm_storeu_ps(((float*)dst) + 4, hi); + } + + AI SkNx operator - () const { return _mm_xor_ps(_mm_set1_ps(-0.0f), fVec); } + + AI SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); } + AI SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); } + AI SkNx operator * (const SkNx& o) const { return _mm_mul_ps(fVec, o.fVec); } + AI SkNx operator / (const SkNx& o) const { return _mm_div_ps(fVec, o.fVec); } + + AI SkNx operator == (const SkNx& o) const { return _mm_cmpeq_ps (fVec, o.fVec); } + AI SkNx operator != (const SkNx& o) const { return _mm_cmpneq_ps(fVec, o.fVec); } + AI SkNx operator < (const SkNx& o) const { return _mm_cmplt_ps (fVec, o.fVec); } + AI SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec); } + AI SkNx operator <= (const SkNx& o) const { return _mm_cmple_ps (fVec, o.fVec); } + AI SkNx operator >= (const SkNx& o) const { return _mm_cmpge_ps (fVec, o.fVec); } + + AI static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_ps(l.fVec, r.fVec); } + AI static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_ps(l.fVec, r.fVec); } + + AI SkNx abs() const { return _mm_andnot_ps(_mm_set1_ps(-0.0f), fVec); } + AI SkNx floor() const { + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 + return _mm_floor_ps(fVec); + #else + return emulate_mm_floor_ps(fVec); + #endif + } + + AI SkNx sqrt() const { return _mm_sqrt_ps (fVec); } + + AI float operator[](int k) const { + SkASSERT(0 <= k && k < 2); + union { __m128 v; float fs[4]; } pun = {fVec}; + return pun.fs[k&1]; + } + + AI bool allTrue() const { return 0b11 == (_mm_movemask_ps(fVec) & 0b11); } + AI bool anyTrue() const { return 0b00 != (_mm_movemask_ps(fVec) & 0b11); } + + AI 
SkNx thenElse(const SkNx& t, const SkNx& e) const { + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 + return _mm_blendv_ps(e.fVec, t.fVec, fVec); + #else + return _mm_or_ps(_mm_and_ps (fVec, t.fVec), + _mm_andnot_ps(fVec, e.fVec)); + #endif + } + + __m128 fVec; +}; + +template <> +class SkNx<4, float> { +public: + AI SkNx(const __m128& vec) : fVec(vec) {} + + AI SkNx() {} + AI SkNx(float val) : fVec( _mm_set1_ps(val) ) {} + AI SkNx(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {} + + AI static SkNx Load(const void* ptr) { return _mm_loadu_ps((const float*)ptr); } + AI void store(void* ptr) const { _mm_storeu_ps((float*)ptr, fVec); } + + AI static void Load2(const void* ptr, SkNx* x, SkNx* y) { + SkNx lo = SkNx::Load((const float*)ptr+0), + hi = SkNx::Load((const float*)ptr+4); + *x = SkNx{lo[0], lo[2], hi[0], hi[2]}; + *y = SkNx{lo[1], lo[3], hi[1], hi[3]}; + } + + AI static void Load4(const void* ptr, SkNx* r, SkNx* g, SkNx* b, SkNx* a) { + __m128 v0 = _mm_loadu_ps(((float*)ptr) + 0), + v1 = _mm_loadu_ps(((float*)ptr) + 4), + v2 = _mm_loadu_ps(((float*)ptr) + 8), + v3 = _mm_loadu_ps(((float*)ptr) + 12); + _MM_TRANSPOSE4_PS(v0, v1, v2, v3); + *r = v0; + *g = v1; + *b = v2; + *a = v3; + } + AI static void Store4(void* dst, const SkNx& r, const SkNx& g, const SkNx& b, const SkNx& a) { + __m128 v0 = r.fVec, + v1 = g.fVec, + v2 = b.fVec, + v3 = a.fVec; + _MM_TRANSPOSE4_PS(v0, v1, v2, v3); + _mm_storeu_ps(((float*) dst) + 0, v0); + _mm_storeu_ps(((float*) dst) + 4, v1); + _mm_storeu_ps(((float*) dst) + 8, v2); + _mm_storeu_ps(((float*) dst) + 12, v3); + } + + AI SkNx operator - () const { return _mm_xor_ps(_mm_set1_ps(-0.0f), fVec); } + + AI SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); } + AI SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); } + AI SkNx operator * (const SkNx& o) const { return _mm_mul_ps(fVec, o.fVec); } + AI SkNx operator / (const SkNx& o) const { return _mm_div_ps(fVec, o.fVec); } + + AI SkNx operator == (const SkNx& o) const { return _mm_cmpeq_ps (fVec, o.fVec); } + AI SkNx operator != (const SkNx& o) const { return _mm_cmpneq_ps(fVec, o.fVec); } + AI SkNx operator < (const SkNx& o) const { return _mm_cmplt_ps (fVec, o.fVec); } + AI SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec); } + AI SkNx operator <= (const SkNx& o) const { return _mm_cmple_ps (fVec, o.fVec); } + AI SkNx operator >= (const SkNx& o) const { return _mm_cmpge_ps (fVec, o.fVec); } + + AI static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_ps(l.fVec, r.fVec); } + AI static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_ps(l.fVec, r.fVec); } + + AI SkNx abs() const { return _mm_andnot_ps(_mm_set1_ps(-0.0f), fVec); } + AI SkNx floor() const { + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 + return _mm_floor_ps(fVec); + #else + return emulate_mm_floor_ps(fVec); + #endif + } + + AI SkNx sqrt() const { return _mm_sqrt_ps (fVec); } + + AI float operator[](int k) const { + SkASSERT(0 <= k && k < 4); + union { __m128 v; float fs[4]; } pun = {fVec}; + return pun.fs[k&3]; + } + + AI float min() const { + SkNx min = Min(*this, _mm_shuffle_ps(fVec, fVec, _MM_SHUFFLE(2,3,0,1))); + min = Min(min, _mm_shuffle_ps(min.fVec, min.fVec, _MM_SHUFFLE(0,1,2,3))); + return min[0]; + } + + AI float max() const { + SkNx max = Max(*this, _mm_shuffle_ps(fVec, fVec, _MM_SHUFFLE(2,3,0,1))); + max = Max(max, _mm_shuffle_ps(max.fVec, max.fVec, _MM_SHUFFLE(0,1,2,3))); + return max[0]; + } + + AI bool allTrue() const { 
return 0b1111 == _mm_movemask_ps(fVec); } + AI bool anyTrue() const { return 0b0000 != _mm_movemask_ps(fVec); } + + AI SkNx thenElse(const SkNx& t, const SkNx& e) const { + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 + return _mm_blendv_ps(e.fVec, t.fVec, fVec); + #else + return _mm_or_ps(_mm_and_ps (fVec, t.fVec), + _mm_andnot_ps(fVec, e.fVec)); + #endif + } + + __m128 fVec; +}; + +AI static __m128i mullo32(__m128i a, __m128i b) { +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 + return _mm_mullo_epi32(a, b); +#else + __m128i mul20 = _mm_mul_epu32(a, b), + mul31 = _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4)); + return _mm_unpacklo_epi32(_mm_shuffle_epi32(mul20, _MM_SHUFFLE(0,0,2,0)), + _mm_shuffle_epi32(mul31, _MM_SHUFFLE(0,0,2,0))); +#endif +} + +template <> +class SkNx<4, int32_t> { +public: + AI SkNx(const __m128i& vec) : fVec(vec) {} + + AI SkNx() {} + AI SkNx(int32_t val) : fVec(_mm_set1_epi32(val)) {} + AI static SkNx Load(const void* ptr) { return _mm_loadu_si128((const __m128i*)ptr); } + AI SkNx(int32_t a, int32_t b, int32_t c, int32_t d) : fVec(_mm_setr_epi32(a,b,c,d)) {} + + AI void store(void* ptr) const { _mm_storeu_si128((__m128i*)ptr, fVec); } + + AI SkNx operator + (const SkNx& o) const { return _mm_add_epi32(fVec, o.fVec); } + AI SkNx operator - (const SkNx& o) const { return _mm_sub_epi32(fVec, o.fVec); } + AI SkNx operator * (const SkNx& o) const { return mullo32(fVec, o.fVec); } + + AI SkNx operator & (const SkNx& o) const { return _mm_and_si128(fVec, o.fVec); } + AI SkNx operator | (const SkNx& o) const { return _mm_or_si128(fVec, o.fVec); } + AI SkNx operator ^ (const SkNx& o) const { return _mm_xor_si128(fVec, o.fVec); } + + AI SkNx operator << (int bits) const { return _mm_slli_epi32(fVec, bits); } + AI SkNx operator >> (int bits) const { return _mm_srai_epi32(fVec, bits); } + + AI SkNx operator == (const SkNx& o) const { return _mm_cmpeq_epi32 (fVec, o.fVec); } + AI SkNx operator < (const SkNx& o) const { return _mm_cmplt_epi32 (fVec, o.fVec); } + AI SkNx operator > (const SkNx& o) const { return _mm_cmpgt_epi32 (fVec, o.fVec); } + + AI int32_t operator[](int k) const { + SkASSERT(0 <= k && k < 4); + union { __m128i v; int32_t is[4]; } pun = {fVec}; + return pun.is[k&3]; + } + + AI SkNx thenElse(const SkNx& t, const SkNx& e) const { + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 + return _mm_blendv_epi8(e.fVec, t.fVec, fVec); + #else + return _mm_or_si128(_mm_and_si128 (fVec, t.fVec), + _mm_andnot_si128(fVec, e.fVec)); + #endif + } + + AI SkNx abs() const { +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 + return _mm_abs_epi32(fVec); +#else + SkNx mask = (*this) >> 31; + return (mask ^ (*this)) - mask; +#endif + } + + AI static SkNx Min(const SkNx& x, const SkNx& y) { +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 + return _mm_min_epi32(x.fVec, y.fVec); +#else + return (x < y).thenElse(x, y); +#endif + } + + AI static SkNx Max(const SkNx& x, const SkNx& y) { +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 + return _mm_max_epi32(x.fVec, y.fVec); +#else + return (x > y).thenElse(x, y); +#endif + } + + __m128i fVec; +}; + +template <> +class SkNx<2, uint32_t> { +public: + AI SkNx(const __m128i& vec) : fVec(vec) {} + + AI SkNx() {} + AI SkNx(uint32_t val) : fVec(_mm_set1_epi32((int)val)) {} + AI static SkNx Load(const void* ptr) { return _mm_loadl_epi64((const __m128i*)ptr); } + AI SkNx(uint32_t a, uint32_t b) : fVec(_mm_setr_epi32((int)a,(int)b,0,0)) {} + + AI void store(void* ptr) const { _mm_storel_epi64((__m128i*)ptr, fVec); } + + AI SkNx 
operator + (const SkNx& o) const { return _mm_add_epi32(fVec, o.fVec); } + AI SkNx operator - (const SkNx& o) const { return _mm_sub_epi32(fVec, o.fVec); } + AI SkNx operator * (const SkNx& o) const { return mullo32(fVec, o.fVec); } + + AI SkNx operator & (const SkNx& o) const { return _mm_and_si128(fVec, o.fVec); } + AI SkNx operator | (const SkNx& o) const { return _mm_or_si128(fVec, o.fVec); } + AI SkNx operator ^ (const SkNx& o) const { return _mm_xor_si128(fVec, o.fVec); } + + AI SkNx operator << (int bits) const { return _mm_slli_epi32(fVec, bits); } + AI SkNx operator >> (int bits) const { return _mm_srli_epi32(fVec, bits); } + + AI SkNx operator == (const SkNx& o) const { return _mm_cmpeq_epi32 (fVec, o.fVec); } + AI SkNx operator != (const SkNx& o) const { return (*this == o) ^ 0xffffffff; } + // operator < and > take a little extra fiddling to make work for unsigned ints. + + AI uint32_t operator[](int k) const { + SkASSERT(0 <= k && k < 2); + union { __m128i v; uint32_t us[4]; } pun = {fVec}; + return pun.us[k&1]; + } + + AI SkNx thenElse(const SkNx& t, const SkNx& e) const { +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 + return _mm_blendv_epi8(e.fVec, t.fVec, fVec); +#else + return _mm_or_si128(_mm_and_si128 (fVec, t.fVec), + _mm_andnot_si128(fVec, e.fVec)); +#endif + } + + AI bool allTrue() const { return 0xff == (_mm_movemask_epi8(fVec) & 0xff); } + + __m128i fVec; +}; + +template <> +class SkNx<4, uint32_t> { +public: + AI SkNx(const __m128i& vec) : fVec(vec) {} + + AI SkNx() {} + AI SkNx(uint32_t val) : fVec(_mm_set1_epi32((int)val)) {} + AI static SkNx Load(const void* ptr) { return _mm_loadu_si128((const __m128i*)ptr); } + AI SkNx(uint32_t a, uint32_t b, uint32_t c, uint32_t d) + : fVec(_mm_setr_epi32((int)a,(int)b,(int)c,(int)d)) {} + + AI void store(void* ptr) const { _mm_storeu_si128((__m128i*)ptr, fVec); } + + AI SkNx operator + (const SkNx& o) const { return _mm_add_epi32(fVec, o.fVec); } + AI SkNx operator - (const SkNx& o) const { return _mm_sub_epi32(fVec, o.fVec); } + AI SkNx operator * (const SkNx& o) const { return mullo32(fVec, o.fVec); } + + AI SkNx operator & (const SkNx& o) const { return _mm_and_si128(fVec, o.fVec); } + AI SkNx operator | (const SkNx& o) const { return _mm_or_si128(fVec, o.fVec); } + AI SkNx operator ^ (const SkNx& o) const { return _mm_xor_si128(fVec, o.fVec); } + + AI SkNx operator << (int bits) const { return _mm_slli_epi32(fVec, bits); } + AI SkNx operator >> (int bits) const { return _mm_srli_epi32(fVec, bits); } + + AI SkNx operator == (const SkNx& o) const { return _mm_cmpeq_epi32 (fVec, o.fVec); } + AI SkNx operator != (const SkNx& o) const { return (*this == o) ^ 0xffffffff; } + + // operator < and > take a little extra fiddling to make work for unsigned ints. 
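+    // One standard way to get them would be to bias both sides into signed range and reuse the
+    // signed compare, e.g. _mm_cmplt_epi32(_mm_xor_si128(fVec, top), _mm_xor_si128(o.fVec, top))
+    // with top = _mm_set1_epi32((int)0x80000000), the same trick the uint8_t classes below use.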
+ + AI uint32_t operator[](int k) const { + SkASSERT(0 <= k && k < 4); + union { __m128i v; uint32_t us[4]; } pun = {fVec}; + return pun.us[k&3]; + } + + AI SkNx thenElse(const SkNx& t, const SkNx& e) const { + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 + return _mm_blendv_epi8(e.fVec, t.fVec, fVec); + #else + return _mm_or_si128(_mm_and_si128 (fVec, t.fVec), + _mm_andnot_si128(fVec, e.fVec)); + #endif + } + + AI SkNx mulHi(SkNx m) const { + SkNx v20{_mm_mul_epu32(m.fVec, fVec)}; + SkNx v31{_mm_mul_epu32(_mm_srli_si128(m.fVec, 4), _mm_srli_si128(fVec, 4))}; + + return SkNx{v20[1], v31[1], v20[3], v31[3]}; + } + + __m128i fVec; +}; + +template <> +class SkNx<4, uint16_t> { +public: + AI SkNx(const __m128i& vec) : fVec(vec) {} + + AI SkNx() {} + AI SkNx(uint16_t val) : fVec(_mm_set1_epi16((short)val)) {} + AI SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d) + : fVec(_mm_setr_epi16((short)a,(short)b,(short)c,(short)d,0,0,0,0)) {} + + AI static SkNx Load(const void* ptr) { return _mm_loadl_epi64((const __m128i*)ptr); } + AI void store(void* ptr) const { _mm_storel_epi64((__m128i*)ptr, fVec); } + + AI static void Load4(const void* ptr, SkNx* r, SkNx* g, SkNx* b, SkNx* a) { + __m128i lo = _mm_loadu_si128(((__m128i*)ptr) + 0), + hi = _mm_loadu_si128(((__m128i*)ptr) + 1); + __m128i even = _mm_unpacklo_epi16(lo, hi), // r0 r2 g0 g2 b0 b2 a0 a2 + odd = _mm_unpackhi_epi16(lo, hi); // r1 r3 ... + __m128i rg = _mm_unpacklo_epi16(even, odd), // r0 r1 r2 r3 g0 g1 g2 g3 + ba = _mm_unpackhi_epi16(even, odd); // b0 b1 ... a0 a1 ... + *r = rg; + *g = _mm_srli_si128(rg, 8); + *b = ba; + *a = _mm_srli_si128(ba, 8); + } + AI static void Load3(const void* ptr, SkNx* r, SkNx* g, SkNx* b) { + // The idea here is to get 4 vectors that are R G B _ _ _ _ _. + // The second load is at a funny location to make sure we don't read past + // the bounds of memory. This is fine, we just need to shift it a little bit. 
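+        // Concretely: the caller provides 4 RGB triples = 12 uint16_t = 24 bytes. A 16-byte load
+        // at short 6 (start of the third triple) would touch bytes 12..27 and overrun by 4 bytes,
+        // so we load at short 4 instead (bytes 8..23, exactly in bounds) and then shift the two
+        // extra leading shorts away.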
+ const uint8_t* ptr8 = (const uint8_t*) ptr; + __m128i rgb0 = _mm_loadu_si128((const __m128i*) (ptr8 + 0)); + __m128i rgb1 = _mm_srli_si128(rgb0, 3*2); + __m128i rgb2 = _mm_srli_si128(_mm_loadu_si128((const __m128i*) (ptr8 + 4*2)), 2*2); + __m128i rgb3 = _mm_srli_si128(rgb2, 3*2); + + __m128i rrggbb01 = _mm_unpacklo_epi16(rgb0, rgb1); + __m128i rrggbb23 = _mm_unpacklo_epi16(rgb2, rgb3); + *r = _mm_unpacklo_epi32(rrggbb01, rrggbb23); + *g = _mm_srli_si128(r->fVec, 4*2); + *b = _mm_unpackhi_epi32(rrggbb01, rrggbb23); + } + AI static void Store4(void* dst, const SkNx& r, const SkNx& g, const SkNx& b, const SkNx& a) { + __m128i rg = _mm_unpacklo_epi16(r.fVec, g.fVec); + __m128i ba = _mm_unpacklo_epi16(b.fVec, a.fVec); + __m128i lo = _mm_unpacklo_epi32(rg, ba); + __m128i hi = _mm_unpackhi_epi32(rg, ba); + _mm_storeu_si128(((__m128i*) dst) + 0, lo); + _mm_storeu_si128(((__m128i*) dst) + 1, hi); + } + + AI SkNx operator + (const SkNx& o) const { return _mm_add_epi16(fVec, o.fVec); } + AI SkNx operator - (const SkNx& o) const { return _mm_sub_epi16(fVec, o.fVec); } + AI SkNx operator * (const SkNx& o) const { return _mm_mullo_epi16(fVec, o.fVec); } + AI SkNx operator & (const SkNx& o) const { return _mm_and_si128(fVec, o.fVec); } + AI SkNx operator | (const SkNx& o) const { return _mm_or_si128(fVec, o.fVec); } + + AI SkNx operator << (int bits) const { return _mm_slli_epi16(fVec, bits); } + AI SkNx operator >> (int bits) const { return _mm_srli_epi16(fVec, bits); } + + AI uint16_t operator[](int k) const { + SkASSERT(0 <= k && k < 4); + union { __m128i v; uint16_t us[8]; } pun = {fVec}; + return pun.us[k&3]; + } + + __m128i fVec; +}; + +template <> +class SkNx<8, uint16_t> { +public: + AI SkNx(const __m128i& vec) : fVec(vec) {} + + AI SkNx() {} + AI SkNx(uint16_t val) : fVec(_mm_set1_epi16((short)val)) {} + AI SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d, + uint16_t e, uint16_t f, uint16_t g, uint16_t h) + : fVec(_mm_setr_epi16((short)a,(short)b,(short)c,(short)d, + (short)e,(short)f,(short)g,(short)h)) {} + + AI static SkNx Load(const void* ptr) { return _mm_loadu_si128((const __m128i*)ptr); } + AI void store(void* ptr) const { _mm_storeu_si128((__m128i*)ptr, fVec); } + + AI static void Load4(const void* ptr, SkNx* r, SkNx* g, SkNx* b, SkNx* a) { + __m128i _01 = _mm_loadu_si128(((__m128i*)ptr) + 0), + _23 = _mm_loadu_si128(((__m128i*)ptr) + 1), + _45 = _mm_loadu_si128(((__m128i*)ptr) + 2), + _67 = _mm_loadu_si128(((__m128i*)ptr) + 3); + + __m128i _02 = _mm_unpacklo_epi16(_01, _23), // r0 r2 g0 g2 b0 b2 a0 a2 + _13 = _mm_unpackhi_epi16(_01, _23), // r1 r3 g1 g3 b1 b3 a1 a3 + _46 = _mm_unpacklo_epi16(_45, _67), + _57 = _mm_unpackhi_epi16(_45, _67); + + __m128i rg0123 = _mm_unpacklo_epi16(_02, _13), // r0 r1 r2 r3 g0 g1 g2 g3 + ba0123 = _mm_unpackhi_epi16(_02, _13), // b0 b1 b2 b3 a0 a1 a2 a3 + rg4567 = _mm_unpacklo_epi16(_46, _57), + ba4567 = _mm_unpackhi_epi16(_46, _57); + + *r = _mm_unpacklo_epi64(rg0123, rg4567); + *g = _mm_unpackhi_epi64(rg0123, rg4567); + *b = _mm_unpacklo_epi64(ba0123, ba4567); + *a = _mm_unpackhi_epi64(ba0123, ba4567); + } + AI static void Load3(const void* ptr, SkNx* r, SkNx* g, SkNx* b) { + const uint8_t* ptr8 = (const uint8_t*) ptr; + __m128i rgb0 = _mm_loadu_si128((const __m128i*) (ptr8 + 0*2)); + __m128i rgb1 = _mm_srli_si128(rgb0, 3*2); + __m128i rgb2 = _mm_loadu_si128((const __m128i*) (ptr8 + 6*2)); + __m128i rgb3 = _mm_srli_si128(rgb2, 3*2); + __m128i rgb4 = _mm_loadu_si128((const __m128i*) (ptr8 + 12*2)); + __m128i rgb5 = _mm_srli_si128(rgb4, 3*2); + __m128i 
rgb6 = _mm_srli_si128(_mm_loadu_si128((const __m128i*) (ptr8 + 16*2)), 2*2); + __m128i rgb7 = _mm_srli_si128(rgb6, 3*2); + + __m128i rgb01 = _mm_unpacklo_epi16(rgb0, rgb1); + __m128i rgb23 = _mm_unpacklo_epi16(rgb2, rgb3); + __m128i rgb45 = _mm_unpacklo_epi16(rgb4, rgb5); + __m128i rgb67 = _mm_unpacklo_epi16(rgb6, rgb7); + + __m128i rg03 = _mm_unpacklo_epi32(rgb01, rgb23); + __m128i bx03 = _mm_unpackhi_epi32(rgb01, rgb23); + __m128i rg47 = _mm_unpacklo_epi32(rgb45, rgb67); + __m128i bx47 = _mm_unpackhi_epi32(rgb45, rgb67); + + *r = _mm_unpacklo_epi64(rg03, rg47); + *g = _mm_unpackhi_epi64(rg03, rg47); + *b = _mm_unpacklo_epi64(bx03, bx47); + } + AI static void Store4(void* ptr, const SkNx& r, const SkNx& g, const SkNx& b, const SkNx& a) { + __m128i rg0123 = _mm_unpacklo_epi16(r.fVec, g.fVec), // r0 g0 r1 g1 r2 g2 r3 g3 + rg4567 = _mm_unpackhi_epi16(r.fVec, g.fVec), // r4 g4 r5 g5 r6 g6 r7 g7 + ba0123 = _mm_unpacklo_epi16(b.fVec, a.fVec), + ba4567 = _mm_unpackhi_epi16(b.fVec, a.fVec); + + _mm_storeu_si128((__m128i*)ptr + 0, _mm_unpacklo_epi32(rg0123, ba0123)); + _mm_storeu_si128((__m128i*)ptr + 1, _mm_unpackhi_epi32(rg0123, ba0123)); + _mm_storeu_si128((__m128i*)ptr + 2, _mm_unpacklo_epi32(rg4567, ba4567)); + _mm_storeu_si128((__m128i*)ptr + 3, _mm_unpackhi_epi32(rg4567, ba4567)); + } + + AI SkNx operator + (const SkNx& o) const { return _mm_add_epi16(fVec, o.fVec); } + AI SkNx operator - (const SkNx& o) const { return _mm_sub_epi16(fVec, o.fVec); } + AI SkNx operator * (const SkNx& o) const { return _mm_mullo_epi16(fVec, o.fVec); } + AI SkNx operator & (const SkNx& o) const { return _mm_and_si128(fVec, o.fVec); } + AI SkNx operator | (const SkNx& o) const { return _mm_or_si128(fVec, o.fVec); } + + AI SkNx operator << (int bits) const { return _mm_slli_epi16(fVec, bits); } + AI SkNx operator >> (int bits) const { return _mm_srli_epi16(fVec, bits); } + + AI static SkNx Min(const SkNx& a, const SkNx& b) { + // No unsigned _mm_min_epu16, so we'll shift into a space where we can use the + // signed version, _mm_min_epi16, then shift back. + const uint16_t top = 0x8000; // Keep this separate from _mm_set1_epi16 or MSVC will whine. 
+ const __m128i top_8x = _mm_set1_epi16((short)top); + return _mm_add_epi8(top_8x, _mm_min_epi16(_mm_sub_epi8(a.fVec, top_8x), + _mm_sub_epi8(b.fVec, top_8x))); + } + + AI SkNx mulHi(const SkNx& m) const { + return _mm_mulhi_epu16(fVec, m.fVec); + } + + AI SkNx thenElse(const SkNx& t, const SkNx& e) const { + return _mm_or_si128(_mm_and_si128 (fVec, t.fVec), + _mm_andnot_si128(fVec, e.fVec)); + } + + AI uint16_t operator[](int k) const { + SkASSERT(0 <= k && k < 8); + union { __m128i v; uint16_t us[8]; } pun = {fVec}; + return pun.us[k&7]; + } + + __m128i fVec; +}; + +template <> +class SkNx<4, uint8_t> { +public: + AI SkNx() {} + AI SkNx(const __m128i& vec) : fVec(vec) {} + AI SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d) + : fVec(_mm_setr_epi8((char)a,(char)b,(char)c,(char)d, 0,0,0,0, 0,0,0,0, 0,0,0,0)) {} + + AI static SkNx Load(const void* ptr) { return _mm_cvtsi32_si128(*(const int*)ptr); } + AI void store(void* ptr) const { *(int*)ptr = _mm_cvtsi128_si32(fVec); } + + AI uint8_t operator[](int k) const { + SkASSERT(0 <= k && k < 4); + union { __m128i v; uint8_t us[16]; } pun = {fVec}; + return pun.us[k&3]; + } + + // TODO as needed + + __m128i fVec; +}; + +template <> +class SkNx<8, uint8_t> { +public: + AI SkNx(const __m128i& vec) : fVec(vec) {} + + AI SkNx() {} + AI SkNx(uint8_t val) : fVec(_mm_set1_epi8((char)val)) {} + AI static SkNx Load(const void* ptr) { return _mm_loadl_epi64((const __m128i*)ptr); } + AI SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d, + uint8_t e, uint8_t f, uint8_t g, uint8_t h) + : fVec(_mm_setr_epi8((char)a,(char)b,(char)c,(char)d, + (char)e,(char)f,(char)g,(char)h, + 0,0,0,0, 0,0,0,0)) {} + + AI void store(void* ptr) const {_mm_storel_epi64((__m128i*)ptr, fVec);} + + AI SkNx saturatedAdd(const SkNx& o) const { return _mm_adds_epu8(fVec, o.fVec); } + + AI SkNx operator + (const SkNx& o) const { return _mm_add_epi8(fVec, o.fVec); } + AI SkNx operator - (const SkNx& o) const { return _mm_sub_epi8(fVec, o.fVec); } + + AI static SkNx Min(const SkNx& a, const SkNx& b) { return _mm_min_epu8(a.fVec, b.fVec); } + AI SkNx operator < (const SkNx& o) const { + // There's no unsigned _mm_cmplt_epu8, so we flip the sign bits then use a signed compare. 
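+        // Flipping the top bit maps unsigned [0,255] onto signed [-128,127] monotonically, so
+        // (a ^ 0x80) < (b ^ 0x80) as signed bytes exactly when a < b as unsigned bytes.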
+ auto flip = _mm_set1_epi8(char(0x80)); + return _mm_cmplt_epi8(_mm_xor_si128(flip, fVec), _mm_xor_si128(flip, o.fVec)); + } + + AI uint8_t operator[](int k) const { + SkASSERT(0 <= k && k < 16); + union { __m128i v; uint8_t us[16]; } pun = {fVec}; + return pun.us[k&15]; + } + + AI SkNx thenElse(const SkNx& t, const SkNx& e) const { + return _mm_or_si128(_mm_and_si128 (fVec, t.fVec), + _mm_andnot_si128(fVec, e.fVec)); + } + + __m128i fVec; +}; + +template <> +class SkNx<16, uint8_t> { +public: + AI SkNx(const __m128i& vec) : fVec(vec) {} + + AI SkNx() {} + AI SkNx(uint8_t val) : fVec(_mm_set1_epi8((char)val)) {} + AI static SkNx Load(const void* ptr) { return _mm_loadu_si128((const __m128i*)ptr); } + AI SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d, + uint8_t e, uint8_t f, uint8_t g, uint8_t h, + uint8_t i, uint8_t j, uint8_t k, uint8_t l, + uint8_t m, uint8_t n, uint8_t o, uint8_t p) + : fVec(_mm_setr_epi8((char)a,(char)b,(char)c,(char)d, + (char)e,(char)f,(char)g,(char)h, + (char)i,(char)j,(char)k,(char)l, + (char)m,(char)n,(char)o,(char)p)) {} + + AI void store(void* ptr) const { _mm_storeu_si128((__m128i*)ptr, fVec); } + + AI SkNx saturatedAdd(const SkNx& o) const { return _mm_adds_epu8(fVec, o.fVec); } + + AI SkNx operator + (const SkNx& o) const { return _mm_add_epi8(fVec, o.fVec); } + AI SkNx operator - (const SkNx& o) const { return _mm_sub_epi8(fVec, o.fVec); } + AI SkNx operator & (const SkNx& o) const { return _mm_and_si128(fVec, o.fVec); } + + AI static SkNx Min(const SkNx& a, const SkNx& b) { return _mm_min_epu8(a.fVec, b.fVec); } + AI SkNx operator < (const SkNx& o) const { + // There's no unsigned _mm_cmplt_epu8, so we flip the sign bits then use a signed compare. + auto flip = _mm_set1_epi8(char(0x80)); + return _mm_cmplt_epi8(_mm_xor_si128(flip, fVec), _mm_xor_si128(flip, o.fVec)); + } + + AI uint8_t operator[](int k) const { + SkASSERT(0 <= k && k < 16); + union { __m128i v; uint8_t us[16]; } pun = {fVec}; + return pun.us[k&15]; + } + + AI SkNx thenElse(const SkNx& t, const SkNx& e) const { + return _mm_or_si128(_mm_and_si128 (fVec, t.fVec), + _mm_andnot_si128(fVec, e.fVec)); + } + + __m128i fVec; +}; + +template<> AI /*static*/ Sk4f SkNx_cast<float, int32_t>(const Sk4i& src) { + return _mm_cvtepi32_ps(src.fVec); +} + +template<> AI /*static*/ Sk4f SkNx_cast<float, uint32_t>(const Sk4u& src) { + return SkNx_cast<float>(Sk4i::Load(&src)); +} + +template <> AI /*static*/ Sk4i SkNx_cast<int32_t, float>(const Sk4f& src) { + return _mm_cvttps_epi32(src.fVec); +} + +template<> AI /*static*/ Sk4h SkNx_cast<uint16_t, int32_t>(const Sk4i& src) { +#if 0 && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 + // TODO: This seems to be causing code generation problems. Investigate? + return _mm_packus_epi32(src.fVec); +#elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 + // With SSSE3, we can just shuffle the low 2 bytes from each lane right into place. + const int _ = ~0; + return _mm_shuffle_epi8(src.fVec, _mm_setr_epi8(0,1, 4,5, 8,9, 12,13, _,_,_,_,_,_,_,_)); +#else + // With SSE2, we have to sign extend our input, making _mm_packs_epi32 do the pack we want. 
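+    // Shifting left then arithmetic-right by 16 replaces each lane with the sign extension of its
+    // low 16 bits, so every lane already lies in [-32768, 32767] and the saturating pack copies
+    // those low 16 bits through unchanged.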
+ __m128i x = _mm_srai_epi32(_mm_slli_epi32(src.fVec, 16), 16); + return _mm_packs_epi32(x,x); +#endif +} + +template<> AI /*static*/ Sk4h SkNx_cast<uint16_t, float>(const Sk4f& src) { + return SkNx_cast<uint16_t>(SkNx_cast<int32_t>(src)); +} + +template<> AI /*static*/ Sk4b SkNx_cast<uint8_t, float>(const Sk4f& src) { + auto _32 = _mm_cvttps_epi32(src.fVec); +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 + const int _ = ~0; + return _mm_shuffle_epi8(_32, _mm_setr_epi8(0,4,8,12, _,_,_,_, _,_,_,_, _,_,_,_)); +#else + auto _16 = _mm_packus_epi16(_32, _32); + return _mm_packus_epi16(_16, _16); +#endif +} + +template<> AI /*static*/ Sk4u SkNx_cast<uint32_t, uint8_t>(const Sk4b& src) { +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 + const int _ = ~0; + return _mm_shuffle_epi8(src.fVec, _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, 3,_,_,_)); +#else + auto _16 = _mm_unpacklo_epi8(src.fVec, _mm_setzero_si128()); + return _mm_unpacklo_epi16(_16, _mm_setzero_si128()); +#endif +} + +template<> AI /*static*/ Sk4i SkNx_cast<int32_t, uint8_t>(const Sk4b& src) { + return SkNx_cast<uint32_t>(src).fVec; +} + +template<> AI /*static*/ Sk4f SkNx_cast<float, uint8_t>(const Sk4b& src) { + return _mm_cvtepi32_ps(SkNx_cast<int32_t>(src).fVec); +} + +template<> AI /*static*/ Sk4f SkNx_cast<float, uint16_t>(const Sk4h& src) { + auto _32 = _mm_unpacklo_epi16(src.fVec, _mm_setzero_si128()); + return _mm_cvtepi32_ps(_32); +} + +template<> AI /*static*/ Sk8b SkNx_cast<uint8_t, int32_t>(const Sk8i& src) { + Sk4i lo, hi; + SkNx_split(src, &lo, &hi); + + auto t = _mm_packs_epi32(lo.fVec, hi.fVec); + return _mm_packus_epi16(t, t); +} + +template<> AI /*static*/ Sk16b SkNx_cast<uint8_t, float>(const Sk16f& src) { + Sk8f ab, cd; + SkNx_split(src, &ab, &cd); + + Sk4f a,b,c,d; + SkNx_split(ab, &a, &b); + SkNx_split(cd, &c, &d); + + return _mm_packus_epi16(_mm_packus_epi16(_mm_cvttps_epi32(a.fVec), + _mm_cvttps_epi32(b.fVec)), + _mm_packus_epi16(_mm_cvttps_epi32(c.fVec), + _mm_cvttps_epi32(d.fVec))); +} + +template<> AI /*static*/ Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) { + return _mm_unpacklo_epi8(src.fVec, _mm_setzero_si128()); +} + +template<> AI /*static*/ Sk8h SkNx_cast<uint16_t, uint8_t>(const Sk8b& src) { + return _mm_unpacklo_epi8(src.fVec, _mm_setzero_si128()); +} + +template<> AI /*static*/ Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) { + return _mm_packus_epi16(src.fVec, src.fVec); +} + +template<> AI /*static*/ Sk8b SkNx_cast<uint8_t, uint16_t>(const Sk8h& src) { + return _mm_packus_epi16(src.fVec, src.fVec); +} + +template<> AI /*static*/ Sk4i SkNx_cast<int32_t, uint16_t>(const Sk4h& src) { + return _mm_unpacklo_epi16(src.fVec, _mm_setzero_si128()); +} + + +template<> AI /*static*/ Sk4b SkNx_cast<uint8_t, int32_t>(const Sk4i& src) { + return _mm_packus_epi16(_mm_packus_epi16(src.fVec, src.fVec), src.fVec); +} + +template<> AI /*static*/ Sk4b SkNx_cast<uint8_t, uint32_t>(const Sk4u& src) { + return _mm_packus_epi16(_mm_packus_epi16(src.fVec, src.fVec), src.fVec); +} + +template<> AI /*static*/ Sk4i SkNx_cast<int32_t, uint32_t>(const Sk4u& src) { + return src.fVec; +} + +AI static Sk4i Sk4f_round(const Sk4f& x) { + return _mm_cvtps_epi32(x.fVec); +} + +} // namespace + +#endif//SkNx_sse_DEFINED diff --git a/src/deps/skia/include/private/SkOnce.h b/src/deps/skia/include/private/SkOnce.h new file mode 100644 index 000000000..edf3e8335 --- /dev/null +++ b/src/deps/skia/include/private/SkOnce.h @@ -0,0 +1,53 @@ +/* + * Copyright 2013 Google Inc. 
+ * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkOnce_DEFINED +#define SkOnce_DEFINED + +#include "include/private/SkThreadAnnotations.h" +#include <atomic> +#include <utility> + +// SkOnce provides call-once guarantees for Skia, much like std::once_flag/std::call_once(). +// +// There should be no particularly error-prone gotcha use cases when using SkOnce. +// It works correctly as a class member, a local, a global, a function-scoped static, whatever. + +class SkOnce { +public: + constexpr SkOnce() = default; + + template <typename Fn, typename... Args> + void operator()(Fn&& fn, Args&&... args) { + auto state = fState.load(std::memory_order_acquire); + + if (state == Done) { + return; + } + + // If it looks like no one has started calling fn(), try to claim that job. + if (state == NotStarted && fState.compare_exchange_strong(state, Claimed, + std::memory_order_relaxed, + std::memory_order_relaxed)) { + // Great! We'll run fn() then notify the other threads by releasing Done into fState. + fn(std::forward<Args>(args)...); + return fState.store(Done, std::memory_order_release); + } + + // Some other thread is calling fn(). + // We'll just spin here acquiring until it releases Done into fState. + SK_POTENTIALLY_BLOCKING_REGION_BEGIN; + while (fState.load(std::memory_order_acquire) != Done) { /*spin*/ } + SK_POTENTIALLY_BLOCKING_REGION_END; + } + +private: + enum State : uint8_t { NotStarted, Claimed, Done}; + std::atomic<uint8_t> fState{NotStarted}; +}; + +#endif // SkOnce_DEFINED diff --git a/src/deps/skia/include/private/SkOpts_spi.h b/src/deps/skia/include/private/SkOpts_spi.h new file mode 100644 index 000000000..e57dc1433 --- /dev/null +++ b/src/deps/skia/include/private/SkOpts_spi.h @@ -0,0 +1,21 @@ +/* + * Copyright 2020 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkOpts_spi_DEFINED +#define SkOpts_spi_DEFINED + +#include "include/core/SkTypes.h" + +// These are exposed as SK_SPI (e.g. SkParagraph), the rest of SkOpts is +// declared in src/core + +namespace SkOpts { + // The fastest high quality 32-bit hash we can provide on this platform. + extern uint32_t SK_SPI (*hash_fn)(const void* data, size_t bytes, uint32_t seed); +} // namespace SkOpts + +#endif diff --git a/src/deps/skia/include/private/SkPaintParamsKey.h b/src/deps/skia/include/private/SkPaintParamsKey.h new file mode 100644 index 000000000..44a88f59f --- /dev/null +++ b/src/deps/skia/include/private/SkPaintParamsKey.h @@ -0,0 +1,110 @@ +/* + * Copyright 2022 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkPaintParamsKey_DEFINED +#define SkPaintParamsKey_DEFINED + +#include <array> +#include <limits> +#include "include/core/SkTypes.h" + +enum class SkBackend : uint8_t { + kGanesh, + kGraphite, + kSkVM +}; + +// TODO: this needs to be expanded into a more flexible dictionary (esp. for user-supplied SkSL) +// TODO: should this enum actually be in ShaderCodeDictionary.h? +enum class CodeSnippetID : uint8_t { + // TODO: It seems like this requires some refinement. Fundamentally this doesn't seem like a + // draw that originated from a PaintParams. 
+ kDepthStencilOnlyDraw, + + // SkShader code snippets + kSolidColorShader, + kLinearGradientShader, + kRadialGradientShader, + kSweepGradientShader, + kConicalGradientShader, + + // BlendMode code snippets + kSimpleBlendMode, + + kLast = kSimpleBlendMode +}; +static constexpr int kCodeSnippetIDCount = static_cast<int>(CodeSnippetID::kLast) + 1; + +// This class is a compact representation of the shader needed to implement a given +// PaintParams. Its structure is a series of blocks where each block has a +// header that consists of 2-bytes - a 1-byte code-snippet ID and a 1-byte number-of-bytes-in-the- +// block field. The rest of the data in the block is dependent on the individual code snippet. +class SkPaintParamsKey { +public: + static const int kBlockHeaderSizeInBytes = 2; + static const int kBlockSizeOffsetInBytes = 1; // offset to the block size w/in the header + + // Block headers have the following structure: + // 1st byte: codeSnippetID + // 2nd byte: total blockSize in bytes + // Returns the header's offset in the key - to be passed back into endBlock + int beginBlock(CodeSnippetID codeSnippetID) { + SkASSERT(fNumBytes < kMaxKeySize); + + this->addByte((uint8_t) codeSnippetID); + this->addByte(0); // this needs to be patched up with a call to endBlock + return fNumBytes - kBlockHeaderSizeInBytes; + } + + // Update the size byte of a block header + void endBlock(int headerOffset, CodeSnippetID codeSnippetID) { + SkASSERT(fData[headerOffset] == (uint32_t) codeSnippetID); + int blockSize = fNumBytes - headerOffset; + SkASSERT(blockSize <= kMaxBlockSize); + fData[headerOffset+1] = blockSize; + } + + std::pair<CodeSnippetID, uint8_t> readCodeSnippetID(int headerOffset) const { + SkASSERT(headerOffset < kMaxKeySize - kBlockHeaderSizeInBytes); + + CodeSnippetID id = static_cast<CodeSnippetID>(fData[headerOffset]); + uint8_t blockSize = fData[headerOffset+1]; + SkASSERT(headerOffset + blockSize <= this->sizeInBytes()); + + return { id, blockSize }; + } + + void addByte(uint8_t byte) { + SkASSERT(fNumBytes < kMaxKeySize); + + fData[fNumBytes++] = byte; + } + +#ifdef SK_DEBUG + static int DumpBlock(const SkPaintParamsKey&, int headerOffset); + void dump() const; +#endif + + uint8_t byte(int offset) const { SkASSERT(offset < fNumBytes); return fData[offset]; } + const void* data() const { return fData.data(); } + int sizeInBytes() const { return fNumBytes; } + + bool operator==(const SkPaintParamsKey& that) const; + bool operator!=(const SkPaintParamsKey& that) const { return !(*this == that); } + +private: + // TODO: need to make it so the key can can dynamically grow + static const int kMaxKeySize = 32; + static const int kMaxBlockSize = std::numeric_limits<uint8_t>::max(); + + // TODO: It is probably overkill but we could encode the SkBackend in the first byte of + // the key. + int fNumBytes = 0; + std::array<uint8_t, kMaxKeySize> fData; +}; + +#endif // SkPaintParamsKey_DEFINED diff --git a/src/deps/skia/include/private/SkPathRef.h b/src/deps/skia/include/private/SkPathRef.h new file mode 100644 index 000000000..301f3b751 --- /dev/null +++ b/src/deps/skia/include/private/SkPathRef.h @@ -0,0 +1,536 @@ +/* + * Copyright 2012 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#ifndef SkPathRef_DEFINED +#define SkPathRef_DEFINED + +#include "include/core/SkMatrix.h" +#include "include/core/SkPoint.h" +#include "include/core/SkRRect.h" +#include "include/core/SkRect.h" +#include "include/core/SkRefCnt.h" +#include "include/private/SkIDChangeListener.h" +#include "include/private/SkMutex.h" +#include "include/private/SkTDArray.h" +#include "include/private/SkTemplates.h" +#include "include/private/SkTo.h" + +#include <atomic> +#include <limits> +#include <tuple> + +class SkRBuffer; +class SkWBuffer; + +enum class SkPathConvexity { + kConvex, + kConcave, + kUnknown, +}; + +enum class SkPathFirstDirection { + kCW, // == SkPathDirection::kCW + kCCW, // == SkPathDirection::kCCW + kUnknown, +}; + +// These are computed from a stream of verbs +struct SkPathVerbAnalysis { + bool valid; + int points, weights; + unsigned segmentMask; +}; +SkPathVerbAnalysis sk_path_analyze_verbs(const uint8_t verbs[], int count); + + +/** + * Holds the path verbs and points. It is versioned by a generation ID. None of its public methods + * modify the contents. To modify or append to the verbs/points wrap the SkPathRef in an + * SkPathRef::Editor object. Installing the editor resets the generation ID. It also performs + * copy-on-write if the SkPathRef is shared by multiple SkPaths. The caller passes the Editor's + * constructor a pointer to a sk_sp<SkPathRef>, which may be updated to point to a new SkPathRef + * after the editor's constructor returns. + * + * The points and verbs are stored in a single allocation. The points are at the begining of the + * allocation while the verbs are stored at end of the allocation, in reverse order. Thus the points + * and verbs both grow into the middle of the allocation until the meet. To access verb i in the + * verb array use ref.verbs()[~i] (because verbs() returns a pointer just beyond the first + * logical verb or the last verb in memory). + */ + +class SK_API SkPathRef final : public SkNVRefCnt<SkPathRef> { +public: + SkPathRef(SkTDArray<SkPoint> points, SkTDArray<uint8_t> verbs, SkTDArray<SkScalar> weights, + unsigned segmentMask) + : fPoints(std::move(points)) + , fVerbs(std::move(verbs)) + , fConicWeights(std::move(weights)) + { + fBoundsIsDirty = true; // this also invalidates fIsFinite + fGenerationID = 0; // recompute + fSegmentMask = segmentMask; + fIsOval = false; + fIsRRect = false; + // The next two values don't matter unless fIsOval or fIsRRect are true. + fRRectOrOvalIsCCW = false; + fRRectOrOvalStartIdx = 0xAC; + SkDEBUGCODE(fEditorsAttached.store(0);) + + this->computeBounds(); // do this now, before we worry about multiple owners/threads + SkDEBUGCODE(this->validate();) + } + + class Editor { + public: + Editor(sk_sp<SkPathRef>* pathRef, + int incReserveVerbs = 0, + int incReservePoints = 0); + + ~Editor() { SkDEBUGCODE(fPathRef->fEditorsAttached--;) } + + /** + * Returns the array of points. + */ + SkPoint* writablePoints() { return fPathRef->getWritablePoints(); } + const SkPoint* points() const { return fPathRef->points(); } + + /** + * Gets the ith point. Shortcut for this->points() + i + */ + SkPoint* atPoint(int i) { return fPathRef->getWritablePoints() + i; } + const SkPoint* atPoint(int i) const { return &fPathRef->fPoints[i]; } + + /** + * Adds the verb and allocates space for the number of points indicated by the verb. The + * return value is a pointer to where the points for the verb should be written. 
+ * 'weight' is only used if 'verb' is kConic_Verb + */ + SkPoint* growForVerb(int /*SkPath::Verb*/ verb, SkScalar weight = 0) { + SkDEBUGCODE(fPathRef->validate();) + return fPathRef->growForVerb(verb, weight); + } + + /** + * Allocates space for multiple instances of a particular verb and the + * requisite points & weights. + * The return pointer points at the first new point (indexed normally [<i>]). + * If 'verb' is kConic_Verb, 'weights' will return a pointer to the + * space for the conic weights (indexed normally). + */ + SkPoint* growForRepeatedVerb(int /*SkPath::Verb*/ verb, + int numVbs, + SkScalar** weights = nullptr) { + return fPathRef->growForRepeatedVerb(verb, numVbs, weights); + } + + /** + * Concatenates all verbs from 'path' onto the pathRef's verbs array. Increases the point + * count by the number of points in 'path', and the conic weight count by the number of + * conics in 'path'. + * + * Returns pointers to the uninitialized points and conic weights data. + */ + std::tuple<SkPoint*, SkScalar*> growForVerbsInPath(const SkPathRef& path) { + return fPathRef->growForVerbsInPath(path); + } + + /** + * Resets the path ref to a new verb and point count. The new verbs and points are + * uninitialized. + */ + void resetToSize(int newVerbCnt, int newPointCnt, int newConicCount) { + fPathRef->resetToSize(newVerbCnt, newPointCnt, newConicCount); + } + + /** + * Gets the path ref that is wrapped in the Editor. + */ + SkPathRef* pathRef() { return fPathRef; } + + void setIsOval(bool isOval, bool isCCW, unsigned start) { + fPathRef->setIsOval(isOval, isCCW, start); + } + + void setIsRRect(bool isRRect, bool isCCW, unsigned start) { + fPathRef->setIsRRect(isRRect, isCCW, start); + } + + void setBounds(const SkRect& rect) { fPathRef->setBounds(rect); } + + private: + SkPathRef* fPathRef; + }; + + class SK_API Iter { + public: + Iter(); + Iter(const SkPathRef&); + + void setPathRef(const SkPathRef&); + + /** Return the next verb in this iteration of the path. When all + segments have been visited, return kDone_Verb. + + If any point in the path is non-finite, return kDone_Verb immediately. + + @param pts The points representing the current verb and/or segment + This must not be NULL. + @return The verb for the current segment + */ + uint8_t next(SkPoint pts[4]); + uint8_t peek() const; + + SkScalar conicWeight() const { return *fConicWeights; } + + private: + const SkPoint* fPts; + const uint8_t* fVerbs; + const uint8_t* fVerbStop; + const SkScalar* fConicWeights; + }; + +public: + /** + * Gets a path ref with no verbs or points. + */ + static SkPathRef* CreateEmpty(); + + /** + * Returns true if all of the points in this path are finite, meaning there + * are no infinities and no NaNs. + */ + bool isFinite() const { + if (fBoundsIsDirty) { + this->computeBounds(); + } + return SkToBool(fIsFinite); + } + + /** + * Returns a mask, where each bit corresponding to a SegmentMask is + * set if the path contains 1 or more segments of that type. + * Returns 0 for an empty path (no segments). + */ + uint32_t getSegmentMasks() const { return fSegmentMask; } + + /** Returns true if the path is an oval. + * + * @param rect returns the bounding rect of this oval. It's a circle + * if the height and width are the same. + * @param isCCW is the oval CCW (or CW if false). + * @param start indicates where the contour starts on the oval (see + * SkPath::addOval for intepretation of the index). + * + * @return true if this path is an oval. 
+ * Tracking whether a path is an oval is considered an + * optimization for performance and so some paths that are in + * fact ovals can report false. + */ + bool isOval(SkRect* rect, bool* isCCW, unsigned* start) const { + if (fIsOval) { + if (rect) { + *rect = this->getBounds(); + } + if (isCCW) { + *isCCW = SkToBool(fRRectOrOvalIsCCW); + } + if (start) { + *start = fRRectOrOvalStartIdx; + } + } + + return SkToBool(fIsOval); + } + + bool isRRect(SkRRect* rrect, bool* isCCW, unsigned* start) const { + if (fIsRRect) { + if (rrect) { + *rrect = this->getRRect(); + } + if (isCCW) { + *isCCW = SkToBool(fRRectOrOvalIsCCW); + } + if (start) { + *start = fRRectOrOvalStartIdx; + } + } + return SkToBool(fIsRRect); + } + + + bool hasComputedBounds() const { + return !fBoundsIsDirty; + } + + /** Returns the bounds of the path's points. If the path contains 0 or 1 + points, the bounds is set to (0,0,0,0), and isEmpty() will return true. + Note: this bounds may be larger than the actual shape, since curves + do not extend as far as their control points. + */ + const SkRect& getBounds() const { + if (fBoundsIsDirty) { + this->computeBounds(); + } + return fBounds; + } + + SkRRect getRRect() const; + + /** + * Transforms a path ref by a matrix, allocating a new one only if necessary. + */ + static void CreateTransformedCopy(sk_sp<SkPathRef>* dst, + const SkPathRef& src, + const SkMatrix& matrix); + + // static SkPathRef* CreateFromBuffer(SkRBuffer* buffer); + + /** + * Rollsback a path ref to zero verbs and points with the assumption that the path ref will be + * repopulated with approximately the same number of verbs and points. A new path ref is created + * only if necessary. + */ + static void Rewind(sk_sp<SkPathRef>* pathRef); + + ~SkPathRef(); + int countPoints() const { return fPoints.count(); } + int countVerbs() const { return fVerbs.count(); } + int countWeights() const { return fConicWeights.count(); } + + size_t approximateBytesUsed() const; + + /** + * Returns a pointer one beyond the first logical verb (last verb in memory order). + */ + const uint8_t* verbsBegin() const { return fVerbs.begin(); } + + /** + * Returns a const pointer to the first verb in memory (which is the last logical verb). + */ + const uint8_t* verbsEnd() const { return fVerbs.end(); } + + /** + * Returns a const pointer to the first point. + */ + const SkPoint* points() const { return fPoints.begin(); } + + /** + * Shortcut for this->points() + this->countPoints() + */ + const SkPoint* pointsEnd() const { return this->points() + this->countPoints(); } + + const SkScalar* conicWeights() const { return fConicWeights.begin(); } + const SkScalar* conicWeightsEnd() const { return fConicWeights.end(); } + + /** + * Convenience methods for getting to a verb or point by index. + */ + uint8_t atVerb(int index) const { return fVerbs[index]; } + const SkPoint& atPoint(int index) const { return fPoints[index]; } + + bool operator== (const SkPathRef& ref) const; + + /** + * Writes the path points and verbs to a buffer. + */ + void writeToBuffer(SkWBuffer* buffer) const; + + /** + * Gets the number of bytes that would be written in writeBuffer() + */ + uint32_t writeSize() const; + + void interpolate(const SkPathRef& ending, SkScalar weight, SkPathRef* out) const; + + /** + * Gets an ID that uniquely identifies the contents of the path ref. If two path refs have the + * same ID then they have the same verbs and points. However, two path refs may have the same + * contents but different genIDs. 
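     *
     * For illustration only (an editorial sketch, not part of the original header): genID() is
     * convenient as a cheap cache key for data derived from a path; rebuild_derived_data() below
     * is a hypothetical helper standing in for whatever the caller recomputes.
     *
     *   void refresh_if_changed(const SkPathRef& ref, uint32_t* cachedGenID) {
     *       if (*cachedGenID != ref.genID()) {   // the verbs/points may have changed
     *           rebuild_derived_data(ref);       // hypothetical: re-tessellate, re-measure, ...
     *           *cachedGenID = ref.genID();
     *       }
     *   }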
+ */ + uint32_t genID() const; + + void addGenIDChangeListener(sk_sp<SkIDChangeListener>); // Threadsafe. + int genIDChangeListenerCount(); // Threadsafe + + bool dataMatchesVerbs() const; + bool isValid() const; + SkDEBUGCODE(void validate() const { SkASSERT(this->isValid()); } ) + +private: + enum SerializationOffsets { + kLegacyRRectOrOvalStartIdx_SerializationShift = 28, // requires 3 bits, ignored. + kLegacyRRectOrOvalIsCCW_SerializationShift = 27, // requires 1 bit, ignored. + kLegacyIsRRect_SerializationShift = 26, // requires 1 bit, ignored. + kIsFinite_SerializationShift = 25, // requires 1 bit + kLegacyIsOval_SerializationShift = 24, // requires 1 bit, ignored. + kSegmentMask_SerializationShift = 0 // requires 4 bits (deprecated) + }; + + SkPathRef() { + fBoundsIsDirty = true; // this also invalidates fIsFinite + fGenerationID = kEmptyGenID; + fSegmentMask = 0; + fIsOval = false; + fIsRRect = false; + // The next two values don't matter unless fIsOval or fIsRRect are true. + fRRectOrOvalIsCCW = false; + fRRectOrOvalStartIdx = 0xAC; + SkDEBUGCODE(fEditorsAttached.store(0);) + SkDEBUGCODE(this->validate();) + } + + void copy(const SkPathRef& ref, int additionalReserveVerbs, int additionalReservePoints); + + // Return true if the computed bounds are finite. + static bool ComputePtBounds(SkRect* bounds, const SkPathRef& ref) { + return bounds->setBoundsCheck(ref.points(), ref.countPoints()); + } + + // called, if dirty, by getBounds() + void computeBounds() const { + SkDEBUGCODE(this->validate();) + // TODO(mtklein): remove fBoundsIsDirty and fIsFinite, + // using an inverted rect instead of fBoundsIsDirty and always recalculating fIsFinite. + SkASSERT(fBoundsIsDirty); + + fIsFinite = ComputePtBounds(&fBounds, *this); + fBoundsIsDirty = false; + } + + void setBounds(const SkRect& rect) { + SkASSERT(rect.fLeft <= rect.fRight && rect.fTop <= rect.fBottom); + fBounds = rect; + fBoundsIsDirty = false; + fIsFinite = fBounds.isFinite(); + } + + /** Makes additional room but does not change the counts or change the genID */ + void incReserve(int additionalVerbs, int additionalPoints) { + SkDEBUGCODE(this->validate();) + fPoints.setReserve(fPoints.count() + additionalPoints); + fVerbs.setReserve(fVerbs.count() + additionalVerbs); + SkDEBUGCODE(this->validate();) + } + + /** Resets the path ref with verbCount verbs and pointCount points, all uninitialized. Also + * allocates space for reserveVerb additional verbs and reservePoints additional points.*/ + void resetToSize(int verbCount, int pointCount, int conicCount, + int reserveVerbs = 0, int reservePoints = 0) { + SkDEBUGCODE(this->validate();) + this->callGenIDChangeListeners(); + fBoundsIsDirty = true; // this also invalidates fIsFinite + fGenerationID = 0; + + fSegmentMask = 0; + fIsOval = false; + fIsRRect = false; + + fPoints.setReserve(pointCount + reservePoints); + fPoints.setCount(pointCount); + fVerbs.setReserve(verbCount + reserveVerbs); + fVerbs.setCount(verbCount); + fConicWeights.setCount(conicCount); + SkDEBUGCODE(this->validate();) + } + + /** + * Increases the verb count by numVbs and point count by the required amount. + * The new points are uninitialized. All the new verbs are set to the specified + * verb. If 'verb' is kConic_Verb, 'weights' will return a pointer to the + * uninitialized conic weights. 
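     *
     * A usage sketch (editorial illustration, not from Skia itself): appending several line
     * segments to an existing contour in one grow call through SkPathRef::Editor, then writing
     * the returned, uninitialized points. Line verbs take one point each and no conic weights.
     *
     *   void append_polyline(sk_sp<SkPathRef>* pathRef, const SkPoint pts[], int n) {
     *       SkPathRef::Editor editor(pathRef, n, n);   // reserve n verbs and n points up front
     *       SkPoint* dst = editor.growForRepeatedVerb(SkPath::kLine_Verb, n);
     *       for (int i = 0; i < n; ++i) {
     *           dst[i] = pts[i];
     *       }
     *   }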
+ */ + SkPoint* growForRepeatedVerb(int /*SkPath::Verb*/ verb, int numVbs, SkScalar** weights); + + /** + * Increases the verb count 1, records the new verb, and creates room for the requisite number + * of additional points. A pointer to the first point is returned. Any new points are + * uninitialized. + */ + SkPoint* growForVerb(int /*SkPath::Verb*/ verb, SkScalar weight); + + /** + * Concatenates all verbs from 'path' onto our own verbs array. Increases the point count by the + * number of points in 'path', and the conic weight count by the number of conics in 'path'. + * + * Returns pointers to the uninitialized points and conic weights data. + */ + std::tuple<SkPoint*, SkScalar*> growForVerbsInPath(const SkPathRef& path); + + /** + * Private, non-const-ptr version of the public function verbsMemBegin(). + */ + uint8_t* verbsBeginWritable() { return fVerbs.begin(); } + + /** + * Called the first time someone calls CreateEmpty to actually create the singleton. + */ + friend SkPathRef* sk_create_empty_pathref(); + + void setIsOval(bool isOval, bool isCCW, unsigned start) { + fIsOval = isOval; + fRRectOrOvalIsCCW = isCCW; + fRRectOrOvalStartIdx = SkToU8(start); + } + + void setIsRRect(bool isRRect, bool isCCW, unsigned start) { + fIsRRect = isRRect; + fRRectOrOvalIsCCW = isCCW; + fRRectOrOvalStartIdx = SkToU8(start); + } + + // called only by the editor. Note that this is not a const function. + SkPoint* getWritablePoints() { + SkDEBUGCODE(this->validate();) + fIsOval = false; + fIsRRect = false; + return fPoints.begin(); + } + + const SkPoint* getPoints() const { + SkDEBUGCODE(this->validate();) + return fPoints.begin(); + } + + void callGenIDChangeListeners(); + + enum { + kMinSize = 256, + }; + + mutable SkRect fBounds; + + SkTDArray<SkPoint> fPoints; + SkTDArray<uint8_t> fVerbs; + SkTDArray<SkScalar> fConicWeights; + + enum { + kEmptyGenID = 1, // GenID reserved for path ref with zero points and zero verbs. + }; + mutable uint32_t fGenerationID; + SkDEBUGCODE(std::atomic<int> fEditorsAttached;) // assert only one editor in use at any time. + + SkIDChangeListener::List fGenIDChangeListeners; + + mutable uint8_t fBoundsIsDirty; + mutable bool fIsFinite; // only meaningful if bounds are valid + + bool fIsOval; + bool fIsRRect; + // Both the circle and rrect special cases have a notion of direction and starting point + // The next two variables store that information for either. + bool fRRectOrOvalIsCCW; + uint8_t fRRectOrOvalStartIdx; + uint8_t fSegmentMask; + + friend class PathRefTest_Private; + friend class ForceIsRRect_Private; // unit test isRRect + friend class SkPath; + friend class SkPathBuilder; + friend class SkPathPriv; +}; + +#endif diff --git a/src/deps/skia/include/private/SkSLDefines.h b/src/deps/skia/include/private/SkSLDefines.h new file mode 100644 index 000000000..50024b357 --- /dev/null +++ b/src/deps/skia/include/private/SkSLDefines.h @@ -0,0 +1,56 @@ +/* + * Copyright 2019 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#ifndef SKSL_DEFINES +#define SKSL_DEFINES + +#include <cstdint> + +#include "include/core/SkTypes.h" +#include "include/private/SkTArray.h" + +using SKSL_INT = int64_t; +using SKSL_FLOAT = float; + +namespace SkSL { + +class Expression; +class Statement; + +using ComponentArray = SkSTArray<4, int8_t>; // for Swizzles +using ExpressionArray = SkSTArray<2, std::unique_ptr<Expression>>; +using StatementArray = SkSTArray<2, std::unique_ptr<Statement>>; + +// Functions larger than this (measured in IR nodes) will not be inlined. This growth factor +// accounts for the number of calls being inlined--i.e., a function called five times (that is, with +// five inlining opportunities) would be considered 5x larger than if it were called once. This +// default threshold value is arbitrary, but tends to work well in practice. +static constexpr int kDefaultInlineThreshold = 50; + +// A hard upper limit on the number of variable slots allowed in a function/global scope. +// This is an arbitrary limit, but is needed to prevent code generation from taking unbounded +// amounts of time or space. +static constexpr int kVariableSlotLimit = 100000; + +// The SwizzleComponent namespace is used both by the SkSL::Swizzle expression, and the DSL swizzle. +// This namespace is injected into SkSL::dsl so that `using namespace SkSL::dsl` enables DSL code +// like `Swizzle(var, X, Y, ONE)` to compile without any extra qualifications. +namespace SwizzleComponent { + +enum Type : int8_t { + X = 0, Y = 1, Z = 2, W = 3, + R = 4, G = 5, B = 6, A = 7, + S = 8, T = 9, P = 10, Q = 11, + UL = 12, UT = 13, UR = 14, UB = 15, + ZERO, + ONE +}; + +} // namespace SwizzleComponent +} // namespace SkSL + +#endif diff --git a/src/deps/skia/include/private/SkSLIRNode.h b/src/deps/skia/include/private/SkSLIRNode.h new file mode 100644 index 000000000..2e545c349 --- /dev/null +++ b/src/deps/skia/include/private/SkSLIRNode.h @@ -0,0 +1,63 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SKSL_IRNODE +#define SKSL_IRNODE + +#include "include/private/SkSLString.h" +#include "include/private/SkTArray.h" +#include "src/sksl/SkSLLexer.h" +#include "src/sksl/SkSLModifiersPool.h" +#include "src/sksl/SkSLPool.h" + +#include <algorithm> +#include <atomic> +#include <unordered_set> +#include <vector> + +namespace SkSL { + +class Expression; +class FunctionDeclaration; +class FunctionDefinition; +class Statement; +class Symbol; +class SymbolTable; +class Type; +class Variable; +class VariableReference; +enum class VariableRefKind : int8_t; +enum class VariableStorage : int8_t; + +/** + * Represents a node in the intermediate representation (IR) tree. The IR is a fully-resolved + * version of the program (all types determined, everything validated), ready for code generation. 
+ */ +class IRNode : public Poolable { +public: + virtual ~IRNode() {} + + virtual String description() const = 0; + + // No copy construction or assignment + IRNode(const IRNode&) = delete; + IRNode& operator=(const IRNode&) = delete; + + // line of this element within the program being compiled, for error reporting purposes + int fLine; + +protected: + IRNode(int line, int kind) + : fLine(line) + , fKind(kind) {} + + int fKind; +}; + +} // namespace SkSL + +#endif diff --git a/src/deps/skia/include/private/SkSLLayout.h b/src/deps/skia/include/private/SkSLLayout.h new file mode 100644 index 000000000..d3654dd43 --- /dev/null +++ b/src/deps/skia/include/private/SkSLLayout.h @@ -0,0 +1,143 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SKSL_LAYOUT +#define SKSL_LAYOUT + +#include "include/private/SkSLString.h" + +namespace SkSL { + +/** + * Represents a layout block appearing before a variable declaration, as in: + * + * layout (location = 0) int x; + */ +struct Layout { + enum Flag { + kOriginUpperLeft_Flag = 1 << 0, + kPushConstant_Flag = 1 << 1, + kBlendSupportAllEquations_Flag = 1 << 2, + kColor_Flag = 1 << 3, + + // These flags indicate if the qualifier appeared, regardless of the accompanying value. + kLocation_Flag = 1 << 4, + kOffset_Flag = 1 << 5, + kBinding_Flag = 1 << 6, + kIndex_Flag = 1 << 7, + kSet_Flag = 1 << 8, + kBuiltin_Flag = 1 << 9, + kInputAttachmentIndex_Flag = 1 << 10, + }; + + Layout(int flags, int location, int offset, int binding, int index, int set, int builtin, + int inputAttachmentIndex) + : fFlags(flags) + , fLocation(location) + , fOffset(offset) + , fBinding(binding) + , fIndex(index) + , fSet(set) + , fBuiltin(builtin) + , fInputAttachmentIndex(inputAttachmentIndex) {} + + Layout() + : fFlags(0) + , fLocation(-1) + , fOffset(-1) + , fBinding(-1) + , fIndex(-1) + , fSet(-1) + , fBuiltin(-1) + , fInputAttachmentIndex(-1) {} + + static Layout builtin(int builtin) { + Layout result; + result.fBuiltin = builtin; + return result; + } + + String description() const { + String result; + auto separator = [firstSeparator = true]() mutable -> String { + if (firstSeparator) { + firstSeparator = false; + return ""; + } else { + return ", "; + }}; + if (fLocation >= 0) { + result += separator() + "location = " + to_string(fLocation); + } + if (fOffset >= 0) { + result += separator() + "offset = " + to_string(fOffset); + } + if (fBinding >= 0) { + result += separator() + "binding = " + to_string(fBinding); + } + if (fIndex >= 0) { + result += separator() + "index = " + to_string(fIndex); + } + if (fSet >= 0) { + result += separator() + "set = " + to_string(fSet); + } + if (fBuiltin >= 0) { + result += separator() + "builtin = " + to_string(fBuiltin); + } + if (fInputAttachmentIndex >= 0) { + result += separator() + "input_attachment_index = " + to_string(fInputAttachmentIndex); + } + if (fFlags & kOriginUpperLeft_Flag) { + result += separator() + "origin_upper_left"; + } + if (fFlags & kBlendSupportAllEquations_Flag) { + result += separator() + "blend_support_all_equations"; + } + if (fFlags & kPushConstant_Flag) { + result += separator() + "push_constant"; + } + if (fFlags & kColor_Flag) { + result += separator() + "color"; + } + if (result.size() > 0) { + result = "layout (" + result + ")"; + } + return result; + } + + bool operator==(const Layout& other) const { + return fFlags == other.fFlags && + fLocation == other.fLocation && + fOffset == 
other.fOffset && + fBinding == other.fBinding && + fIndex == other.fIndex && + fSet == other.fSet && + fBuiltin == other.fBuiltin && + fInputAttachmentIndex == other.fInputAttachmentIndex; + } + + bool operator!=(const Layout& other) const { + return !(*this == other); + } + + int fFlags; + int fLocation; + int fOffset; + int fBinding; + int fIndex; + int fSet; + // builtin comes from SPIR-V and identifies which particular builtin value this object + // represents. + int fBuiltin; + // input_attachment_index comes from Vulkan/SPIR-V to connect a shader variable to the a + // corresponding attachment on the subpass in which the shader is being used. + int fInputAttachmentIndex; +}; + +} // namespace SkSL + +#endif diff --git a/src/deps/skia/include/private/SkSLModifiers.h b/src/deps/skia/include/private/SkSLModifiers.h new file mode 100644 index 000000000..a881e57e0 --- /dev/null +++ b/src/deps/skia/include/private/SkSLModifiers.h @@ -0,0 +1,141 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SKSL_MODIFIERS +#define SKSL_MODIFIERS + +#include "include/private/SkSLLayout.h" + +#include <vector> + +namespace SkSL { + +class Context; + +/** + * A set of modifier keywords (in, out, uniform, etc.) appearing before a declaration. + */ +struct Modifiers { + /** + * OpenGL requires modifiers to be in a strict order: + * - invariant-qualifier: (invariant) + * - interpolation-qualifier: flat, noperspective, (smooth) + * - storage-qualifier: const, uniform + * - parameter-qualifier: in, out, inout + * - precision-qualifier: highp, mediump, lowp + * + * SkSL does not have `invariant` or `smooth`. + */ + + enum Flag { + kNo_Flag = 0, + // Real GLSL modifiers + kFlat_Flag = 1 << 0, + kNoPerspective_Flag = 1 << 1, + kConst_Flag = 1 << 2, + kUniform_Flag = 1 << 3, + kIn_Flag = 1 << 4, + kOut_Flag = 1 << 5, + kHighp_Flag = 1 << 6, + kMediump_Flag = 1 << 7, + kLowp_Flag = 1 << 8, + // SkSL extensions, not present in GLSL + kES3_Flag = 1 << 9, + kHasSideEffects_Flag = 1 << 10, + kInline_Flag = 1 << 11, + kNoInline_Flag = 1 << 12, + }; + + Modifiers() + : fLayout(Layout()) + , fFlags(0) {} + + Modifiers(const Layout& layout, int flags) + : fLayout(layout) + , fFlags(flags) {} + + String description() const { + String result = fLayout.description(); + + // SkSL extensions + if (fFlags & kES3_Flag) { + result += "$es3 "; + } + if (fFlags & kHasSideEffects_Flag) { + result += "sk_has_side_effects "; + } + if (fFlags & kNoInline_Flag) { + result += "noinline "; + } + + // Real GLSL qualifiers (must be specified in order in GLSL 4.1 and below) + if (fFlags & kFlat_Flag) { + result += "flat "; + } + if (fFlags & kNoPerspective_Flag) { + result += "noperspective "; + } + if (fFlags & kConst_Flag) { + result += "const "; + } + if (fFlags & kUniform_Flag) { + result += "uniform "; + } + if ((fFlags & kIn_Flag) && (fFlags & kOut_Flag)) { + result += "inout "; + } else if (fFlags & kIn_Flag) { + result += "in "; + } else if (fFlags & kOut_Flag) { + result += "out "; + } + if (fFlags & kHighp_Flag) { + result += "highp "; + } + if (fFlags & kMediump_Flag) { + result += "mediump "; + } + if (fFlags & kLowp_Flag) { + result += "lowp "; + } + + return result; + } + + bool operator==(const Modifiers& other) const { + return fLayout == other.fLayout && fFlags == other.fFlags; + } + + bool operator!=(const Modifiers& other) const { + return !(*this == other); + } + + /** + * Verifies that only permitted modifiers 
and layout flags are included. Reports errors and + * returns false in the event of a violation. + */ + bool checkPermitted(const Context& context, int line, int permittedModifierFlags, + int permittedLayoutFlags) const; + + Layout fLayout; + int fFlags; +}; + +} // namespace SkSL + +namespace std { + +template <> +struct hash<SkSL::Modifiers> { + size_t operator()(const SkSL::Modifiers& key) const { + return (size_t) key.fFlags ^ ((size_t) key.fLayout.fFlags << 8) ^ + ((size_t) key.fLayout.fBuiltin << 16); + } +}; + +} // namespace std + +#endif diff --git a/src/deps/skia/include/private/SkSLProgramElement.h b/src/deps/skia/include/private/SkSLProgramElement.h new file mode 100644 index 000000000..88c4129ee --- /dev/null +++ b/src/deps/skia/include/private/SkSLProgramElement.h @@ -0,0 +1,77 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SKSL_PROGRAMELEMENT +#define SKSL_PROGRAMELEMENT + +#include "include/private/SkSLIRNode.h" + +#include <memory> + +namespace SkSL { + +/** + * Represents a top-level element (e.g. function or global variable) in a program. + */ +class ProgramElement : public IRNode { +public: + enum class Kind { + kExtension = 0, + kFunction, + kFunctionPrototype, + kGlobalVar, + kInterfaceBlock, + kModifiers, + kStructDefinition, + + kFirst = kExtension, + kLast = kStructDefinition + }; + + ProgramElement(int offset, Kind kind) + : INHERITED(offset, (int) kind) { + SkASSERT(kind >= Kind::kFirst && kind <= Kind::kLast); + } + + Kind kind() const { + return (Kind) fKind; + } + + /** + * Use is<T> to check the type of a program element. + * e.g. replace `el.kind() == ProgramElement::Kind::kExtension` with `el.is<Extension>()`. + */ + template <typename T> + bool is() const { + return this->kind() == T::kProgramElementKind; + } + + /** + * Use as<T> to downcast program elements. e.g. replace `(Extension&) el` with + * `el.as<Extension>()`. + */ + template <typename T> + const T& as() const { + SkASSERT(this->is<T>()); + return static_cast<const T&>(*this); + } + + template <typename T> + T& as() { + SkASSERT(this->is<T>()); + return static_cast<T&>(*this); + } + + virtual std::unique_ptr<ProgramElement> clone() const = 0; + +private: + using INHERITED = IRNode; +}; + +} // namespace SkSL + +#endif diff --git a/src/deps/skia/include/private/SkSLProgramKind.h b/src/deps/skia/include/private/SkSLProgramKind.h new file mode 100644 index 000000000..96826a70b --- /dev/null +++ b/src/deps/skia/include/private/SkSLProgramKind.h @@ -0,0 +1,31 @@ +/* + * Copyright 2021 Google LLC. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkSLProgramKind_DEFINED +#define SkSLProgramKind_DEFINED + +#include <cinttypes> + +namespace SkSL { + +/** + * SkSL supports several different program kinds. 
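 *
 * (Editorial illustration, not part of this header.) Whatever the program kind, the resulting IR
 * is traversed with the is<T>() / as<T>() helpers declared on ProgramElement above, rather than
 * by comparing kind() values and casting by hand. A minimal sketch, assuming SkSL::Extension is
 * available as that doc comment suggests:
 *
 *   int count_extensions(const std::vector<std::unique_ptr<SkSL::ProgramElement>>& elements) {
 *       int n = 0;
 *       for (const auto& pe : elements) {
 *           if (pe->is<SkSL::Extension>()) {   // instead of pe->kind() == Kind::kExtension
 *               ++n;
 *           }
 *       }
 *       return n;
 *   }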
+ */ +enum class ProgramKind : int8_t { + kFragment, + kVertex, + kRuntimeColorFilter, // Runtime effect only suitable as SkColorFilter + kRuntimeShader, // " " " " " SkShader + kRuntimeBlender, // " " " " " SkBlender + kCustomMeshVertex, // Vertex portion of a custom mesh + kCustomMeshFragment, // Fragment " " " " " + kGeneric, +}; + +} // namespace SkSL + +#endif diff --git a/src/deps/skia/include/private/SkSLSampleUsage.h b/src/deps/skia/include/private/SkSLSampleUsage.h new file mode 100644 index 000000000..a8d67a025 --- /dev/null +++ b/src/deps/skia/include/private/SkSLSampleUsage.h @@ -0,0 +1,89 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkSLSampleUsage_DEFINED +#define SkSLSampleUsage_DEFINED + +#include "include/core/SkTypes.h" + +#include <string> + +namespace SkSL { + +/** + * Represents all of the ways that a fragment processor is sampled by its parent. + */ +class SampleUsage { +public: + enum class Kind { + // Child is never sampled + kNone, + // Child is only sampled at the same coordinates as the parent + kPassThrough, + // Child is sampled with a matrix whose value is uniform + kUniformMatrix, + // Child is sampled with sk_FragCoord.xy + kFragCoord, + // Child is sampled using explicit coordinates + kExplicit, + }; + + // Make a SampleUsage that corresponds to no sampling of the child at all + SampleUsage() = default; + + SampleUsage(Kind kind, bool hasPerspective) : fKind(kind), fHasPerspective(hasPerspective) { + if (kind != Kind::kUniformMatrix) { + SkASSERT(!fHasPerspective); + } + } + + // Child is sampled with a matrix whose value is uniform. The name is fixed. + static SampleUsage UniformMatrix(bool hasPerspective) { + return SampleUsage(Kind::kUniformMatrix, hasPerspective); + } + + static SampleUsage Explicit() { + return SampleUsage(Kind::kExplicit, false); + } + + static SampleUsage PassThrough() { + return SampleUsage(Kind::kPassThrough, false); + } + + static SampleUsage FragCoord() { return SampleUsage(Kind::kFragCoord, false); } + + bool operator==(const SampleUsage& that) const { + return fKind == that.fKind && fHasPerspective == that.fHasPerspective; + } + + bool operator!=(const SampleUsage& that) const { return !(*this == that); } + + // Arbitrary name used by all uniform sampling matrices + static const char* MatrixUniformName() { return "matrix"; } + + SampleUsage merge(const SampleUsage& other); + + Kind kind() const { return fKind; } + + bool hasPerspective() const { return fHasPerspective; } + + bool isSampled() const { return fKind != Kind::kNone; } + bool isPassThrough() const { return fKind == Kind::kPassThrough; } + bool isExplicit() const { return fKind == Kind::kExplicit; } + bool isUniformMatrix() const { return fKind == Kind::kUniformMatrix; } + bool isFragCoord() const { return fKind == Kind::kFragCoord; } + + std::string constructor() const; + +private: + Kind fKind = Kind::kNone; + bool fHasPerspective = false; // Only valid if fKind is kUniformMatrix +}; + +} // namespace SkSL + +#endif diff --git a/src/deps/skia/include/private/SkSLStatement.h b/src/deps/skia/include/private/SkSLStatement.h new file mode 100644 index 000000000..8913369e9 --- /dev/null +++ b/src/deps/skia/include/private/SkSLStatement.h @@ -0,0 +1,87 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#ifndef SKSL_STATEMENT +#define SKSL_STATEMENT + +#include "include/private/SkSLIRNode.h" +#include "include/private/SkSLSymbol.h" + +namespace SkSL { + +/** + * Abstract supertype of all statements. + */ +class Statement : public IRNode { +public: + enum Kind { + kBlock = (int) Symbol::Kind::kLast + 1, + kBreak, + kContinue, + kDiscard, + kDo, + kExpression, + kFor, + kIf, + kInlineMarker, + kNop, + kReturn, + kSwitch, + kSwitchCase, + kVarDeclaration, + + kFirst = kBlock, + kLast = kVarDeclaration, + }; + + Statement(int line, Kind kind) + : INHERITED(line, (int) kind) { + SkASSERT(kind >= Kind::kFirst && kind <= Kind::kLast); + } + + Kind kind() const { + return (Kind) fKind; + } + + /** + * Use is<T> to check the type of a statement. + * e.g. replace `s.kind() == Statement::Kind::kReturn` with `s.is<ReturnStatement>()`. + */ + template <typename T> + bool is() const { + return this->fKind == T::kStatementKind; + } + + /** + * Use as<T> to downcast statements. + * e.g. replace `(ReturnStatement&) s` with `s.as<ReturnStatement>()`. + */ + template <typename T> + const T& as() const { + SkASSERT(this->is<T>()); + return static_cast<const T&>(*this); + } + + template <typename T> + T& as() { + SkASSERT(this->is<T>()); + return static_cast<T&>(*this); + } + + virtual bool isEmpty() const { + return false; + } + + virtual std::unique_ptr<Statement> clone() const = 0; + +private: + using INHERITED = IRNode; +}; + +} // namespace SkSL + +#endif diff --git a/src/deps/skia/include/private/SkSLString.h b/src/deps/skia/include/private/SkSLString.h new file mode 100644 index 000000000..7d828760d --- /dev/null +++ b/src/deps/skia/include/private/SkSLString.h @@ -0,0 +1,80 @@ +/* + * Copyright 2017 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SKSL_STRING +#define SKSL_STRING + +#include "include/core/SkStringView.h" +#include "include/private/SkSLDefines.h" +#include <cstring> +#include <stdarg.h> +#include <string> + +#ifndef SKSL_STANDALONE +#include "include/core/SkString.h" +#endif + +namespace SkSL { + +class String; + +class SK_API String : public std::string { +public: + using std::string::string; + + explicit String(std::string s) : INHERITED(std::move(s)) {} + explicit String(skstd::string_view s) : INHERITED(s.data(), s.length()) {} + // TODO(johnstiles): add operator skstd::string_view + + static String printf(const char* fmt, ...) SK_PRINTF_LIKE(1, 2); + void appendf(const char* fmt, ...) 
SK_PRINTF_LIKE(2, 3); + void vappendf(const char* fmt, va_list va); + + bool starts_with(const char prefix[]) const { + return skstd::string_view(data(), size()).starts_with(prefix); + } + bool ends_with(const char suffix[]) const { + return skstd::string_view(data(), size()).ends_with(suffix); + } + + bool consumeSuffix(const char suffix[]); + + String operator+(const char* s) const; + String operator+(const String& s) const; + String operator+(skstd::string_view s) const; + String& operator+=(char c); + String& operator+=(const char* s); + String& operator+=(const String& s); + String& operator+=(skstd::string_view s); + friend String operator+(const char* s1, const String& s2); + +private: + using INHERITED = std::string; +}; + +String operator+(skstd::string_view left, skstd::string_view right); + +String to_string(double value); +String to_string(int32_t value); +String to_string(uint32_t value); +String to_string(int64_t value); +String to_string(uint64_t value); + +bool stod(skstd::string_view s, SKSL_FLOAT* value); +bool stoi(skstd::string_view s, SKSL_INT* value); + +} // namespace SkSL + +namespace std { + template<> struct hash<SkSL::String> { + size_t operator()(const SkSL::String& s) const { + return hash<std::string>{}(s); + } + }; +} // namespace std + +#endif diff --git a/src/deps/skia/include/private/SkSLSymbol.h b/src/deps/skia/include/private/SkSLSymbol.h new file mode 100644 index 000000000..cca74b819 --- /dev/null +++ b/src/deps/skia/include/private/SkSLSymbol.h @@ -0,0 +1,90 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SKSL_SYMBOL +#define SKSL_SYMBOL + +#include "include/private/SkSLIRNode.h" +#include "include/private/SkSLProgramElement.h" + +namespace SkSL { + +/** + * Represents a symboltable entry. + */ +class Symbol : public IRNode { +public: + enum class Kind { + kExternal = (int) ProgramElement::Kind::kLast + 1, + kField, + kFunctionDeclaration, + kType, + kUnresolvedFunction, + kVariable, + + kFirst = kExternal, + kLast = kVariable + }; + + Symbol(int offset, Kind kind, skstd::string_view name, const Type* type = nullptr) + : INHERITED(offset, (int) kind) + , fName(name) + , fType(type) { + SkASSERT(kind >= Kind::kFirst && kind <= Kind::kLast); + } + + ~Symbol() override {} + + const Type& type() const { + SkASSERT(fType); + return *fType; + } + + Kind kind() const { + return (Kind) fKind; + } + + skstd::string_view name() const { + return fName; + } + + /** + * Use is<T> to check the type of a symbol. + * e.g. replace `sym.kind() == Symbol::Kind::kVariable` with `sym.is<Variable>()`. + */ + template <typename T> + bool is() const { + return this->kind() == T::kSymbolKind; + } + + /** + * Use as<T> to downcast symbols. e.g. replace `(Variable&) sym` with `sym.as<Variable>()`. + */ + template <typename T> + const T& as() const { + SkASSERT(this->is<T>()); + return static_cast<const T&>(*this); + } + + template <typename T> + T& as() { + SkASSERT(this->is<T>()); + return static_cast<T&>(*this); + } + +private: + skstd::string_view fName; + const Type* fType; + + using INHERITED = IRNode; + + friend class Type; +}; + +} // namespace SkSL + +#endif diff --git a/src/deps/skia/include/private/SkSafe32.h b/src/deps/skia/include/private/SkSafe32.h new file mode 100644 index 000000000..7e59f2b00 --- /dev/null +++ b/src/deps/skia/include/private/SkSafe32.h @@ -0,0 +1,34 @@ +/* + * Copyright 2018 Google Inc. 
+ * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkSafe32_DEFINED +#define SkSafe32_DEFINED + +#include "include/core/SkTypes.h" + +static constexpr int32_t Sk64_pin_to_s32(int64_t x) { + return x < SK_MinS32 ? SK_MinS32 : (x > SK_MaxS32 ? SK_MaxS32 : (int32_t)x); +} + +static constexpr int32_t Sk32_sat_add(int32_t a, int32_t b) { + return Sk64_pin_to_s32((int64_t)a + (int64_t)b); +} + +static constexpr int32_t Sk32_sat_sub(int32_t a, int32_t b) { + return Sk64_pin_to_s32((int64_t)a - (int64_t)b); +} + +// To avoid UBSAN complaints about 2's compliment overflows +// +static constexpr int32_t Sk32_can_overflow_add(int32_t a, int32_t b) { + return (int32_t)((uint32_t)a + (uint32_t)b); +} +static constexpr int32_t Sk32_can_overflow_sub(int32_t a, int32_t b) { + return (int32_t)((uint32_t)a - (uint32_t)b); +} + +#endif diff --git a/src/deps/skia/include/private/SkSafe_math.h b/src/deps/skia/include/private/SkSafe_math.h new file mode 100644 index 000000000..144b28a4a --- /dev/null +++ b/src/deps/skia/include/private/SkSafe_math.h @@ -0,0 +1,52 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkSafe_math_DEFINED +#define SkSafe_math_DEFINED + +// This file protects against known bugs in ucrt\math.h. +// Namely, that header defines inline methods without marking them static, +// which makes it very easy to cause ODR violations and ensuing chaos. +// +// TODO: other headers? Here are some potential problem headers: +// $ grep -R __inline * | grep -v static | cut -f 1 -d: | sort | uniq +// corecrt.h +// corecrt_stdio_config.h +// ctype.h +// fenv.h +// locale.h +// malloc.h +// math.h +// tchar.h +// wchar.h +// I took a quick look through other headers outside math.h. +// Nothing looks anywhere near as likely to be used by Skia as math.h. + +#if defined(_MSC_VER) && !defined(_INC_MATH) + // Our strategy here is to simply inject "static" into the headers + // where it should have been written, just before __inline. + // + // Most inline-but-not-static methods in math.h are 32-bit only, + // but not all of them (see frexpf, hypothf, ldexpf...). So to + // be safe, 32- and 64-bit builds both get this treatment. + + #define __inline static __inline + #include <math.h> + #undef __inline + + #if !defined(_INC_MATH) + #error Hmm. Looks like math.h has changed its header guards. + #endif + + #define INC_MATH_IS_SAFE_NOW + +#else + #include <math.h> + +#endif + +#endif//SkSafe_math_DEFINED diff --git a/src/deps/skia/include/private/SkSemaphore.h b/src/deps/skia/include/private/SkSemaphore.h new file mode 100644 index 000000000..d7318be57 --- /dev/null +++ b/src/deps/skia/include/private/SkSemaphore.h @@ -0,0 +1,83 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkSemaphore_DEFINED +#define SkSemaphore_DEFINED + +#include "include/core/SkTypes.h" +#include "include/private/SkOnce.h" +#include "include/private/SkThreadAnnotations.h" +#include <algorithm> +#include <atomic> + +class SkSemaphore { +public: + constexpr SkSemaphore(int count = 0) : fCount(count), fOSSemaphore(nullptr) {} + + // Cleanup the underlying OS semaphore. + SK_SPI ~SkSemaphore(); + + // Increment the counter n times. + // Generally it's better to call signal(n) instead of signal() n times. 
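    //
    // Usage sketch (editorial illustration, not part of the original header): a simple hand-off
    // between a producer and a consumer thread; produce_item() and consume_item() are
    // hypothetical placeholders.
    //
    //   SkSemaphore itemsReady;
    //   std::thread consumer([&] {
    //       for (;;) {
    //           itemsReady.wait();      // sleeps only while the logical count would go negative
    //           consume_item();
    //       }
    //   });
    //   produce_item();
    //   itemsReady.signal();            // wakes the consumer if it is currently sleeping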
+ void signal(int n = 1); + + // Decrement the counter by 1, + // then if the counter is < 0, sleep this thread until the counter is >= 0. + void wait(); + + // If the counter is positive, decrement it by 1 and return true, otherwise return false. + SK_SPI bool try_wait(); + +private: + // This implementation follows the general strategy of + // 'A Lightweight Semaphore with Partial Spinning' + // found here + // http://preshing.com/20150316/semaphores-are-surprisingly-versatile/ + // That article (and entire blog) are very much worth reading. + // + // We wrap an OS-provided semaphore with a user-space atomic counter that + // lets us avoid interacting with the OS semaphore unless strictly required: + // moving the count from >=0 to <0 or vice-versa, i.e. sleeping or waking threads. + struct OSSemaphore; + + SK_SPI void osSignal(int n); + SK_SPI void osWait(); + + std::atomic<int> fCount; + SkOnce fOSSemaphoreOnce; + OSSemaphore* fOSSemaphore; +}; + +inline void SkSemaphore::signal(int n) { + int prev = fCount.fetch_add(n, std::memory_order_release); + + // We only want to call the OS semaphore when our logical count crosses + // from <0 to >=0 (when we need to wake sleeping threads). + // + // This is easiest to think about with specific examples of prev and n. + // If n == 5 and prev == -3, there are 3 threads sleeping and we signal + // std::min(-(-3), 5) == 3 times on the OS semaphore, leaving the count at 2. + // + // If prev >= 0, no threads are waiting, std::min(-prev, n) is always <= 0, + // so we don't call the OS semaphore, leaving the count at (prev + n). + int toSignal = std::min(-prev, n); + if (toSignal > 0) { + this->osSignal(toSignal); + } +} + +inline void SkSemaphore::wait() { + // Since this fetches the value before the subtract, zero and below means that there are no + // resources left, so the thread needs to wait. + if (fCount.fetch_sub(1, std::memory_order_acquire) <= 0) { + SK_POTENTIALLY_BLOCKING_REGION_BEGIN; + this->osWait(); + SK_POTENTIALLY_BLOCKING_REGION_END; + } +} + +#endif//SkSemaphore_DEFINED diff --git a/src/deps/skia/include/private/SkShaderCodeDictionary.h b/src/deps/skia/include/private/SkShaderCodeDictionary.h new file mode 100644 index 000000000..1eb86fb87 --- /dev/null +++ b/src/deps/skia/include/private/SkShaderCodeDictionary.h @@ -0,0 +1,63 @@ +/* + * Copyright 2022 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#ifndef SkShaderCodeDictionary_DEFINED +#define SkShaderCodeDictionary_DEFINED + +#include <unordered_map> +#include "include/private/SkPaintParamsKey.h" +#include "include/private/SkSpinlock.h" +#include "include/private/SkUniquePaintParamsID.h" +#include "src/core/SkArenaAlloc.h" + +class SkShaderCodeDictionary { +public: + SkShaderCodeDictionary(); + + struct Entry { + public: + SkUniquePaintParamsID uniqueID() const { + SkASSERT(fUniqueID.isValid()); + return fUniqueID; + } + const SkPaintParamsKey& paintParamsKey() const { return fPaintParamsKey; } + + private: + friend class SkShaderCodeDictionary; + + Entry(const SkPaintParamsKey& paintParamsKey) : fPaintParamsKey(paintParamsKey) {} + + void setUniqueID(uint32_t newID) { + SkASSERT(!fUniqueID.isValid()); + fUniqueID = SkUniquePaintParamsID(newID); + } + + SkUniquePaintParamsID fUniqueID; // fixed-size (uint32_t) unique ID assigned to a key + SkPaintParamsKey fPaintParamsKey; // variable-length paint key descriptor + }; + + const Entry* findOrCreate(const SkPaintParamsKey&) SK_EXCLUDES(fSpinLock); + + const Entry* lookup(SkUniquePaintParamsID) const SK_EXCLUDES(fSpinLock); + +private: + Entry* makeEntry(const SkPaintParamsKey&); + + struct Hash { + size_t operator()(const SkPaintParamsKey&) const; + }; + + // TODO: can we do something better given this should have write-seldom/read-often behavior? + mutable SkSpinlock fSpinLock; + + std::unordered_map<SkPaintParamsKey, Entry*, Hash> fHash SK_GUARDED_BY(fSpinLock); + std::vector<Entry*> fEntryVector SK_GUARDED_BY(fSpinLock); + + SkArenaAlloc fArena{256}; +}; + +#endif // SkShaderCodeDictionary_DEFINED diff --git a/src/deps/skia/include/private/SkShadowFlags.h b/src/deps/skia/include/private/SkShadowFlags.h new file mode 100644 index 000000000..6438f041a --- /dev/null +++ b/src/deps/skia/include/private/SkShadowFlags.h @@ -0,0 +1,25 @@ +/* + * Copyright 2017 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkShadowFlags_DEFINED +#define SkShadowFlags_DEFINED + +// A set of flags shared between the SkAmbientShadowMaskFilter and the SkSpotShadowMaskFilter +enum SkShadowFlags { + kNone_ShadowFlag = 0x00, + /** The occluding object is not opaque. Knowing that the occluder is opaque allows + * us to cull shadow geometry behind it and improve performance. */ + kTransparentOccluder_ShadowFlag = 0x01, + /** Don't try to use analytic shadows. */ + kGeometricOnly_ShadowFlag = 0x02, + /** Light position represents a direction, light radius is blur radius at elevation 1 */ + kDirectionalLight_ShadowFlag = 0x04, + /** mask for all shadow flags */ + kAll_ShadowFlag = 0x07 +}; + +#endif diff --git a/src/deps/skia/include/private/SkSpinlock.h b/src/deps/skia/include/private/SkSpinlock.h new file mode 100644 index 000000000..e1d501168 --- /dev/null +++ b/src/deps/skia/include/private/SkSpinlock.h @@ -0,0 +1,57 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkSpinlock_DEFINED +#define SkSpinlock_DEFINED + +#include "include/core/SkTypes.h" +#include "include/private/SkThreadAnnotations.h" +#include <atomic> + +class SK_CAPABILITY("mutex") SkSpinlock { +public: + constexpr SkSpinlock() = default; + + void acquire() SK_ACQUIRE() { + // To act as a mutex, we need an acquire barrier when we acquire the lock. + if (fLocked.exchange(true, std::memory_order_acquire)) { + // Lock was contended. 
Fall back to an out-of-line spin loop. + this->contendedAcquire(); + } + } + + // Acquire the lock or fail (quickly). Lets the caller decide to do something other than wait. + bool tryAcquire() SK_TRY_ACQUIRE(true) { + // To act as a mutex, we need an acquire barrier when we acquire the lock. + if (fLocked.exchange(true, std::memory_order_acquire)) { + // Lock was contended. Let the caller decide what to do. + return false; + } + return true; + } + + void release() SK_RELEASE_CAPABILITY() { + // To act as a mutex, we need a release barrier when we release the lock. + fLocked.store(false, std::memory_order_release); + } + +private: + SK_API void contendedAcquire(); + + std::atomic<bool> fLocked{false}; +}; + +class SK_SCOPED_CAPABILITY SkAutoSpinlock { +public: + SkAutoSpinlock(SkSpinlock& mutex) SK_ACQUIRE(mutex) : fSpinlock(mutex) { fSpinlock.acquire(); } + ~SkAutoSpinlock() SK_RELEASE_CAPABILITY() { fSpinlock.release(); } + +private: + SkSpinlock& fSpinlock; +}; + +#endif//SkSpinlock_DEFINED diff --git a/src/deps/skia/include/private/SkTArray.h b/src/deps/skia/include/private/SkTArray.h new file mode 100644 index 000000000..9db5fd030 --- /dev/null +++ b/src/deps/skia/include/private/SkTArray.h @@ -0,0 +1,640 @@ +/* + * Copyright 2011 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkTArray_DEFINED +#define SkTArray_DEFINED + +#include "include/core/SkMath.h" +#include "include/core/SkTypes.h" +#include "include/private/SkMalloc.h" +#include "include/private/SkSafe32.h" +#include "include/private/SkTLogic.h" +#include "include/private/SkTemplates.h" +#include "include/private/SkTo.h" + +#include <string.h> +#include <initializer_list> +#include <memory> +#include <new> +#include <utility> + +/** SkTArray<T> implements a typical, mostly std::vector-like array. + Each T will be default-initialized on allocation, and ~T will be called on destruction. + + MEM_MOVE controls the behavior when a T needs to be moved (e.g. when the array is resized) + - true: T will be bit-copied via memcpy. + - false: T will be moved via move-constructors. + + Modern implementations of std::vector<T> will generally provide similar performance + characteristics when used with appropriate care. Consider using std::vector<T> in new code. +*/ +template <typename T, bool MEM_MOVE = false> class SkTArray { +private: + enum ReallocType { kExactFit, kGrowing, kShrinking }; + +public: + using value_type = T; + + /** + * Creates an empty array with no initial storage + */ + SkTArray() { this->init(0); } + + /** + * Creates an empty array that will preallocate space for reserveCount + * elements. + */ + explicit SkTArray(int reserveCount) : SkTArray() { this->reserve_back(reserveCount); } + + /** + * Copies one array to another. The new array will be heap allocated. + */ + SkTArray(const SkTArray& that) + : SkTArray(that.fItemArray, that.fCount) {} + + SkTArray(SkTArray&& that) { + if (that.fOwnMemory) { + fItemArray = that.fItemArray; + fCount = that.fCount; + fAllocCount = that.fAllocCount; + fOwnMemory = true; + fReserved = that.fReserved; + + that.fItemArray = nullptr; + that.fCount = 0; + that.fAllocCount = 0; + that.fOwnMemory = true; + that.fReserved = false; + } else { + this->init(that.fCount); + that.move(fItemArray); + that.fCount = 0; + } + } + + /** + * Creates a SkTArray by copying contents of a standard C array. The new + * array will be heap allocated. 
Be careful not to use this constructor + * when you really want the (void*, int) version. + */ + SkTArray(const T* array, int count) { + this->init(count); + this->copy(array); + } + /** + * Creates a SkTArray by copying contents of an initializer list. + */ + SkTArray(std::initializer_list<T> data) + : SkTArray(data.begin(), data.size()) {} + + SkTArray& operator=(const SkTArray& that) { + if (this == &that) { + return *this; + } + for (int i = 0; i < this->count(); ++i) { + fItemArray[i].~T(); + } + fCount = 0; + this->checkRealloc(that.count(), kExactFit); + fCount = that.fCount; + this->copy(that.fItemArray); + return *this; + } + SkTArray& operator=(SkTArray&& that) { + if (this == &that) { + return *this; + } + for (int i = 0; i < this->count(); ++i) { + fItemArray[i].~T(); + } + fCount = 0; + this->checkRealloc(that.count(), kExactFit); + fCount = that.fCount; + that.move(fItemArray); + that.fCount = 0; + return *this; + } + + ~SkTArray() { + for (int i = 0; i < this->count(); ++i) { + fItemArray[i].~T(); + } + if (fOwnMemory) { + sk_free(fItemArray); + } + } + + /** + * Resets to count() == 0 and resets any reserve count. + */ + void reset() { + this->pop_back_n(fCount); + fReserved = false; + } + + /** + * Resets to count() = n newly constructed T objects and resets any reserve count. + */ + void reset(int n) { + SkASSERT(n >= 0); + for (int i = 0; i < this->count(); ++i) { + fItemArray[i].~T(); + } + // Set fCount to 0 before calling checkRealloc so that no elements are moved. + fCount = 0; + this->checkRealloc(n, kExactFit); + fCount = n; + for (int i = 0; i < this->count(); ++i) { + new (fItemArray + i) T; + } + fReserved = false; + } + + /** + * Resets to a copy of a C array and resets any reserve count. + */ + void reset(const T* array, int count) { + for (int i = 0; i < this->count(); ++i) { + fItemArray[i].~T(); + } + fCount = 0; + this->checkRealloc(count, kExactFit); + fCount = count; + this->copy(array); + fReserved = false; + } + + /** + * Ensures there is enough reserved space for n additional elements. The is guaranteed at least + * until the array size grows above n and subsequently shrinks below n, any version of reset() + * is called, or reserve_back() is called again. + */ + void reserve_back(int n) { + SkASSERT(n >= 0); + if (n > 0) { + this->checkRealloc(n, kExactFit); + fReserved = fOwnMemory; + } else { + fReserved = false; + } + } + + void removeShuffle(int n) { + SkASSERT(n < this->count()); + int newCount = fCount - 1; + fCount = newCount; + fItemArray[n].~T(); + if (n != newCount) { + this->move(n, newCount); + } + } + + /** + * Number of elements in the array. + */ + int count() const { return fCount; } + + /** + * Is the array empty. + */ + bool empty() const { return !fCount; } + + /** + * Adds 1 new default-initialized T value and returns it by reference. Note + * the reference only remains valid until the next call that adds or removes + * elements. + */ + T& push_back() { + void* newT = this->push_back_raw(1); + return *new (newT) T; + } + + /** + * Version of above that uses a copy constructor to initialize the new item + */ + T& push_back(const T& t) { + void* newT = this->push_back_raw(1); + return *new (newT) T(t); + } + + /** + * Version of above that uses a move constructor to initialize the new item + */ + T& push_back(T&& t) { + void* newT = this->push_back_raw(1); + return *new (newT) T(std::move(t)); + } + + /** + * Construct a new T at the back of this array. + */ + template<class... Args> T& emplace_back(Args&&... 
args) { + void* newT = this->push_back_raw(1); + return *new (newT) T(std::forward<Args>(args)...); + } + + /** + * Allocates n more default-initialized T values, and returns the address of + * the start of that new range. Note: this address is only valid until the + * next API call made on the array that might add or remove elements. + */ + T* push_back_n(int n) { + SkASSERT(n >= 0); + void* newTs = this->push_back_raw(n); + for (int i = 0; i < n; ++i) { + new (static_cast<char*>(newTs) + i * sizeof(T)) T; + } + return static_cast<T*>(newTs); + } + + /** + * Version of above that uses a copy constructor to initialize all n items + * to the same T. + */ + T* push_back_n(int n, const T& t) { + SkASSERT(n >= 0); + void* newTs = this->push_back_raw(n); + for (int i = 0; i < n; ++i) { + new (static_cast<char*>(newTs) + i * sizeof(T)) T(t); + } + return static_cast<T*>(newTs); + } + + /** + * Version of above that uses a copy constructor to initialize the n items + * to separate T values. + */ + T* push_back_n(int n, const T t[]) { + SkASSERT(n >= 0); + this->checkRealloc(n, kGrowing); + for (int i = 0; i < n; ++i) { + new (fItemArray + fCount + i) T(t[i]); + } + fCount += n; + return fItemArray + fCount - n; + } + + /** + * Version of above that uses the move constructor to set n items. + */ + T* move_back_n(int n, T* t) { + SkASSERT(n >= 0); + this->checkRealloc(n, kGrowing); + for (int i = 0; i < n; ++i) { + new (fItemArray + fCount + i) T(std::move(t[i])); + } + fCount += n; + return fItemArray + fCount - n; + } + + /** + * Removes the last element. Not safe to call when count() == 0. + */ + void pop_back() { + SkASSERT(fCount > 0); + --fCount; + fItemArray[fCount].~T(); + this->checkRealloc(0, kShrinking); + } + + /** + * Removes the last n elements. Not safe to call when count() < n. + */ + void pop_back_n(int n) { + SkASSERT(n >= 0); + SkASSERT(this->count() >= n); + fCount -= n; + for (int i = 0; i < n; ++i) { + fItemArray[fCount + i].~T(); + } + this->checkRealloc(0, kShrinking); + } + + /** + * Pushes or pops from the back to resize. Pushes will be default + * initialized. + */ + void resize_back(int newCount) { + SkASSERT(newCount >= 0); + + if (newCount > this->count()) { + this->push_back_n(newCount - fCount); + } else if (newCount < this->count()) { + this->pop_back_n(fCount - newCount); + } + } + + /** Swaps the contents of this array with that array. Does a pointer swap if possible, + otherwise copies the T values. */ + void swap(SkTArray& that) { + using std::swap; + if (this == &that) { + return; + } + if (fOwnMemory && that.fOwnMemory) { + swap(fItemArray, that.fItemArray); + + auto count = fCount; + fCount = that.fCount; + that.fCount = count; + + auto allocCount = fAllocCount; + fAllocCount = that.fAllocCount; + that.fAllocCount = allocCount; + } else { + // This could be more optimal... + SkTArray copy(std::move(that)); + that = std::move(*this); + *this = std::move(copy); + } + } + + T* begin() { + return fItemArray; + } + const T* begin() const { + return fItemArray; + } + T* end() { + return fItemArray ? fItemArray + fCount : nullptr; + } + const T* end() const { + return fItemArray ? fItemArray + fCount : nullptr; + } + T* data() { return fItemArray; } + const T* data() const { return fItemArray; } + size_t size() const { return (size_t)fCount; } + void resize(size_t count) { this->resize_back((int)count); } + + /** + * Get the i^th element. 
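     *
     * Basic usage sketch (editorial illustration, not part of the original header):
     *
     *   SkTArray<SkString> names;
     *   names.push_back(SkString("alpha"));
     *   names.emplace_back("beta");              // constructed in place at the back
     *   for (int i = 0; i < names.count(); ++i) {
     *       SkDebugf("%s\n", names[i].c_str());  // indexing is bounds-asserted in debug builds
     *   }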
+ */ + T& operator[] (int i) { + SkASSERT(i < this->count()); + SkASSERT(i >= 0); + return fItemArray[i]; + } + + const T& operator[] (int i) const { + SkASSERT(i < this->count()); + SkASSERT(i >= 0); + return fItemArray[i]; + } + + T& at(int i) { return (*this)[i]; } + const T& at(int i) const { return (*this)[i]; } + + /** + * equivalent to operator[](0) + */ + T& front() { SkASSERT(fCount > 0); return fItemArray[0];} + + const T& front() const { SkASSERT(fCount > 0); return fItemArray[0];} + + /** + * equivalent to operator[](count() - 1) + */ + T& back() { SkASSERT(fCount); return fItemArray[fCount - 1];} + + const T& back() const { SkASSERT(fCount > 0); return fItemArray[fCount - 1];} + + /** + * equivalent to operator[](count()-1-i) + */ + T& fromBack(int i) { + SkASSERT(i >= 0); + SkASSERT(i < this->count()); + return fItemArray[fCount - i - 1]; + } + + const T& fromBack(int i) const { + SkASSERT(i >= 0); + SkASSERT(i < this->count()); + return fItemArray[fCount - i - 1]; + } + + bool operator==(const SkTArray<T, MEM_MOVE>& right) const { + int leftCount = this->count(); + if (leftCount != right.count()) { + return false; + } + for (int index = 0; index < leftCount; ++index) { + if (fItemArray[index] != right.fItemArray[index]) { + return false; + } + } + return true; + } + + bool operator!=(const SkTArray<T, MEM_MOVE>& right) const { + return !(*this == right); + } + + int capacity() const { + return fAllocCount; + } + +protected: + /** + * Creates an empty array that will use the passed storage block until it + * is insufficiently large to hold the entire array. + */ + template <int N> + SkTArray(SkAlignedSTStorage<N,T>* storage) { + this->initWithPreallocatedStorage(0, storage->get(), N); + } + + /** + * Copy a C array, using preallocated storage if preAllocCount >= + * count. Otherwise storage will only be used when array shrinks + * to fit. + */ + template <int N> + SkTArray(const T* array, int count, SkAlignedSTStorage<N,T>* storage) { + this->initWithPreallocatedStorage(count, storage->get(), N); + this->copy(array); + } + +private: + void init(int count) { + fCount = SkToU32(count); + if (!count) { + fAllocCount = 0; + fItemArray = nullptr; + } else { + fAllocCount = SkToU32(std::max(count, kMinHeapAllocCount)); + fItemArray = (T*)sk_malloc_throw((size_t)fAllocCount, sizeof(T)); + } + fOwnMemory = true; + fReserved = false; + } + + void initWithPreallocatedStorage(int count, void* preallocStorage, int preallocCount) { + SkASSERT(count >= 0); + SkASSERT(preallocCount > 0); + SkASSERT(preallocStorage); + fCount = count; + fItemArray = nullptr; + fReserved = false; + if (count > preallocCount) { + fAllocCount = std::max(count, kMinHeapAllocCount); + fItemArray = (T*)sk_malloc_throw(fAllocCount, sizeof(T)); + fOwnMemory = true; + } else { + fAllocCount = preallocCount; + fItemArray = (T*)preallocStorage; + fOwnMemory = false; + } + } + + /** In the following move and copy methods, 'dst' is assumed to be uninitialized raw storage. + * In the following move methods, 'src' is destroyed leaving behind uninitialized raw storage. + */ + void copy(const T* src) { + // Some types may be trivially copyable, in which case we *could* use memcopy; but + // MEM_MOVE == true implies that the type is trivially movable, and not necessarily + // trivially copyable (think sk_sp<>). So short of adding another template arg, we + // must be conservative and use copy construction. 
+ for (int i = 0; i < this->count(); ++i) { + new (fItemArray + i) T(src[i]); + } + } + + template <bool E = MEM_MOVE> std::enable_if_t<E, void> move(int dst, int src) { + memcpy(&fItemArray[dst], &fItemArray[src], sizeof(T)); + } + template <bool E = MEM_MOVE> std::enable_if_t<E, void> move(void* dst) { + sk_careful_memcpy(dst, fItemArray, fCount * sizeof(T)); + } + + template <bool E = MEM_MOVE> std::enable_if_t<!E, void> move(int dst, int src) { + new (&fItemArray[dst]) T(std::move(fItemArray[src])); + fItemArray[src].~T(); + } + template <bool E = MEM_MOVE> std::enable_if_t<!E, void> move(void* dst) { + for (int i = 0; i < this->count(); ++i) { + new (static_cast<char*>(dst) + sizeof(T) * (size_t)i) T(std::move(fItemArray[i])); + fItemArray[i].~T(); + } + } + + static constexpr int kMinHeapAllocCount = 8; + + // Helper function that makes space for n objects, adjusts the count, but does not initialize + // the new objects. + void* push_back_raw(int n) { + this->checkRealloc(n, kGrowing); + void* ptr = fItemArray + fCount; + fCount += n; + return ptr; + } + + void checkRealloc(int delta, ReallocType reallocType) { + SkASSERT(fCount >= 0); + SkASSERT(fAllocCount >= 0); + SkASSERT(-delta <= this->count()); + + // Move into 64bit math temporarily, to avoid local overflows + int64_t newCount = fCount + delta; + + // We allow fAllocCount to be in the range [newCount, 3*newCount]. We also never shrink + // when we're currently using preallocated memory, would allocate less than + // kMinHeapAllocCount, or a reserve count was specified that has yet to be exceeded. + bool mustGrow = newCount > fAllocCount; + bool shouldShrink = fAllocCount > 3 * newCount && fOwnMemory && !fReserved; + if (!mustGrow && !shouldShrink) { + return; + } + + int64_t newAllocCount = newCount; + if (reallocType != kExactFit) { + // Whether we're growing or shrinking, leave at least 50% extra space for future growth. + newAllocCount += ((newCount + 1) >> 1); + // Align the new allocation count to kMinHeapAllocCount. + static_assert(SkIsPow2(kMinHeapAllocCount), "min alloc count not power of two."); + newAllocCount = (newAllocCount + (kMinHeapAllocCount - 1)) & ~(kMinHeapAllocCount - 1); + } + + // At small sizes the old and new alloc count can both be kMinHeapAllocCount. + if (newAllocCount == fAllocCount) { + return; + } + + fAllocCount = SkToU32(Sk64_pin_to_s32(newAllocCount)); + SkASSERT(fAllocCount >= newCount); + T* newItemArray = (T*)sk_malloc_throw((size_t)fAllocCount, sizeof(T)); + this->move(newItemArray); + if (fOwnMemory) { + sk_free(fItemArray); + } + fItemArray = newItemArray; + fOwnMemory = true; + fReserved = false; + } + + T* fItemArray; + uint32_t fOwnMemory : 1; + uint32_t fCount : 31; + uint32_t fReserved : 1; + uint32_t fAllocCount : 31; +}; + +template <typename T, bool M> static inline void swap(SkTArray<T, M>& a, SkTArray<T, M>& b) { + a.swap(b); +} + +template<typename T, bool MEM_MOVE> constexpr int SkTArray<T, MEM_MOVE>::kMinHeapAllocCount; + +/** + * Subclass of SkTArray that contains a preallocated memory block for the array. 
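 *
 * Usage sketch (editorial illustration, not part of the original header): the first N elements
 * live inside the object itself, so small arrays avoid heap allocation entirely.
 *
 *   SkSTArray<16, SkPoint> pts;      // inline room for 16 points (on the stack here)
 *   pts.push_back({0.0f, 0.0f});
 *   pts.push_back({10.0f, 10.0f});   // spills to the heap only past 16 elements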
+ */ +template <int N, typename T, bool MEM_MOVE = false> +class SkSTArray : private SkAlignedSTStorage<N,T>, public SkTArray<T, MEM_MOVE> { +private: + using STORAGE = SkAlignedSTStorage<N,T>; + using INHERITED = SkTArray<T, MEM_MOVE>; + +public: + SkSTArray() + : STORAGE{}, INHERITED(static_cast<STORAGE*>(this)) {} + + SkSTArray(const T* array, int count) + : STORAGE{}, INHERITED(array, count, static_cast<STORAGE*>(this)) {} + + SkSTArray(std::initializer_list<T> data) + : SkSTArray(data.begin(), data.size()) {} + + explicit SkSTArray(int reserveCount) + : SkSTArray() { + this->reserve_back(reserveCount); + } + + SkSTArray (const SkSTArray& that) : SkSTArray() { *this = that; } + explicit SkSTArray(const INHERITED& that) : SkSTArray() { *this = that; } + SkSTArray ( SkSTArray&& that) : SkSTArray() { *this = std::move(that); } + explicit SkSTArray( INHERITED&& that) : SkSTArray() { *this = std::move(that); } + + SkSTArray& operator=(const SkSTArray& that) { + INHERITED::operator=(that); + return *this; + } + SkSTArray& operator=(const INHERITED& that) { + INHERITED::operator=(that); + return *this; + } + + SkSTArray& operator=(SkSTArray&& that) { + INHERITED::operator=(std::move(that)); + return *this; + } + SkSTArray& operator=(INHERITED&& that) { + INHERITED::operator=(std::move(that)); + return *this; + } +}; + +#endif diff --git a/src/deps/skia/include/private/SkTDArray.h b/src/deps/skia/include/private/SkTDArray.h new file mode 100644 index 000000000..d06b46f72 --- /dev/null +++ b/src/deps/skia/include/private/SkTDArray.h @@ -0,0 +1,385 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + + +#ifndef SkTDArray_DEFINED +#define SkTDArray_DEFINED + +#include "include/core/SkTypes.h" +#include "include/private/SkMalloc.h" +#include "include/private/SkTo.h" + +#include <algorithm> +#include <initializer_list> +#include <utility> + +/** SkTDArray<T> implements a std::vector-like array for raw data-only objects that do not require + construction or destruction. The constructor and destructor for T will not be called; T objects + will always be moved via raw memcpy. Newly created T objects will contain uninitialized memory. + + In most cases, std::vector<T> can provide a similar level of performance for POD objects when + used with appropriate care. In new code, consider std::vector<T> instead. 
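+
+    Illustrative only (a hypothetical use, not part of the original header):
+      SkTDArray<int> a;
+      a.push_back(3);     // raw memory; no constructor or destructor is run
+      a.append(2);        // appends two uninitialized slots
+      SkASSERT(a.count() == 3);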
+*/ +template <typename T> class SkTDArray { +public: + SkTDArray() : fArray(nullptr), fReserve(0), fCount(0) {} + SkTDArray(const T src[], int count) { + SkASSERT(src || count == 0); + + fReserve = fCount = 0; + fArray = nullptr; + if (count) { + fArray = (T*)sk_malloc_throw(SkToSizeT(count) * sizeof(T)); + memcpy(fArray, src, sizeof(T) * SkToSizeT(count)); + fReserve = fCount = count; + } + } + SkTDArray(const std::initializer_list<T>& list) : SkTDArray(list.begin(), list.size()) {} + SkTDArray(const SkTDArray<T>& src) : fArray(nullptr), fReserve(0), fCount(0) { + SkTDArray<T> tmp(src.fArray, src.fCount); + this->swap(tmp); + } + SkTDArray(SkTDArray<T>&& src) : fArray(nullptr), fReserve(0), fCount(0) { + this->swap(src); + } + ~SkTDArray() { + sk_free(fArray); + } + + SkTDArray<T>& operator=(const SkTDArray<T>& src) { + if (this != &src) { + if (src.fCount > fReserve) { + SkTDArray<T> tmp(src.fArray, src.fCount); + this->swap(tmp); + } else { + sk_careful_memcpy(fArray, src.fArray, sizeof(T) * SkToSizeT(src.fCount)); + fCount = src.fCount; + } + } + return *this; + } + SkTDArray<T>& operator=(SkTDArray<T>&& src) { + if (this != &src) { + this->swap(src); + src.reset(); + } + return *this; + } + + friend bool operator==(const SkTDArray<T>& a, const SkTDArray<T>& b) { + return a.fCount == b.fCount && + (a.fCount == 0 || + !memcmp(a.fArray, b.fArray, SkToSizeT(a.fCount) * sizeof(T))); + } + friend bool operator!=(const SkTDArray<T>& a, const SkTDArray<T>& b) { + return !(a == b); + } + + void swap(SkTDArray<T>& that) { + using std::swap; + swap(fArray, that.fArray); + swap(fReserve, that.fReserve); + swap(fCount, that.fCount); + } + + bool isEmpty() const { return fCount == 0; } + bool empty() const { return this->isEmpty(); } + + /** + * Return the number of elements in the array + */ + int count() const { return fCount; } + size_t size() const { return fCount; } + + /** + * Return the total number of elements allocated. + * reserved() - count() gives you the number of elements you can add + * without causing an allocation. + */ + int reserved() const { return fReserve; } + + /** + * return the number of bytes in the array: count * sizeof(T) + */ + size_t bytes() const { return fCount * sizeof(T); } + + T* begin() { return fArray; } + const T* begin() const { return fArray; } + T* end() { return fArray ? fArray + fCount : nullptr; } + const T* end() const { return fArray ? fArray + fCount : nullptr; } + + T& operator[](int index) { + SkASSERT(index < fCount); + return fArray[index]; + } + const T& operator[](int index) const { + SkASSERT(index < fCount); + return fArray[index]; + } + + T& getAt(int index) { + return (*this)[index]; + } + + const T& back() const { SkASSERT(fCount > 0); return fArray[fCount-1]; } + T& back() { SkASSERT(fCount > 0); return fArray[fCount-1]; } + + void reset() { + if (fArray) { + sk_free(fArray); + fArray = nullptr; + fReserve = fCount = 0; + } else { + SkASSERT(fReserve == 0 && fCount == 0); + } + } + + void rewind() { + // same as setCount(0) + fCount = 0; + } + + /** + * Sets the number of elements in the array. + * If the array does not have space for count elements, it will increase + * the storage allocated to some amount greater than that required. + * It will never shrink the storage. 
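+     * For example (illustrative): on an empty SkTDArray<int>, setCount(10) makes
+     * elements [0, 10) addressable, but their contents remain uninitialized.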
+ */ + void setCount(int count) { + SkASSERT(count >= 0); + if (count > fReserve) { + this->resizeStorageToAtLeast(count); + } + fCount = count; + } + + void setReserve(int reserve) { + SkASSERT(reserve >= 0); + if (reserve > fReserve) { + this->resizeStorageToAtLeast(reserve); + } + } + void reserve(size_t n) { + SkASSERT_RELEASE(SkTFitsIn<int>(n)); + this->setReserve(SkToInt(n)); + } + + T* prepend() { + this->adjustCount(1); + memmove(fArray + 1, fArray, (fCount - 1) * sizeof(T)); + return fArray; + } + + T* append() { + return this->append(1, nullptr); + } + T* append(int count, const T* src = nullptr) { + int oldCount = fCount; + if (count) { + SkASSERT(src == nullptr || fArray == nullptr || + src + count <= fArray || fArray + oldCount <= src); + + this->adjustCount(count); + if (src) { + memcpy(fArray + oldCount, src, sizeof(T) * count); + } + } + return fArray + oldCount; + } + + T* insert(int index) { + return this->insert(index, 1, nullptr); + } + T* insert(int index, int count, const T* src = nullptr) { + SkASSERT(count); + SkASSERT(index <= fCount); + size_t oldCount = fCount; + this->adjustCount(count); + T* dst = fArray + index; + memmove(dst + count, dst, sizeof(T) * (oldCount - index)); + if (src) { + memcpy(dst, src, sizeof(T) * count); + } + return dst; + } + + void remove(int index, int count = 1) { + SkASSERT(index + count <= fCount); + fCount = fCount - count; + memmove(fArray + index, fArray + index + count, sizeof(T) * (fCount - index)); + } + + void removeShuffle(int index) { + SkASSERT(index < fCount); + int newCount = fCount - 1; + fCount = newCount; + if (index != newCount) { + memcpy(fArray + index, fArray + newCount, sizeof(T)); + } + } + + int find(const T& elem) const { + const T* iter = fArray; + const T* stop = fArray + fCount; + + for (; iter < stop; iter++) { + if (*iter == elem) { + return SkToInt(iter - fArray); + } + } + return -1; + } + + int rfind(const T& elem) const { + const T* iter = fArray + fCount; + const T* stop = fArray; + + while (iter > stop) { + if (*--iter == elem) { + return SkToInt(iter - stop); + } + } + return -1; + } + + /** + * Returns true iff the array contains this element. + */ + bool contains(const T& elem) const { + return (this->find(elem) >= 0); + } + + /** + * Copies up to max elements into dst. The number of items copied is + * capped by count - index. The actual number copied is returned. 
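+     * For example (illustrative): with count() == 10, copyRange(dst, 8, 5)
+     * copies elements 8 and 9 into dst and returns 2.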
+ */ + int copyRange(T* dst, int index, int max) const { + SkASSERT(max >= 0); + SkASSERT(!max || dst); + if (index >= fCount) { + return 0; + } + int count = std::min(max, fCount - index); + memcpy(dst, fArray + index, sizeof(T) * count); + return count; + } + + void copy(T* dst) const { + this->copyRange(dst, 0, fCount); + } + + // routines to treat the array like a stack + void push_back(const T& v) { *this->append() = v; } + T* push() { return this->append(); } + const T& top() const { return (*this)[fCount - 1]; } + T& top() { return (*this)[fCount - 1]; } + void pop(T* elem) { SkASSERT(fCount > 0); if (elem) *elem = (*this)[fCount - 1]; --fCount; } + void pop() { SkASSERT(fCount > 0); --fCount; } + + void deleteAll() { + T* iter = fArray; + T* stop = fArray + fCount; + while (iter < stop) { + delete *iter; + iter += 1; + } + this->reset(); + } + + void freeAll() { + T* iter = fArray; + T* stop = fArray + fCount; + while (iter < stop) { + sk_free(*iter); + iter += 1; + } + this->reset(); + } + + void unrefAll() { + T* iter = fArray; + T* stop = fArray + fCount; + while (iter < stop) { + (*iter)->unref(); + iter += 1; + } + this->reset(); + } + + void safeUnrefAll() { + T* iter = fArray; + T* stop = fArray + fCount; + while (iter < stop) { + SkSafeUnref(*iter); + iter += 1; + } + this->reset(); + } + +#ifdef SK_DEBUG + void validate() const { + SkASSERT((fReserve == 0 && fArray == nullptr) || + (fReserve > 0 && fArray != nullptr)); + SkASSERT(fCount <= fReserve); + } +#endif + + void shrinkToFit() { + if (fReserve != fCount) { + SkASSERT(fReserve > fCount); + fReserve = fCount; + fArray = (T*)sk_realloc_throw(fArray, fReserve * sizeof(T)); + } + } + +private: + T* fArray; + int fReserve; // size of the allocation in fArray (#elements) + int fCount; // logical number of elements (fCount <= fReserve) + + /** + * Adjusts the number of elements in the array. + * This is the same as calling setCount(count() + delta). + */ + void adjustCount(int delta) { + SkASSERT(delta > 0); + + // We take care to avoid overflow here. + // The sum of fCount and delta is at most 4294967294, which fits fine in uint32_t. + uint32_t count = (uint32_t)fCount + (uint32_t)delta; + SkASSERT_RELEASE( SkTFitsIn<int>(count) ); + + this->setCount(SkTo<int>(count)); + } + + /** + * Increase the storage allocation such that it can hold (fCount + extra) + * elements. + * It never shrinks the allocation, and it may increase the allocation by + * more than is strictly required, based on a private growth heuristic. + * + * note: does NOT modify fCount + */ + void resizeStorageToAtLeast(int count) { + SkASSERT(count > fReserve); + + // We take care to avoid overflow here. + // The maximum value we can get for reserve here is 2684354563, which fits in uint32_t. + uint32_t reserve = (uint32_t)count + 4; + reserve += reserve / 4; + SkASSERT_RELEASE( SkTFitsIn<int>(reserve) ); + + fReserve = SkTo<int>(reserve); + fArray = (T*)sk_realloc_throw(fArray, (size_t)fReserve * sizeof(T)); + } +}; + +template <typename T> static inline void swap(SkTDArray<T>& a, SkTDArray<T>& b) { + a.swap(b); +} + +#endif diff --git a/src/deps/skia/include/private/SkTFitsIn.h b/src/deps/skia/include/private/SkTFitsIn.h new file mode 100644 index 000000000..a912f13e0 --- /dev/null +++ b/src/deps/skia/include/private/SkTFitsIn.h @@ -0,0 +1,99 @@ +/* + * Copyright 2013 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#ifndef SkTFitsIn_DEFINED +#define SkTFitsIn_DEFINED + +#include <limits> +#include <stdint.h> +#include <type_traits> + +/** + * std::underlying_type is only defined for enums. For integral types, we just want the type. + */ +template <typename T, class Enable = void> +struct sk_strip_enum { + typedef T type; +}; + +template <typename T> +struct sk_strip_enum<T, typename std::enable_if<std::is_enum<T>::value>::type> { + typedef typename std::underlying_type<T>::type type; +}; + + +/** + * In C++ an unsigned to signed cast where the source value cannot be represented in the destination + * type results in an implementation defined destination value. Unlike C, C++ does not allow a trap. + * This makes "(S)(D)s == s" a possibly useful test. However, there are two cases where this is + * incorrect: + * + * when testing if a value of a smaller signed type can be represented in a larger unsigned type + * (int8_t)(uint16_t)-1 == -1 => (int8_t)0xFFFF == -1 => [implementation defined] == -1 + * + * when testing if a value of a larger unsigned type can be represented in a smaller signed type + * (uint16_t)(int8_t)0xFFFF == 0xFFFF => (uint16_t)-1 == 0xFFFF => 0xFFFF == 0xFFFF => true. + * + * Consider the cases: + * u = unsigned, less digits + * U = unsigned, more digits + * s = signed, less digits + * S = signed, more digits + * v is the value we're considering. + * + * u -> U: (u)(U)v == v, trivially true + * U -> u: (U)(u)v == v, both casts well defined, test works + * s -> S: (s)(S)v == v, trivially true + * S -> s: (S)(s)v == v, first cast implementation value, second cast defined, test works + * s -> U: (s)(U)v == v, *this is bad*, the second cast results in implementation defined value + * S -> u: (S)(u)v == v, the second cast is required to prevent promotion of rhs to unsigned + * u -> S: (u)(S)v == v, trivially true + * U -> s: (U)(s)v == v, *this is bad*, + * first cast results in implementation defined value, + * second cast is defined. However, this creates false positives + * uint16_t x = 0xFFFF + * (uint16_t)(int8_t)x == x + * => (uint16_t)-1 == x + * => 0xFFFF == x + * => true + * + * So for the eight cases three are trivially true, three more are valid casts, and two are special. + * The two 'full' checks which otherwise require two comparisons are valid cast checks. + * The two remaining checks s -> U [v >= 0] and U -> s [v <= max(s)] can be done with one op. + */ + +template <typename D, typename S> +static constexpr inline +typename std::enable_if<(std::is_integral<S>::value || std::is_enum<S>::value) && + (std::is_integral<D>::value || std::is_enum<D>::value), bool>::type +/*bool*/ SkTFitsIn(S src) { + // SkTFitsIn() is used in public headers, so needs to be written targeting at most C++11. + return + + // E.g. (int8_t)(uint8_t) int8_t(-1) == -1, but the uint8_t == 255, not -1. + (std::is_signed<S>::value && std::is_unsigned<D>::value && sizeof(S) <= sizeof(D)) ? + (S)0 <= src : + + // E.g. (uint8_t)(int8_t) uint8_t(255) == 255, but the int8_t == -1. + (std::is_signed<D>::value && std::is_unsigned<S>::value && sizeof(D) <= sizeof(S)) ? + src <= (S)std::numeric_limits<typename sk_strip_enum<D>::type>::max() : + +#if !defined(SK_DEBUG) && !defined(__MSVC_RUNTIME_CHECKS ) + // Correct (simple) version. This trips up MSVC's /RTCc run-time checking. + (S)(D)src == src; +#else + // More complex version that's safe with /RTCc. Used in all debug builds, for coverage. + (std::is_signed<S>::value) ? 
+ (intmax_t)src >= (intmax_t)std::numeric_limits<typename sk_strip_enum<D>::type>::min() && + (intmax_t)src <= (intmax_t)std::numeric_limits<typename sk_strip_enum<D>::type>::max() : + + // std::is_unsigned<S> ? + (uintmax_t)src <= (uintmax_t)std::numeric_limits<typename sk_strip_enum<D>::type>::max(); +#endif +} + +#endif diff --git a/src/deps/skia/include/private/SkTHash.h b/src/deps/skia/include/private/SkTHash.h new file mode 100644 index 000000000..9ed039748 --- /dev/null +++ b/src/deps/skia/include/private/SkTHash.h @@ -0,0 +1,548 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkTHash_DEFINED +#define SkTHash_DEFINED + +#include "include/core/SkTypes.h" +#include "include/private/SkChecksum.h" +#include "include/private/SkTemplates.h" +#include <new> +#include <utility> + +// Before trying to use SkTHashTable, look below to see if SkTHashMap or SkTHashSet works for you. +// They're easier to use, usually perform the same, and have fewer sharp edges. + +// T and K are treated as ordinary copyable C++ types. +// Traits must have: +// - static K GetKey(T) +// - static uint32_t Hash(K) +// If the key is large and stored inside T, you may want to make K a const&. +// Similarly, if T is large you might want it to be a pointer. +template <typename T, typename K, typename Traits = T> +class SkTHashTable { +public: + SkTHashTable() = default; + ~SkTHashTable() = default; + + SkTHashTable(const SkTHashTable& that) { *this = that; } + SkTHashTable( SkTHashTable&& that) { *this = std::move(that); } + + SkTHashTable& operator=(const SkTHashTable& that) { + if (this != &that) { + fCount = that.fCount; + fCapacity = that.fCapacity; + fSlots.reset(that.fCapacity); + for (int i = 0; i < fCapacity; i++) { + fSlots[i] = that.fSlots[i]; + } + } + return *this; + } + + SkTHashTable& operator=(SkTHashTable&& that) { + if (this != &that) { + fCount = that.fCount; + fCapacity = that.fCapacity; + fSlots = std::move(that.fSlots); + + that.fCount = that.fCapacity = 0; + } + return *this; + } + + // Clear the table. + void reset() { *this = SkTHashTable(); } + + // How many entries are in the table? + int count() const { return fCount; } + + // How many slots does the table contain? (Note that unlike an array, hash tables can grow + // before reaching 100% capacity.) + int capacity() const { return fCapacity; } + + // Approximately how many bytes of memory do we use beyond sizeof(*this)? + size_t approxBytesUsed() const { return fCapacity * sizeof(Slot); } + + // !!!!!!!!!!!!!!!!! CAUTION !!!!!!!!!!!!!!!!! + // set(), find() and foreach() all allow mutable access to table entries. + // If you change an entry so that it no longer has the same key, all hell + // will break loose. Do not do that! + // + // Please prefer to use SkTHashMap or SkTHashSet, which do not have this danger. + + // The pointers returned by set() and find() are valid only until the next call to set(). + // The pointers you receive in foreach() are only valid for its duration. + + // Copy val into the hash table, returning a pointer to the copy now in the table. + // If there already is an entry in the table with the same key, we overwrite it. + T* set(T val) { + if (4 * fCount >= 3 * fCapacity) { + this->resize(fCapacity > 0 ? fCapacity * 2 : 4); + } + return this->uncheckedSet(std::move(val)); + } + + // If there is an entry in the table with this key, return a pointer to it. If not, null. 
+ T* find(const K& key) const { + uint32_t hash = Hash(key); + int index = hash & (fCapacity-1); + for (int n = 0; n < fCapacity; n++) { + Slot& s = fSlots[index]; + if (s.empty()) { + return nullptr; + } + if (hash == s.hash && key == Traits::GetKey(*s)) { + return &*s; + } + index = this->next(index); + } + SkASSERT(fCapacity == 0); + return nullptr; + } + + // If there is an entry in the table with this key, return it. If not, null. + // This only works for pointer type T, and cannot be used to find an nullptr entry. + T findOrNull(const K& key) const { + if (T* p = this->find(key)) { + return *p; + } + return nullptr; + } + + // Remove the value with this key from the hash table. + void remove(const K& key) { + SkASSERT(this->find(key)); + + uint32_t hash = Hash(key); + int index = hash & (fCapacity-1); + for (int n = 0; n < fCapacity; n++) { + Slot& s = fSlots[index]; + SkASSERT(s.has_value()); + if (hash == s.hash && key == Traits::GetKey(*s)) { + this->removeSlot(index); + if (4 * fCount <= fCapacity && fCapacity > 4) { + this->resize(fCapacity / 2); + } + return; + } + index = this->next(index); + } + } + + // Call fn on every entry in the table. You may mutate the entries, but be very careful. + template <typename Fn> // f(T*) + void foreach(Fn&& fn) { + for (int i = 0; i < fCapacity; i++) { + if (fSlots[i].has_value()) { + fn(&*fSlots[i]); + } + } + } + + // Call fn on every entry in the table. You may not mutate anything. + template <typename Fn> // f(T) or f(const T&) + void foreach(Fn&& fn) const { + for (int i = 0; i < fCapacity; i++) { + if (fSlots[i].has_value()) { + fn(*fSlots[i]); + } + } + } + + // A basic iterator-like class which disallows mutation; sufficient for range-based for loops. + // Intended for use by SkTHashMap and SkTHashSet via begin() and end(). + // Adding or removing elements may invalidate all iterators. + template <typename SlotVal> + class Iter { + public: + using TTable = SkTHashTable<T, K, Traits>; + + Iter(const TTable* table, int slot) : fTable(table), fSlot(slot) {} + + static Iter MakeBegin(const TTable* table) { + return Iter{table, table->firstPopulatedSlot()}; + } + + static Iter MakeEnd(const TTable* table) { + return Iter{table, table->capacity()}; + } + + const SlotVal& operator*() const { + return *fTable->slot(fSlot); + } + + const SlotVal* operator->() const { + return fTable->slot(fSlot); + } + + bool operator==(const Iter& that) const { + // Iterators from different tables shouldn't be compared against each other. + SkASSERT(fTable == that.fTable); + return fSlot == that.fSlot; + } + + bool operator!=(const Iter& that) const { + return !(*this == that); + } + + Iter& operator++() { + fSlot = fTable->nextPopulatedSlot(fSlot); + return *this; + } + + Iter operator++(int) { + Iter old = *this; + this->operator++(); + return old; + } + + protected: + const TTable* fTable; + int fSlot; + }; + +private: + // Finds the first non-empty slot for an iterator. + int firstPopulatedSlot() const { + for (int i = 0; i < fCapacity; i++) { + if (fSlots[i].has_value()) { + return i; + } + } + return fCapacity; + } + + // Increments an iterator's slot. + int nextPopulatedSlot(int currentSlot) const { + for (int i = currentSlot + 1; i < fCapacity; i++) { + if (fSlots[i].has_value()) { + return i; + } + } + return fCapacity; + } + + // Reads from an iterator's slot. 
+ const T* slot(int i) const { + SkASSERT(fSlots[i].has_value()); + return &*fSlots[i]; + } + + T* uncheckedSet(T&& val) { + const K& key = Traits::GetKey(val); + SkASSERT(key == key); + uint32_t hash = Hash(key); + int index = hash & (fCapacity-1); + for (int n = 0; n < fCapacity; n++) { + Slot& s = fSlots[index]; + if (s.empty()) { + // New entry. + s.emplace(std::move(val), hash); + fCount++; + return &*s; + } + if (hash == s.hash && key == Traits::GetKey(*s)) { + // Overwrite previous entry. + // Note: this triggers extra copies when adding the same value repeatedly. + s.emplace(std::move(val), hash); + return &*s; + } + + index = this->next(index); + } + SkASSERT(false); + return nullptr; + } + + void resize(int capacity) { + int oldCapacity = fCapacity; + SkDEBUGCODE(int oldCount = fCount); + + fCount = 0; + fCapacity = capacity; + SkAutoTArray<Slot> oldSlots = std::move(fSlots); + fSlots = SkAutoTArray<Slot>(capacity); + + for (int i = 0; i < oldCapacity; i++) { + Slot& s = oldSlots[i]; + if (s.has_value()) { + this->uncheckedSet(*std::move(s)); + } + } + SkASSERT(fCount == oldCount); + } + + void removeSlot(int index) { + fCount--; + + // Rearrange elements to restore the invariants for linear probing. + for (;;) { + Slot& emptySlot = fSlots[index]; + int emptyIndex = index; + int originalIndex; + // Look for an element that can be moved into the empty slot. + // If the empty slot is in between where an element landed, and its native slot, then + // move it to the empty slot. Don't move it if its native slot is in between where + // the element landed and the empty slot. + // [native] <= [empty] < [candidate] == GOOD, can move candidate to empty slot + // [empty] < [native] < [candidate] == BAD, need to leave candidate where it is + do { + index = this->next(index); + Slot& s = fSlots[index]; + if (s.empty()) { + // We're done shuffling elements around. Clear the last empty slot. + emptySlot.reset(); + return; + } + originalIndex = s.hash & (fCapacity - 1); + } while ((index <= originalIndex && originalIndex < emptyIndex) + || (originalIndex < emptyIndex && emptyIndex < index) + || (emptyIndex < index && index <= originalIndex)); + // Move the element to the empty slot. + Slot& moveFrom = fSlots[index]; + emptySlot = std::move(moveFrom); + } + } + + int next(int index) const { + index--; + if (index < 0) { index += fCapacity; } + return index; + } + + static uint32_t Hash(const K& key) { + uint32_t hash = Traits::Hash(key) & 0xffffffff; + return hash ? hash : 1; // We reserve hash 0 to mark empty. 
+ } + + struct Slot { + Slot() = default; + ~Slot() { this->reset(); } + + Slot(const Slot& that) { *this = that; } + Slot& operator=(const Slot& that) { + if (this == &that) { + return *this; + } + if (hash) { + if (that.hash) { + val.storage = that.val.storage; + hash = that.hash; + } else { + this->reset(); + } + } else { + if (that.hash) { + new (&val.storage) T(that.val.storage); + hash = that.hash; + } else { + // do nothing, no value on either side + } + } + return *this; + } + + Slot(Slot&& that) { *this = std::move(that); } + Slot& operator=(Slot&& that) { + if (this == &that) { + return *this; + } + if (hash) { + if (that.hash) { + val.storage = std::move(that.val.storage); + hash = that.hash; + } else { + this->reset(); + } + } else { + if (that.hash) { + new (&val.storage) T(std::move(that.val.storage)); + hash = that.hash; + } else { + // do nothing, no value on either side + } + } + return *this; + } + + T& operator*() & { return val.storage; } + const T& operator*() const& { return val.storage; } + T&& operator*() && { return std::move(val.storage); } + const T&& operator*() const&& { return std::move(val.storage); } + + Slot& emplace(T&& v, uint32_t h) { + this->reset(); + new (&val.storage) T(std::move(v)); + hash = h; + return *this; + } + + bool has_value() const { return hash != 0; } + explicit operator bool() const { return this->has_value(); } + bool empty() const { return !this->has_value(); } + + void reset() { + if (hash) { + val.storage.~T(); + hash = 0; + } + } + + uint32_t hash = 0; + + private: + union Storage { + T storage; + Storage() {} + ~Storage() {} + } val; + }; + + int fCount = 0, + fCapacity = 0; + SkAutoTArray<Slot> fSlots; +}; + +// Maps K->V. A more user-friendly wrapper around SkTHashTable, suitable for most use cases. +// K and V are treated as ordinary copyable C++ types, with no assumed relationship between the two. +template <typename K, typename V, typename HashK = SkGoodHash> +class SkTHashMap { +public: + // Clear the map. + void reset() { fTable.reset(); } + + // How many key/value pairs are in the table? + int count() const { return fTable.count(); } + + // Approximately how many bytes of memory do we use beyond sizeof(*this)? + size_t approxBytesUsed() const { return fTable.approxBytesUsed(); } + + // N.B. The pointers returned by set() and find() are valid only until the next call to set(). + + // Set key to val in the table, replacing any previous value with the same key. + // We copy both key and val, and return a pointer to the value copy now in the table. + V* set(K key, V val) { + Pair* out = fTable.set({std::move(key), std::move(val)}); + return &out->second; + } + + // If there is key/value entry in the table with this key, return a pointer to the value. + // If not, return null. + V* find(const K& key) const { + if (Pair* p = fTable.find(key)) { + return &p->second; + } + return nullptr; + } + + V& operator[](const K& key) { + if (V* val = this->find(key)) { + return *val; + } + return *this->set(key, V{}); + } + + // Remove the key/value entry in the table with this key. + void remove(const K& key) { + SkASSERT(this->find(key)); + fTable.remove(key); + } + + // Call fn on every key/value pair in the table. You may mutate the value but not the key. + template <typename Fn> // f(K, V*) or f(const K&, V*) + void foreach(Fn&& fn) { + fTable.foreach([&fn](Pair* p){ fn(p->first, &p->second); }); + } + + // Call fn on every key/value pair in the table. You may not mutate anything. 
+ template <typename Fn> // f(K, V), f(const K&, V), f(K, const V&) or f(const K&, const V&). + void foreach(Fn&& fn) const { + fTable.foreach([&fn](const Pair& p){ fn(p.first, p.second); }); + } + + // Dereferencing an iterator gives back a key-value pair, suitable for structured binding. + struct Pair : public std::pair<K, V> { + using std::pair<K, V>::pair; + static const K& GetKey(const Pair& p) { return p.first; } + static auto Hash(const K& key) { return HashK()(key); } + }; + + using Iter = typename SkTHashTable<Pair, K>::template Iter<std::pair<K, V>>; + + Iter begin() const { + return Iter::MakeBegin(&fTable); + } + + Iter end() const { + return Iter::MakeEnd(&fTable); + } + +private: + SkTHashTable<Pair, K> fTable; +}; + +// A set of T. T is treated as an ordinary copyable C++ type. +template <typename T, typename HashT = SkGoodHash> +class SkTHashSet { +public: + // Clear the set. + void reset() { fTable.reset(); } + + // How many items are in the set? + int count() const { return fTable.count(); } + + // Is empty? + bool empty() const { return fTable.count() == 0; } + + // Approximately how many bytes of memory do we use beyond sizeof(*this)? + size_t approxBytesUsed() const { return fTable.approxBytesUsed(); } + + // Copy an item into the set. + void add(T item) { fTable.set(std::move(item)); } + + // Is this item in the set? + bool contains(const T& item) const { return SkToBool(this->find(item)); } + + // If an item equal to this is in the set, return a pointer to it, otherwise null. + // This pointer remains valid until the next call to add(). + const T* find(const T& item) const { return fTable.find(item); } + + // Remove the item in the set equal to this. + void remove(const T& item) { + SkASSERT(this->contains(item)); + fTable.remove(item); + } + + // Call fn on every item in the set. You may not mutate anything. + template <typename Fn> // f(T), f(const T&) + void foreach (Fn&& fn) const { + fTable.foreach(fn); + } + +private: + struct Traits { + static const T& GetKey(const T& item) { return item; } + static auto Hash(const T& item) { return HashT()(item); } + }; + +public: + using Iter = typename SkTHashTable<T, T, Traits>::template Iter<T>; + + Iter begin() const { + return Iter::MakeBegin(&fTable); + } + + Iter end() const { + return Iter::MakeEnd(&fTable); + } + +private: + SkTHashTable<T, T, Traits> fTable; +}; + +#endif//SkTHash_DEFINED diff --git a/src/deps/skia/include/private/SkTLogic.h b/src/deps/skia/include/private/SkTLogic.h new file mode 100644 index 000000000..a2c2f4cfd --- /dev/null +++ b/src/deps/skia/include/private/SkTLogic.h @@ -0,0 +1,86 @@ +/* + * Copyright 2013 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + * + * + * This header provides some std:: features early in the skstd namespace + * and several Skia-specific additions in the sknonstd namespace. + */ + +#ifndef SkTLogic_DEFINED +#define SkTLogic_DEFINED + +#include <cstddef> +#include <type_traits> +#include <utility> +#include "include/private/SkTo.h" + +namespace skstd { + +// C++17, <variant> +struct monostate {}; + +// C++17, <type_traits> +template<typename...> struct conjunction : std::true_type { }; +template<typename T> struct conjunction<T> : T { }; +template<typename T, typename... 
Ts> +struct conjunction<T, Ts...> : std::conditional<bool(T::value), conjunction<Ts...>, T>::type { }; + +// C++17, std::data, std::size +template<typename Container> +constexpr auto data(Container& c) -> decltype(c.data()) { return c.data(); } +template<typename Container> +constexpr auto data(const Container& c) -> decltype(c.data()) { return c.data(); } +template<typename Array, size_t N> +constexpr auto data(Array(&a)[N]) -> decltype(a) { return a; } +template<typename T> +constexpr const T* data(std::initializer_list<T> i) { return i.begin(); } + +template<typename Container> +constexpr auto size(Container& c) -> decltype(c.size()) { return c.size(); } +template<typename Array, size_t N> +constexpr size_t size(Array(&)[N]) { return N; } +template<typename T> +constexpr const T* size(std::initializer_list<T> i) { return i.end() - i.begin(); } +} // namespace skstd + +// The sknonstd namespace contains things we would like to be proposed and feel std-ish. +namespace sknonstd { + +// The name 'copy' here is fraught with peril. In this case it means 'append', not 'overwrite'. +// Alternate proposed names are 'propagate', 'augment', or 'append' (and 'add', but already taken). +// std::experimental::propagate_const already exists for other purposes in TSv2. +// These also follow the <dest, source> pattern used by boost. +template <typename D, typename S> struct copy_const { + using type = std::conditional_t<std::is_const<S>::value, std::add_const_t<D>, D>; +}; +template <typename D, typename S> using copy_const_t = typename copy_const<D, S>::type; + +template <typename D, typename S> struct copy_volatile { + using type = std::conditional_t<std::is_volatile<S>::value, std::add_volatile_t<D>, D>; +}; +template <typename D, typename S> using copy_volatile_t = typename copy_volatile<D, S>::type; + +template <typename D, typename S> struct copy_cv { + using type = copy_volatile_t<copy_const_t<D, S>, S>; +}; +template <typename D, typename S> using copy_cv_t = typename copy_cv<D, S>::type; + +// The name 'same' here means 'overwrite'. +// Alternate proposed names are 'replace', 'transfer', or 'qualify_from'. +// same_xxx<D, S> can be written as copy_xxx<remove_xxx_t<D>, S> +template <typename D, typename S> using same_const = copy_const<std::remove_const_t<D>, S>; +template <typename D, typename S> using same_const_t = typename same_const<D, S>::type; +template <typename D, typename S> using same_volatile =copy_volatile<std::remove_volatile_t<D>,S>; +template <typename D, typename S> using same_volatile_t = typename same_volatile<D, S>::type; +template <typename D, typename S> using same_cv = copy_cv<std::remove_cv_t<D>, S>; +template <typename D, typename S> using same_cv_t = typename same_cv<D, S>::type; + +} // namespace sknonstd + +template <typename Container> +constexpr int SkCount(const Container& c) { return SkTo<int>(skstd::size(c)); } + +#endif diff --git a/src/deps/skia/include/private/SkTOptional.h b/src/deps/skia/include/private/SkTOptional.h new file mode 100644 index 000000000..f610493b0 --- /dev/null +++ b/src/deps/skia/include/private/SkTOptional.h @@ -0,0 +1,362 @@ +/* + * Copyright 2021 Google LLC. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkTOptional_DEFINED +#define SkTOptional_DEFINED + +#include "include/core/SkTypes.h" + +#include <utility> + +namespace skstd { + +/** + * An empty optional is represented with `nullopt`. 
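+ *
+ * Illustrative only (a hypothetical use, not part of the original header):
+ *   skstd::optional<int> x;   // empty
+ *   x = 42;                   // now holds a value
+ *   if (x) { SkASSERT(*x == 42); }
+ *   x = skstd::nullopt;       // empty again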
+ */ +struct nullopt_t { + struct tag {}; + + // nullopt_t must not be default-constructible. + explicit constexpr nullopt_t(tag) {} +}; + +static constexpr nullopt_t nullopt{nullopt_t::tag{}}; + +/** + * Simple drop-in replacement for std::optional until we move to C++17. This does not have all of + * std::optional's capabilities, but it covers our needs for the time being. + */ +template<typename T> +class optional { +public: + optional(const T& value) + : fHasValue(true) { + new(&fPayload.fValue) T(value); + } + + optional(T&& value) + : fHasValue(true) { + new(&fPayload.fValue) T(std::move(value)); + } + + optional() {} + + optional(const optional& other) { + *this = other; + } + + // Construction with nullopt is the same as default construction. + optional(nullopt_t) : optional() {} + + // We need a non-const copy constructor because otherwise optional(nonConstSrc) isn't an exact + // match for the copy constructor, and we'd end up invoking the Args&&... template by mistake. + optional(optional& other) { + *this = other; + } + + optional(optional&& other) { + *this = std::move(other); + } + + template<typename... Args> + optional(Args&&... args) { + fHasValue = true; + new(&fPayload.fValue) T(std::forward<Args>(args)...); + } + + ~optional() { + this->reset(); + } + + optional& operator=(const optional& other) { + if (this != &other) { + if (fHasValue) { + if (other.fHasValue) { + fPayload.fValue = other.fPayload.fValue; + } else { + this->reset(); + } + } else { + if (other.fHasValue) { + fHasValue = true; + new (&fPayload.fValue) T(other.fPayload.fValue); + } else { + // do nothing, no value on either side + } + } + } + return *this; + } + + optional& operator=(optional&& other) { + if (this != &other) { + if (fHasValue) { + if (other.fHasValue) { + fPayload.fValue = std::move(other.fPayload.fValue); + } else { + this->reset(); + } + } else { + if (other.fHasValue) { + fHasValue = true; + new (&fPayload.fValue) T(std::move(other.fPayload.fValue)); + } else { + // do nothing, no value on either side + } + } + } + return *this; + } + + template<typename... Args> + optional& emplace(Args&&... args) { + this->reset(); + fHasValue = true; + new(&fPayload.fValue) T(std::forward<Args>(args)...); + return *this; + } + + template<typename U, typename... Args> + optional& emplace(std::initializer_list<U> il, Args&&... args) { + this->reset(); + fHasValue = true; + new(&fPayload.fValue) T(il, std::forward<Args>(args)...); + return *this; + } + + // Assignment to nullopt is the same as reset(). + optional& operator=(nullopt_t) { + this->reset(); + return *this; + } + + T& operator*() & { + SkASSERT(fHasValue); + return fPayload.fValue; + } + + const T& operator*() const& { + SkASSERT(fHasValue); + return fPayload.fValue; + } + + T&& operator*() && { + SkASSERT(fHasValue); + return std::move(fPayload.fValue); + } + + const T&& operator*() const&& { + SkASSERT(fHasValue); + return std::move(fPayload.fValue); + } + + const T& value() const& { + SkASSERT_RELEASE(fHasValue); + return **this; + } + + T& value() & { + SkASSERT_RELEASE(fHasValue); + return **this; + } + + const T&& value() const&& { + SkASSERT_RELEASE(fHasValue); + return std::move(**this); + } + + T&& value() && { + SkASSERT_RELEASE(fHasValue); + return std::move(**this); + } + + T* operator->() { + return &**this; + } + + const T* operator->() const { + return &**this; + } + + template<typename U> + T value_or(U&& value) const& { + return this->has_value() ? 
**this : static_cast<T>(std::forward<U>(value)); + } + + template<typename U> + T value_or(U&& value) && { + return this->has_value() ? std::move(**this) : static_cast<T>(std::forward<U>(value)); + } + + bool has_value() const { + return fHasValue; + } + + explicit operator bool() const { + return this->has_value(); + } + + void reset() { + if (fHasValue) { + fPayload.fValue.~T(); + fHasValue = false; + } + } + +private: + union Payload { + T fValue; + + Payload() {} + + ~Payload() {} + } fPayload; + + bool fHasValue = false; +}; + +// Comparison operators for optional x optional +template <typename T, typename U> bool operator==(const optional<T>& a, const optional<U>& b) { + return (a.has_value() != b.has_value()) ? false : + !a.has_value() ? true : + (*a == *b); +} + +template <typename T, typename U> bool operator!=(const optional<T>& a, const optional<U>& b) { + return (a.has_value() != b.has_value()) ? true : + !a.has_value() ? false : + (*a != *b); +} + +template <typename T, typename U> bool operator<(const optional<T>& a, const optional<U>& b) { + return !b.has_value() ? false : + !a.has_value() ? true : + (*a < *b); +} + +template <typename T, typename U> bool operator<=(const optional<T>& a, const optional<U>& b) { + return !a.has_value() ? true : + !b.has_value() ? false : + (*a <= *b); +} + +template <typename T, typename U> bool operator>(const optional<T>& a, const optional<U>& b) { + return !a.has_value() ? false : + !b.has_value() ? true : + (*a > *b); +} + +template <typename T, typename U> bool operator>=(const optional<T>& a, const optional<U>& b) { + return !b.has_value() ? true : + !a.has_value() ? false : + (*a >= *b); +} + +// Comparison operators for optional x nullopt +template <typename T> bool operator==(const optional<T>& a, nullopt_t) { + return !a.has_value(); +} + +template <typename T> bool operator!=(const optional<T>& a, nullopt_t) { + return a.has_value(); +} + +template <typename T> bool operator<(const optional<T>&, nullopt_t) { + return false; +} + +template <typename T> bool operator<=(const optional<T>& a, nullopt_t) { + return !a.has_value(); +} + +template <typename T> bool operator>(const optional<T>& a, nullopt_t) { + return a.has_value(); +} + +template <typename T> +bool operator>=(const optional<T>&, nullopt_t) { + return true; +} + +// Comparison operators for nullopt x optional +template <typename U> bool operator==(nullopt_t, const optional<U>& b) { + return !b.has_value(); +} + +template <typename U> bool operator!=(nullopt_t, const optional<U>& b) { + return b.has_value(); +} + +template <typename U> bool operator<(nullopt_t, const optional<U>& b) { + return b.has_value(); +} + +template <typename U> bool operator<=(nullopt_t, const optional<U>&) { + return true; +} + +template <typename U> bool operator>(nullopt_t, const optional<U>&) { + return false; +} + +template <typename U> bool operator>=(nullopt_t, const optional<U>& b) { + return !b.has_value(); +} + +// Comparison operators for optional x value +template <typename T, typename U> bool operator==(const optional<T>& a, const U& b) { + return a.has_value() && (*a == b); +} + +template <typename T, typename U> bool operator!=(const optional<T>& a, const U& b) { + return !a.has_value() || (*a != b); +} + +template <typename T, typename U> bool operator<(const optional<T>& a, const U& b) { + return !a.has_value() || (*a < b); +} + +template <typename T, typename U> bool operator<=(const optional<T>& a, const U& b) { + return !a.has_value() || (*a <= b); +} + +template <typename T, 
typename U> bool operator>(const optional<T>& a, const U& b) { + return a.has_value() && (*a > b); +} + +template <typename T, typename U> bool operator>=(const optional<T>& a, const U& b) { + return a.has_value() && (*a >= b); +} + +// Comparison operators for value x optional +template <typename T, typename U> bool operator==(const T& a, const optional<U>& b) { + return b.has_value() && (a == *b); +} + +template <typename T, typename U> bool operator!=(const T& a, const optional<U>& b) { + return !b.has_value() || (a != *b); +} + +template <typename T, typename U> bool operator<(const T& a, const optional<U>& b) { + return b.has_value() && (a < *b); +} + +template <typename T, typename U> bool operator<=(const T& a, const optional<U>& b) { + return b.has_value() && (a <= *b); +} + +template <typename T, typename U> bool operator>(const T& a, const optional<U>& b) { + return !b.has_value() || (a > *b); +} + +template <typename T, typename U> bool operator>=(const T& a, const optional<U>& b) { + return !b.has_value() || (a >= *b); +} + +} // namespace skstd + +#endif diff --git a/src/deps/skia/include/private/SkTPin.h b/src/deps/skia/include/private/SkTPin.h new file mode 100644 index 000000000..c824c4464 --- /dev/null +++ b/src/deps/skia/include/private/SkTPin.h @@ -0,0 +1,23 @@ +/* + * Copyright 2020 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkTPin_DEFINED +#define SkTPin_DEFINED + +#include <algorithm> + +/** @return x pinned (clamped) between lo and hi, inclusively. + + Unlike std::clamp(), SkTPin() always returns a value between lo and hi. + If x is NaN, SkTPin() returns lo but std::clamp() returns NaN. +*/ +template <typename T> +static constexpr const T& SkTPin(const T& x, const T& lo, const T& hi) { + return std::max(lo, std::min(x, hi)); +} + +#endif diff --git a/src/deps/skia/include/private/SkTemplates.h b/src/deps/skia/include/private/SkTemplates.h new file mode 100644 index 000000000..4221ee14d --- /dev/null +++ b/src/deps/skia/include/private/SkTemplates.h @@ -0,0 +1,453 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkTemplates_DEFINED +#define SkTemplates_DEFINED + +#include "include/core/SkTypes.h" +#include "include/private/SkMalloc.h" +#include "include/private/SkTLogic.h" + +#include <string.h> +#include <array> +#include <cstddef> +#include <memory> +#include <new> +#include <type_traits> +#include <utility> + +/** \file SkTemplates.h + + This file contains light-weight template classes for type-safe and exception-safe + resource management. +*/ + +/** + * Marks a local variable as known to be unused (to avoid warnings). + * Note that this does *not* prevent the local variable from being optimized away. + */ +template<typename T> inline void sk_ignore_unused_variable(const T&) { } + +/** + * Returns a pointer to a D which comes immediately after S[count]. + */ +template <typename D, typename S> static D* SkTAfter(S* ptr, size_t count = 1) { + return reinterpret_cast<D*>(ptr + count); +} + +/** + * Returns a pointer to a D which comes byteOffset bytes after S. + */ +template <typename D, typename S> static D* SkTAddOffset(S* ptr, ptrdiff_t byteOffset) { + // The intermediate char* has the same cv-ness as D as this produces better error messages. + // This relies on the fact that reinterpret_cast can add constness, but cannot remove it. 
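+    // Illustrative example (hypothetical, not from the original header): if a Header struct is
+    // immediately followed in memory by its payload bytes, SkTAddOffset<uint8_t>(hdr, sizeof(Header))
+    // yields a pointer to the first payload byte.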
+ return reinterpret_cast<D*>(reinterpret_cast<sknonstd::same_cv_t<char, D>*>(ptr) + byteOffset); +} + +// TODO: when C++17 the language is available, use template <auto P> +template <typename T, T* P> struct SkFunctionWrapper { + template <typename... Args> + auto operator()(Args&&... args) const -> decltype(P(std::forward<Args>(args)...)) { + return P(std::forward<Args>(args)...); + } +}; + +/** \class SkAutoTCallVProc + + Call a function when this goes out of scope. The template uses two + parameters, the object, and a function that is to be called in the destructor. + If release() is called, the object reference is set to null. If the object + reference is null when the destructor is called, we do not call the + function. +*/ +template <typename T, void (*P)(T*)> class SkAutoTCallVProc + : public std::unique_ptr<T, SkFunctionWrapper<std::remove_pointer_t<decltype(P)>, P>> { + using inherited = std::unique_ptr<T, SkFunctionWrapper<std::remove_pointer_t<decltype(P)>, P>>; +public: + using inherited::inherited; + SkAutoTCallVProc(const SkAutoTCallVProc&) = delete; + SkAutoTCallVProc(SkAutoTCallVProc&& that) : inherited(std::move(that)) {} + + operator T*() const { return this->get(); } +}; + +/** Allocate an array of T elements, and free the array in the destructor + */ +template <typename T> class SkAutoTArray { +public: + SkAutoTArray() {} + /** Allocate count number of T elements + */ + explicit SkAutoTArray(int count) { + SkASSERT(count >= 0); + if (count) { + fArray.reset(new T[count]); + } + SkDEBUGCODE(fCount = count;) + } + + SkAutoTArray(SkAutoTArray&& other) : fArray(std::move(other.fArray)) { + SkDEBUGCODE(fCount = other.fCount; other.fCount = 0;) + } + SkAutoTArray& operator=(SkAutoTArray&& other) { + if (this != &other) { + fArray = std::move(other.fArray); + SkDEBUGCODE(fCount = other.fCount; other.fCount = 0;) + } + return *this; + } + + /** Reallocates given a new count. Reallocation occurs even if new count equals old count. + */ + void reset(int count = 0) { *this = SkAutoTArray(count); } + + /** Return the array of T elements. Will be NULL if count == 0 + */ + T* get() const { return fArray.get(); } + + /** Return the nth element in the array + */ + T& operator[](int index) const { + SkASSERT((unsigned)index < (unsigned)fCount); + return fArray[index]; + } + + /** Aliases matching other types, like std::vector. */ + const T* data() const { return fArray.get(); } + T* data() { return fArray.get(); } + +private: + std::unique_ptr<T[]> fArray; + SkDEBUGCODE(int fCount = 0;) +}; + +/** Wraps SkAutoTArray, with room for kCountRequested elements preallocated. 
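+ *  Illustrative only (a hypothetical use, not part of the original header):
+ *    SkAutoSTArray<16, int> scratch(n);  // uses the inline 16-element buffer when n <= 16,
+ *                                        // heap-allocates otherwise; elements are default-constructed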
+ */ +template <int kCountRequested, typename T> class SkAutoSTArray { +public: + SkAutoSTArray(SkAutoSTArray&&) = delete; + SkAutoSTArray(const SkAutoSTArray&) = delete; + SkAutoSTArray& operator=(SkAutoSTArray&&) = delete; + SkAutoSTArray& operator=(const SkAutoSTArray&) = delete; + + /** Initialize with no objects */ + SkAutoSTArray() { + fArray = nullptr; + fCount = 0; + } + + /** Allocate count number of T elements + */ + SkAutoSTArray(int count) { + fArray = nullptr; + fCount = 0; + this->reset(count); + } + + ~SkAutoSTArray() { + this->reset(0); + } + + /** Destroys previous objects in the array and default constructs count number of objects */ + void reset(int count) { + T* start = fArray; + T* iter = start + fCount; + while (iter > start) { + (--iter)->~T(); + } + + SkASSERT(count >= 0); + if (fCount != count) { + if (fCount > kCount) { + // 'fArray' was allocated last time so free it now + SkASSERT((T*) fStorage != fArray); + sk_free(fArray); + } + + if (count > kCount) { + fArray = (T*) sk_malloc_throw(count, sizeof(T)); + } else if (count > 0) { + fArray = (T*) fStorage; + } else { + fArray = nullptr; + } + + fCount = count; + } + + iter = fArray; + T* stop = fArray + count; + while (iter < stop) { + new (iter++) T; + } + } + + /** Return the number of T elements in the array + */ + int count() const { return fCount; } + + /** Return the array of T elements. Will be NULL if count == 0 + */ + T* get() const { return fArray; } + + T* begin() { return fArray; } + + const T* begin() const { return fArray; } + + T* end() { return fArray + fCount; } + + const T* end() const { return fArray + fCount; } + + /** Return the nth element in the array + */ + T& operator[](int index) const { + SkASSERT(index < fCount); + return fArray[index]; + } + + /** Aliases matching other types, like std::vector. */ + const T* data() const { return fArray; } + T* data() { return fArray; } + size_t size() const { return fCount; } + +private: +#if defined(SK_BUILD_FOR_GOOGLE3) + // Stack frame size is limited for SK_BUILD_FOR_GOOGLE3. 4k is less than the actual max, but some functions + // have multiple large stack allocations. + static const int kMaxBytes = 4 * 1024; + static const int kCount = kCountRequested * sizeof(T) > kMaxBytes + ? kMaxBytes / sizeof(T) + : kCountRequested; +#else + static const int kCount = kCountRequested; +#endif + + int fCount; + T* fArray; + // since we come right after fArray, fStorage should be properly aligned + char fStorage[kCount * sizeof(T)]; +}; + +/** Manages an array of T elements, freeing the array in the destructor. + * Does NOT call any constructors/destructors on T (T must be POD). + */ +template <typename T, + typename = std::enable_if_t<std::is_trivially_default_constructible<T>::value && + std::is_trivially_destructible<T>::value>> +class SkAutoTMalloc { +public: + /** Takes ownership of the ptr. The ptr must be a value which can be passed to sk_free. */ + explicit SkAutoTMalloc(T* ptr = nullptr) : fPtr(ptr) {} + + /** Allocates space for 'count' Ts. */ + explicit SkAutoTMalloc(size_t count) + : fPtr(count ? (T*)sk_malloc_throw(count, sizeof(T)) : nullptr) {} + + SkAutoTMalloc(SkAutoTMalloc&&) = default; + SkAutoTMalloc& operator=(SkAutoTMalloc&&) = default; + + /** Resize the memory area pointed to by the current ptr preserving contents. */ + void realloc(size_t count) { + fPtr.reset(count ? (T*)sk_realloc_throw(fPtr.release(), count * sizeof(T)) : nullptr); + } + + /** Resize the memory area pointed to by the current ptr without preserving contents. 
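+     *  For example (illustrative): after SkAutoTMalloc<float> buf(64); buf.reset(128);
+     *  the original 64 floats are discarded; realloc(128) would have preserved them.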
*/ + T* reset(size_t count = 0) { + fPtr.reset(count ? (T*)sk_malloc_throw(count, sizeof(T)) : nullptr); + return this->get(); + } + + T* get() const { return fPtr.get(); } + + operator T*() { return fPtr.get(); } + + operator const T*() const { return fPtr.get(); } + + T& operator[](int index) { return fPtr.get()[index]; } + + const T& operator[](int index) const { return fPtr.get()[index]; } + + /** Aliases matching other types, like std::vector. */ + const T* data() const { return fPtr.get(); } + T* data() { return fPtr.get(); } + + /** + * Transfer ownership of the ptr to the caller, setting the internal + * pointer to NULL. Note that this differs from get(), which also returns + * the pointer, but it does not transfer ownership. + */ + T* release() { return fPtr.release(); } + +private: + std::unique_ptr<T, SkFunctionWrapper<void(void*), sk_free>> fPtr; +}; + +template <size_t kCountRequested, + typename T, + typename = std::enable_if_t<std::is_trivially_default_constructible<T>::value && + std::is_trivially_destructible<T>::value>> +class SkAutoSTMalloc { +public: + SkAutoSTMalloc() : fPtr(fTStorage) {} + + SkAutoSTMalloc(size_t count) { + if (count > kCount) { + fPtr = (T*)sk_malloc_throw(count, sizeof(T)); + } else if (count) { + fPtr = fTStorage; + } else { + fPtr = nullptr; + } + } + + SkAutoSTMalloc(SkAutoSTMalloc&&) = delete; + SkAutoSTMalloc(const SkAutoSTMalloc&) = delete; + SkAutoSTMalloc& operator=(SkAutoSTMalloc&&) = delete; + SkAutoSTMalloc& operator=(const SkAutoSTMalloc&) = delete; + + ~SkAutoSTMalloc() { + if (fPtr != fTStorage) { + sk_free(fPtr); + } + } + + // doesn't preserve contents + T* reset(size_t count) { + if (fPtr != fTStorage) { + sk_free(fPtr); + } + if (count > kCount) { + fPtr = (T*)sk_malloc_throw(count, sizeof(T)); + } else if (count) { + fPtr = fTStorage; + } else { + fPtr = nullptr; + } + return fPtr; + } + + T* get() const { return fPtr; } + + operator T*() { + return fPtr; + } + + operator const T*() const { + return fPtr; + } + + T& operator[](int index) { + return fPtr[index]; + } + + const T& operator[](int index) const { + return fPtr[index]; + } + + /** Aliases matching other types, like std::vector. */ + const T* data() const { return fPtr; } + T* data() { return fPtr; } + + // Reallocs the array, can be used to shrink the allocation. Makes no attempt to be intelligent + void realloc(size_t count) { + if (count > kCount) { + if (fPtr == fTStorage) { + fPtr = (T*)sk_malloc_throw(count, sizeof(T)); + memcpy((void*)fPtr, fTStorage, kCount * sizeof(T)); + } else { + fPtr = (T*)sk_realloc_throw(fPtr, count, sizeof(T)); + } + } else if (count) { + if (fPtr != fTStorage) { + fPtr = (T*)sk_realloc_throw(fPtr, count, sizeof(T)); + } + } else { + this->reset(0); + } + } + +private: + // Since we use uint32_t storage, we might be able to get more elements for free. + static const size_t kCountWithPadding = SkAlign4(kCountRequested*sizeof(T)) / sizeof(T); +#if defined(SK_BUILD_FOR_GOOGLE3) + // Stack frame size is limited for SK_BUILD_FOR_GOOGLE3. 4k is less than the actual max, but some functions + // have multiple large stack allocations. + static const size_t kMaxBytes = 4 * 1024; + static const size_t kCount = kCountRequested * sizeof(T) > kMaxBytes + ? 
kMaxBytes / sizeof(T) + : kCountWithPadding; +#else + static const size_t kCount = kCountWithPadding; +#endif + + T* fPtr; + union { + uint32_t fStorage32[SkAlign4(kCount*sizeof(T)) >> 2]; + T fTStorage[1]; // do NOT want to invoke T::T() + }; +}; + +////////////////////////////////////////////////////////////////////////////////////////////////// + +/** + * Pass the object and the storage that was offered during SkInPlaceNewCheck, and this will + * safely destroy (and free if it was dynamically allocated) the object. + */ +template <typename T> void SkInPlaceDeleteCheck(T* obj, void* storage) { + if (storage == obj) { + obj->~T(); + } else { + delete obj; + } +} + +/** + * Allocates T, using storage if it is large enough, and allocating on the heap (via new) if + * storage is not large enough. + * + * obj = SkInPlaceNewCheck<Type>(storage, size); + * ... + * SkInPlaceDeleteCheck(obj, storage); + */ +template<typename T, typename... Args> +T* SkInPlaceNewCheck(void* storage, size_t size, Args&&... args) { + return (sizeof(T) <= size) ? new (storage) T(std::forward<Args>(args)...) + : new T(std::forward<Args>(args)...); +} + +template <int N, typename T> class SkAlignedSTStorage { +public: + SkAlignedSTStorage() {} + SkAlignedSTStorage(SkAlignedSTStorage&&) = delete; + SkAlignedSTStorage(const SkAlignedSTStorage&) = delete; + SkAlignedSTStorage& operator=(SkAlignedSTStorage&&) = delete; + SkAlignedSTStorage& operator=(const SkAlignedSTStorage&) = delete; + + /** + * Returns void* because this object does not initialize the + * memory. Use placement new for types that require a constructor. + */ + void* get() { return fStorage; } + const void* get() const { return fStorage; } +private: + alignas(T) char fStorage[sizeof(T)*N]; +}; + +using SkAutoFree = std::unique_ptr<void, SkFunctionWrapper<void(void*), sk_free>>; + +template<typename C, std::size_t... Is> +constexpr auto SkMakeArrayFromIndexSequence(C c, std::index_sequence<Is...> is) +-> std::array<decltype(c(std::declval<typename decltype(is)::value_type>())), sizeof...(Is)> { + return {{ c(Is)... }}; +} + +template<size_t N, typename C> constexpr auto SkMakeArray(C c) +-> std::array<decltype(c(std::declval<typename std::index_sequence<N>::value_type>())), N> { + return SkMakeArrayFromIndexSequence(c, std::make_index_sequence<N>{}); +} + +#endif diff --git a/src/deps/skia/include/private/SkThreadAnnotations.h b/src/deps/skia/include/private/SkThreadAnnotations.h new file mode 100644 index 000000000..07652a3fb --- /dev/null +++ b/src/deps/skia/include/private/SkThreadAnnotations.h @@ -0,0 +1,91 @@ +/* + * Copyright 2019 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkThreadAnnotations_DEFINED +#define SkThreadAnnotations_DEFINED + +// The bulk of this code is cribbed from: +// http://clang.llvm.org/docs/ThreadSafetyAnalysis.html + +#if defined(__clang__) && (!defined(SWIG)) +#define SK_THREAD_ANNOTATION_ATTRIBUTE(x) __attribute__((x)) +#else +#define SK_THREAD_ANNOTATION_ATTRIBUTE(x) // no-op +#endif + +#define SK_CAPABILITY(x) \ + SK_THREAD_ANNOTATION_ATTRIBUTE(capability(x)) + +#define SK_SCOPED_CAPABILITY \ + SK_THREAD_ANNOTATION_ATTRIBUTE(scoped_lockable) + +#define SK_GUARDED_BY(x) \ + SK_THREAD_ANNOTATION_ATTRIBUTE(guarded_by(x)) + +#define SK_PT_GUARDED_BY(x) \ + SK_THREAD_ANNOTATION_ATTRIBUTE(pt_guarded_by(x)) + +#define SK_ACQUIRED_BEFORE(...) \ + SK_THREAD_ANNOTATION_ATTRIBUTE(acquired_before(__VA_ARGS__)) + +#define SK_ACQUIRED_AFTER(...) 
\
+  SK_THREAD_ANNOTATION_ATTRIBUTE(acquired_after(__VA_ARGS__))
+
+#define SK_REQUIRES(...) \
+  SK_THREAD_ANNOTATION_ATTRIBUTE(requires_capability(__VA_ARGS__))
+
+#define SK_REQUIRES_SHARED(...) \
+  SK_THREAD_ANNOTATION_ATTRIBUTE(requires_shared_capability(__VA_ARGS__))
+
+#define SK_ACQUIRE(...) \
+  SK_THREAD_ANNOTATION_ATTRIBUTE(acquire_capability(__VA_ARGS__))
+
+#define SK_ACQUIRE_SHARED(...) \
+  SK_THREAD_ANNOTATION_ATTRIBUTE(acquire_shared_capability(__VA_ARGS__))
+
+// Would be SK_RELEASE, but that is already in use as SK_DEBUG vs. SK_RELEASE.
+#define SK_RELEASE_CAPABILITY(...) \
+  SK_THREAD_ANNOTATION_ATTRIBUTE(release_capability(__VA_ARGS__))
+
+// For symmetry with SK_RELEASE_CAPABILITY.
+#define SK_RELEASE_SHARED_CAPABILITY(...) \
+  SK_THREAD_ANNOTATION_ATTRIBUTE(release_shared_capability(__VA_ARGS__))
+
+#define SK_TRY_ACQUIRE(...) \
+  SK_THREAD_ANNOTATION_ATTRIBUTE(try_acquire_capability(__VA_ARGS__))
+
+#define SK_TRY_ACQUIRE_SHARED(...) \
+  SK_THREAD_ANNOTATION_ATTRIBUTE(try_acquire_shared_capability(__VA_ARGS__))
+
+#define SK_EXCLUDES(...) \
+  SK_THREAD_ANNOTATION_ATTRIBUTE(locks_excluded(__VA_ARGS__))
+
+#define SK_ASSERT_CAPABILITY(x) \
+  SK_THREAD_ANNOTATION_ATTRIBUTE(assert_capability(x))
+
+#define SK_ASSERT_SHARED_CAPABILITY(x) \
+  SK_THREAD_ANNOTATION_ATTRIBUTE(assert_shared_capability(x))
+
+#define SK_RETURN_CAPABILITY(x) \
+  SK_THREAD_ANNOTATION_ATTRIBUTE(lock_returned(x))
+
+#define SK_NO_THREAD_SAFETY_ANALYSIS \
+  SK_THREAD_ANNOTATION_ATTRIBUTE(no_thread_safety_analysis)
+
+#if defined(SK_BUILD_FOR_GOOGLE3) && !defined(SK_BUILD_FOR_WASM_IN_GOOGLE3)
+  extern "C" {
+    void __google_potentially_blocking_region_begin(void);
+    void __google_potentially_blocking_region_end (void);
+  }
+  #define SK_POTENTIALLY_BLOCKING_REGION_BEGIN __google_potentially_blocking_region_begin()
+  #define SK_POTENTIALLY_BLOCKING_REGION_END __google_potentially_blocking_region_end()
+#else
+  #define SK_POTENTIALLY_BLOCKING_REGION_BEGIN
+  #define SK_POTENTIALLY_BLOCKING_REGION_END
+#endif
+
+#endif // SkThreadAnnotations_DEFINED
diff --git a/src/deps/skia/include/private/SkThreadID.h b/src/deps/skia/include/private/SkThreadID.h
new file mode 100644
index 000000000..e14388b3d
--- /dev/null
+++ b/src/deps/skia/include/private/SkThreadID.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2015 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkThreadID_DEFINED
+#define SkThreadID_DEFINED
+
+#include "include/core/SkTypes.h"
+
+typedef int64_t SkThreadID;
+
+// SkMutex.h uses SkGetThreadID in debug only code.
+SkDEBUGCODE(SK_SPI) SkThreadID SkGetThreadID();
+
+const SkThreadID kIllegalThreadID = 0;
+
+#endif // SkThreadID_DEFINED
diff --git a/src/deps/skia/include/private/SkTo.h b/src/deps/skia/include/private/SkTo.h
new file mode 100644
index 000000000..d788f7b26
--- /dev/null
+++ b/src/deps/skia/include/private/SkTo.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */ +#ifndef SkTo_DEFINED +#define SkTo_DEFINED + +#include "include/core/SkTypes.h" +#include "include/private/SkTFitsIn.h" + +template <typename D, typename S> constexpr D SkTo(S s) { + return SkASSERT(SkTFitsIn<D>(s)), + static_cast<D>(s); +} + +template <typename S> constexpr int8_t SkToS8(S x) { return SkTo<int8_t>(x); } +template <typename S> constexpr uint8_t SkToU8(S x) { return SkTo<uint8_t>(x); } +template <typename S> constexpr int16_t SkToS16(S x) { return SkTo<int16_t>(x); } +template <typename S> constexpr uint16_t SkToU16(S x) { return SkTo<uint16_t>(x); } +template <typename S> constexpr int32_t SkToS32(S x) { return SkTo<int32_t>(x); } +template <typename S> constexpr uint32_t SkToU32(S x) { return SkTo<uint32_t>(x); } +template <typename S> constexpr int SkToInt(S x) { return SkTo<int>(x); } +template <typename S> constexpr unsigned SkToUInt(S x) { return SkTo<unsigned>(x); } +template <typename S> constexpr size_t SkToSizeT(S x) { return SkTo<size_t>(x); } + +#endif // SkTo_DEFINED diff --git a/src/deps/skia/include/private/SkUniquePaintParamsID.h b/src/deps/skia/include/private/SkUniquePaintParamsID.h new file mode 100644 index 000000000..2cd89fd2f --- /dev/null +++ b/src/deps/skia/include/private/SkUniquePaintParamsID.h @@ -0,0 +1,35 @@ +/* + * Copyright 2022 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkUniquePaintParamsID_DEFINED +#define SkUniquePaintParamsID_DEFINED + +#include "include/core/SkTypes.h" + +// This class boils down to a unique uint that can be used instead of a variable length +// key derived from a PaintParams. +class SkUniquePaintParamsID { +public: + explicit SkUniquePaintParamsID(uint32_t id) : fID(id) { + SkASSERT(id != SK_InvalidUniqueID); + } + + static SkUniquePaintParamsID InvalidID() { return SkUniquePaintParamsID(); } + + SkUniquePaintParamsID() : fID(SK_InvalidUniqueID) {} + + bool operator==(const SkUniquePaintParamsID &that) const { return fID == that.fID; } + bool operator!=(const SkUniquePaintParamsID &that) const { return !(*this == that); } + + bool isValid() const { return fID != SK_InvalidUniqueID; } + uint32_t asUInt() const { return fID; } + +private: + uint32_t fID; +}; + +#endif // SkUniquePaintParamsID_DEFINED diff --git a/src/deps/skia/include/private/SkVx.h b/src/deps/skia/include/private/SkVx.h new file mode 100644 index 000000000..4f0f4ace0 --- /dev/null +++ b/src/deps/skia/include/private/SkVx.h @@ -0,0 +1,943 @@ +/* + * Copyright 2019 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SKVX_DEFINED +#define SKVX_DEFINED + +// skvx::Vec<N,T> are SIMD vectors of N T's, a v1.5 successor to SkNx<N,T>. +// +// This time we're leaning a bit less on platform-specific intrinsics and a bit +// more on Clang/GCC vector extensions, but still keeping the option open to +// drop in platform-specific intrinsics, actually more easily than before. +// +// We've also fixed a few of the caveats that used to make SkNx awkward to work +// with across translation units. skvx::Vec<N,T> always has N*sizeof(T) size +// and alignment and is safe to use across translation units freely. +// (Ideally we'd only align to T, but that tanks ARMv7 NEON codegen.) + +// Please try to keep this file independent of Skia headers. +#include <algorithm> // std::min, std::max +#include <cassert> // assert() +#include <cmath> // ceilf, floorf, truncf, roundf, sqrtf, etc. 
+#include <cstdint> // intXX_t +#include <cstring> // memcpy() +#include <initializer_list> // std::initializer_list +#include <utility> // std::index_sequence + +// Users may disable SIMD with SKNX_NO_SIMD, which may be set via compiler flags. +// The gn build has no option which sets SKNX_NO_SIMD. +// Use SKVX_USE_SIMD internally to avoid confusing double negation. +// Do not use 'defined' in a macro expansion. +#if !defined(SKNX_NO_SIMD) + #define SKVX_USE_SIMD 1 +#else + #define SKVX_USE_SIMD 0 +#endif + +#if SKVX_USE_SIMD + #if defined(__SSE__) || defined(__AVX__) || defined(__AVX2__) + #include <immintrin.h> + #elif defined(__ARM_NEON) + #include <arm_neon.h> + #elif defined(__wasm_simd128__) + #include <wasm_simd128.h> + #endif +#endif + +// To avoid ODR violations, all methods must be force-inlined... +#if defined(_MSC_VER) + #define SKVX_ALWAYS_INLINE __forceinline +#else + #define SKVX_ALWAYS_INLINE __attribute__((always_inline)) +#endif + +// ... and all standalone functions must be static. Please use these helpers: +#define SI static inline +#define SIT template < typename T> SI +#define SIN template <int N > SI +#define SINT template <int N, typename T> SI +#define SINTU template <int N, typename T, typename U, \ + typename=std::enable_if_t<std::is_convertible<U,T>::value>> SI + +namespace skvx { + +template <int N, typename T> +struct alignas(N*sizeof(T)) Vec; + +template <int... Ix, int N, typename T> +SI Vec<sizeof...(Ix),T> shuffle(const Vec<N,T>&); + +template <typename D, typename S> +SI D bit_pun(const S&); + +// All Vec have the same simple memory layout, the same as `T vec[N]`. +template <int N, typename T> +struct alignas(N*sizeof(T)) VecStorage { + SKVX_ALWAYS_INLINE VecStorage() = default; + SKVX_ALWAYS_INLINE VecStorage(T s) : lo(s), hi(s) {} + + Vec<N/2,T> lo, hi; +}; + +template <typename T> +struct VecStorage<4,T> { + SKVX_ALWAYS_INLINE VecStorage() = default; + SKVX_ALWAYS_INLINE VecStorage(T s) : lo(s), hi(s) {} + SKVX_ALWAYS_INLINE VecStorage(T x, T y, T z, T w) : lo(x,y), hi(z, w) {} + SKVX_ALWAYS_INLINE VecStorage(Vec<2,T> xy, T z, T w) : lo(xy), hi(z,w) {} + SKVX_ALWAYS_INLINE VecStorage(T x, T y, Vec<2,T> zw) : lo(x,y), hi(zw) {} + SKVX_ALWAYS_INLINE VecStorage(Vec<2,T> xy, Vec<2,T> zw) : lo(xy), hi(zw) {} + + SKVX_ALWAYS_INLINE Vec<2,T>& xy() { return lo; } + SKVX_ALWAYS_INLINE Vec<2,T>& zw() { return hi; } + SKVX_ALWAYS_INLINE T& x() { return lo.lo.val; } + SKVX_ALWAYS_INLINE T& y() { return lo.hi.val; } + SKVX_ALWAYS_INLINE T& z() { return hi.lo.val; } + SKVX_ALWAYS_INLINE T& w() { return hi.hi.val; } + + SKVX_ALWAYS_INLINE Vec<2,T> xy() const { return lo; } + SKVX_ALWAYS_INLINE Vec<2,T> zw() const { return hi; } + SKVX_ALWAYS_INLINE T x() const { return lo.lo.val; } + SKVX_ALWAYS_INLINE T y() const { return lo.hi.val; } + SKVX_ALWAYS_INLINE T z() const { return hi.lo.val; } + SKVX_ALWAYS_INLINE T w() const { return hi.hi.val; } + + // Exchange-based swizzles. These should take 1 cycle on NEON and 3 (pipelined) cycles on SSE. 
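+    // Illustrative example (values assumed, not taken from the header): for a
+    // Vec<4,float> holding {1,2,3,4}, yxwz() returns {2,1,4,3} and zwxy() returns {3,4,1,2}.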
+ SKVX_ALWAYS_INLINE Vec<4,T> yxwz() const { return shuffle<1,0,3,2>(bit_pun<Vec<4,T>>(*this)); } + SKVX_ALWAYS_INLINE Vec<4,T> zwxy() const { return shuffle<2,3,0,1>(bit_pun<Vec<4,T>>(*this)); } + + Vec<2,T> lo, hi; +}; + +template <typename T> +struct VecStorage<2,T> { + SKVX_ALWAYS_INLINE VecStorage() = default; + SKVX_ALWAYS_INLINE VecStorage(T s) : lo(s), hi(s) {} + SKVX_ALWAYS_INLINE VecStorage(T x, T y) : lo(x), hi(y) {} + + SKVX_ALWAYS_INLINE T& x() { return lo.val; } + SKVX_ALWAYS_INLINE T& y() { return hi.val; } + + SKVX_ALWAYS_INLINE T x() const { return lo.val; } + SKVX_ALWAYS_INLINE T y() const { return hi.val; } + + // This exchange-based swizzle should take 1 cycle on NEON and 3 (pipelined) cycles on SSE. + SKVX_ALWAYS_INLINE Vec<2,T> yx() const { return shuffle<1,0>(bit_pun<Vec<2,T>>(*this)); } + + SKVX_ALWAYS_INLINE Vec<4,T> xyxy() const { + return Vec<4,T>(bit_pun<Vec<2,T>>(*this), bit_pun<Vec<2,T>>(*this)); + } + + Vec<1,T> lo, hi; +}; + +template <int N, typename T> +struct alignas(N*sizeof(T)) Vec : public VecStorage<N,T> { + static_assert((N & (N-1)) == 0, "N must be a power of 2."); + static_assert(sizeof(T) >= alignof(T), "What kind of unusual T is this?"); + + // Methods belong here in the class declaration of Vec only if: + // - they must be here, like constructors or operator[]; + // - they'll definitely never want a specialized implementation. + // Other operations on Vec should be defined outside the type. + + SKVX_ALWAYS_INLINE Vec() = default; + + using VecStorage<N,T>::VecStorage; + + SKVX_ALWAYS_INLINE Vec(std::initializer_list<T> xs) { + T vals[N] = {0}; + memcpy(vals, xs.begin(), std::min(xs.size(), (size_t)N)*sizeof(T)); + + this->lo = Vec<N/2,T>::Load(vals + 0); + this->hi = Vec<N/2,T>::Load(vals + N/2); + } + + SKVX_ALWAYS_INLINE T operator[](int i) const { return i<N/2 ? this->lo[i] : this->hi[i-N/2]; } + SKVX_ALWAYS_INLINE T& operator[](int i) { return i<N/2 ? this->lo[i] : this->hi[i-N/2]; } + + SKVX_ALWAYS_INLINE static Vec Load(const void* ptr) { + Vec v; + memcpy(&v, ptr, sizeof(Vec)); + return v; + } + SKVX_ALWAYS_INLINE void store(void* ptr) const { + memcpy(ptr, this, sizeof(Vec)); + } +}; + +template <typename T> +struct Vec<1,T> { + T val; + + SKVX_ALWAYS_INLINE Vec() = default; + + Vec(T s) : val(s) {} + + SKVX_ALWAYS_INLINE Vec(std::initializer_list<T> xs) : val(xs.size() ? *xs.begin() : 0) {} + + SKVX_ALWAYS_INLINE T operator[](int) const { return val; } + SKVX_ALWAYS_INLINE T& operator[](int) { return val; } + + SKVX_ALWAYS_INLINE static Vec Load(const void* ptr) { + Vec v; + memcpy(&v, ptr, sizeof(Vec)); + return v; + } + SKVX_ALWAYS_INLINE void store(void* ptr) const { + memcpy(ptr, this, sizeof(Vec)); + } +}; + +// Ideally we'd only use bit_pun(), but until this file is always built as C++17 with constexpr if, +// we'll sometimes find need to use unchecked_bit_pun(). Please do check the call sites yourself! +template <typename D, typename S> +SI D unchecked_bit_pun(const S& s) { + D d; + memcpy(&d, &s, sizeof(D)); + return d; +} + +template <typename D, typename S> +SI D bit_pun(const S& s) { + static_assert(sizeof(D) == sizeof(S), ""); + return unchecked_bit_pun<D>(s); +} + +// Translate from a value type T to its corresponding Mask, the result of a comparison. +template <typename T> struct Mask { using type = T; }; +template <> struct Mask<float > { using type = int32_t; }; +template <> struct Mask<double> { using type = int64_t; }; +template <typename T> using M = typename Mask<T>::type; + +// Join two Vec<N,T> into one Vec<2N,T>. 
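+// For example (an illustrative sketch, values assumed):
+//   join(Vec<2,int>{1,2}, Vec<2,int>{3,4}) yields a Vec<4,int> holding {1,2,3,4}.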
+SINT Vec<2*N,T> join(const Vec<N,T>& lo, const Vec<N,T>& hi) { + Vec<2*N,T> v; + v.lo = lo; + v.hi = hi; + return v; +} + +// We have three strategies for implementing Vec operations: +// 1) lean on Clang/GCC vector extensions when available; +// 2) use map() to apply a scalar function lane-wise; +// 3) recurse on lo/hi to scalar portable implementations. +// We can slot in platform-specific implementations as overloads for particular Vec<N,T>, +// or often integrate them directly into the recursion of style 3), allowing fine control. + +#if SKVX_USE_SIMD && (defined(__clang__) || defined(__GNUC__)) + + // VExt<N,T> types have the same size as Vec<N,T> and support most operations directly. + #if defined(__clang__) + template <int N, typename T> + using VExt = T __attribute__((ext_vector_type(N))); + + #elif defined(__GNUC__) + template <int N, typename T> + struct VExtHelper { + typedef T __attribute__((vector_size(N*sizeof(T)))) type; + }; + + template <int N, typename T> + using VExt = typename VExtHelper<N,T>::type; + + // For some reason some (new!) versions of GCC cannot seem to deduce N in the generic + // to_vec<N,T>() below for N=4 and T=float. This workaround seems to help... + SI Vec<4,float> to_vec(VExt<4,float> v) { return bit_pun<Vec<4,float>>(v); } + #endif + + SINT VExt<N,T> to_vext(const Vec<N,T>& v) { return bit_pun<VExt<N,T>>(v); } + SINT Vec <N,T> to_vec(const VExt<N,T>& v) { return bit_pun<Vec <N,T>>(v); } + + SINT Vec<N,T> operator+(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) + to_vext(y)); + } + SINT Vec<N,T> operator-(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) - to_vext(y)); + } + SINT Vec<N,T> operator*(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) * to_vext(y)); + } + SINT Vec<N,T> operator/(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) / to_vext(y)); + } + + SINT Vec<N,T> operator^(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) ^ to_vext(y)); + } + SINT Vec<N,T> operator&(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) & to_vext(y)); + } + SINT Vec<N,T> operator|(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) | to_vext(y)); + } + + SINT Vec<N,T> operator!(const Vec<N,T>& x) { return to_vec<N,T>(!to_vext(x)); } + SINT Vec<N,T> operator-(const Vec<N,T>& x) { return to_vec<N,T>(-to_vext(x)); } + SINT Vec<N,T> operator~(const Vec<N,T>& x) { return to_vec<N,T>(~to_vext(x)); } + + SINT Vec<N,T> operator<<(const Vec<N,T>& x, int k) { return to_vec<N,T>(to_vext(x) << k); } + SINT Vec<N,T> operator>>(const Vec<N,T>& x, int k) { return to_vec<N,T>(to_vext(x) >> k); } + + SINT Vec<N,M<T>> operator==(const Vec<N,T>& x, const Vec<N,T>& y) { + return bit_pun<Vec<N,M<T>>>(to_vext(x) == to_vext(y)); + } + SINT Vec<N,M<T>> operator!=(const Vec<N,T>& x, const Vec<N,T>& y) { + return bit_pun<Vec<N,M<T>>>(to_vext(x) != to_vext(y)); + } + SINT Vec<N,M<T>> operator<=(const Vec<N,T>& x, const Vec<N,T>& y) { + return bit_pun<Vec<N,M<T>>>(to_vext(x) <= to_vext(y)); + } + SINT Vec<N,M<T>> operator>=(const Vec<N,T>& x, const Vec<N,T>& y) { + return bit_pun<Vec<N,M<T>>>(to_vext(x) >= to_vext(y)); + } + SINT Vec<N,M<T>> operator< (const Vec<N,T>& x, const Vec<N,T>& y) { + return bit_pun<Vec<N,M<T>>>(to_vext(x) < to_vext(y)); + } + SINT Vec<N,M<T>> operator> (const Vec<N,T>& x, const Vec<N,T>& y) { + return bit_pun<Vec<N,M<T>>>(to_vext(x) > to_vext(y)); + } + +#else + + // Either SKNX_NO_SIMD is defined, or 
Clang/GCC vector extensions are not available. + // We'll implement things portably with N==1 scalar implementations and recursion onto them. + + // N == 1 scalar implementations. + SIT Vec<1,T> operator+(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val + y.val; } + SIT Vec<1,T> operator-(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val - y.val; } + SIT Vec<1,T> operator*(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val * y.val; } + SIT Vec<1,T> operator/(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val / y.val; } + + SIT Vec<1,T> operator^(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val ^ y.val; } + SIT Vec<1,T> operator&(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val & y.val; } + SIT Vec<1,T> operator|(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val | y.val; } + + SIT Vec<1,T> operator!(const Vec<1,T>& x) { return !x.val; } + SIT Vec<1,T> operator-(const Vec<1,T>& x) { return -x.val; } + SIT Vec<1,T> operator~(const Vec<1,T>& x) { return ~x.val; } + + SIT Vec<1,T> operator<<(const Vec<1,T>& x, int k) { return x.val << k; } + SIT Vec<1,T> operator>>(const Vec<1,T>& x, int k) { return x.val >> k; } + + SIT Vec<1,M<T>> operator==(const Vec<1,T>& x, const Vec<1,T>& y) { + return x.val == y.val ? ~0 : 0; + } + SIT Vec<1,M<T>> operator!=(const Vec<1,T>& x, const Vec<1,T>& y) { + return x.val != y.val ? ~0 : 0; + } + SIT Vec<1,M<T>> operator<=(const Vec<1,T>& x, const Vec<1,T>& y) { + return x.val <= y.val ? ~0 : 0; + } + SIT Vec<1,M<T>> operator>=(const Vec<1,T>& x, const Vec<1,T>& y) { + return x.val >= y.val ? ~0 : 0; + } + SIT Vec<1,M<T>> operator< (const Vec<1,T>& x, const Vec<1,T>& y) { + return x.val < y.val ? ~0 : 0; + } + SIT Vec<1,M<T>> operator> (const Vec<1,T>& x, const Vec<1,T>& y) { + return x.val > y.val ? ~0 : 0; + } + + // Recurse on lo/hi down to N==1 scalar implementations. 
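+    // For instance, adding two Vec<4,T> values below splits into two Vec<2,T> additions,
+    // each of which bottoms out in the Vec<1,T> scalar operator+ above (illustrative note).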
+ SINT Vec<N,T> operator+(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo + y.lo, x.hi + y.hi); + } + SINT Vec<N,T> operator-(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo - y.lo, x.hi - y.hi); + } + SINT Vec<N,T> operator*(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo * y.lo, x.hi * y.hi); + } + SINT Vec<N,T> operator/(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo / y.lo, x.hi / y.hi); + } + + SINT Vec<N,T> operator^(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo ^ y.lo, x.hi ^ y.hi); + } + SINT Vec<N,T> operator&(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo & y.lo, x.hi & y.hi); + } + SINT Vec<N,T> operator|(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo | y.lo, x.hi | y.hi); + } + + SINT Vec<N,T> operator!(const Vec<N,T>& x) { return join(!x.lo, !x.hi); } + SINT Vec<N,T> operator-(const Vec<N,T>& x) { return join(-x.lo, -x.hi); } + SINT Vec<N,T> operator~(const Vec<N,T>& x) { return join(~x.lo, ~x.hi); } + + SINT Vec<N,T> operator<<(const Vec<N,T>& x, int k) { return join(x.lo << k, x.hi << k); } + SINT Vec<N,T> operator>>(const Vec<N,T>& x, int k) { return join(x.lo >> k, x.hi >> k); } + + SINT Vec<N,M<T>> operator==(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo == y.lo, x.hi == y.hi); + } + SINT Vec<N,M<T>> operator!=(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo != y.lo, x.hi != y.hi); + } + SINT Vec<N,M<T>> operator<=(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo <= y.lo, x.hi <= y.hi); + } + SINT Vec<N,M<T>> operator>=(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo >= y.lo, x.hi >= y.hi); + } + SINT Vec<N,M<T>> operator< (const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo < y.lo, x.hi < y.hi); + } + SINT Vec<N,M<T>> operator> (const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo > y.lo, x.hi > y.hi); + } +#endif + +// Scalar/vector operations splat the scalar to a vector. 
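+// For example (illustrative, values assumed): 2.0f + Vec<4,float>{1,2,3,4} splats the
+// scalar to Vec<4,float>(2) and then adds lane-wise, producing {3,4,5,6}.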
+SINTU Vec<N,T> operator+ (U x, const Vec<N,T>& y) { return Vec<N,T>(x) + y; } +SINTU Vec<N,T> operator- (U x, const Vec<N,T>& y) { return Vec<N,T>(x) - y; } +SINTU Vec<N,T> operator* (U x, const Vec<N,T>& y) { return Vec<N,T>(x) * y; } +SINTU Vec<N,T> operator/ (U x, const Vec<N,T>& y) { return Vec<N,T>(x) / y; } +SINTU Vec<N,T> operator^ (U x, const Vec<N,T>& y) { return Vec<N,T>(x) ^ y; } +SINTU Vec<N,T> operator& (U x, const Vec<N,T>& y) { return Vec<N,T>(x) & y; } +SINTU Vec<N,T> operator| (U x, const Vec<N,T>& y) { return Vec<N,T>(x) | y; } +SINTU Vec<N,M<T>> operator==(U x, const Vec<N,T>& y) { return Vec<N,T>(x) == y; } +SINTU Vec<N,M<T>> operator!=(U x, const Vec<N,T>& y) { return Vec<N,T>(x) != y; } +SINTU Vec<N,M<T>> operator<=(U x, const Vec<N,T>& y) { return Vec<N,T>(x) <= y; } +SINTU Vec<N,M<T>> operator>=(U x, const Vec<N,T>& y) { return Vec<N,T>(x) >= y; } +SINTU Vec<N,M<T>> operator< (U x, const Vec<N,T>& y) { return Vec<N,T>(x) < y; } +SINTU Vec<N,M<T>> operator> (U x, const Vec<N,T>& y) { return Vec<N,T>(x) > y; } + +SINTU Vec<N,T> operator+ (const Vec<N,T>& x, U y) { return x + Vec<N,T>(y); } +SINTU Vec<N,T> operator- (const Vec<N,T>& x, U y) { return x - Vec<N,T>(y); } +SINTU Vec<N,T> operator* (const Vec<N,T>& x, U y) { return x * Vec<N,T>(y); } +SINTU Vec<N,T> operator/ (const Vec<N,T>& x, U y) { return x / Vec<N,T>(y); } +SINTU Vec<N,T> operator^ (const Vec<N,T>& x, U y) { return x ^ Vec<N,T>(y); } +SINTU Vec<N,T> operator& (const Vec<N,T>& x, U y) { return x & Vec<N,T>(y); } +SINTU Vec<N,T> operator| (const Vec<N,T>& x, U y) { return x | Vec<N,T>(y); } +SINTU Vec<N,M<T>> operator==(const Vec<N,T>& x, U y) { return x == Vec<N,T>(y); } +SINTU Vec<N,M<T>> operator!=(const Vec<N,T>& x, U y) { return x != Vec<N,T>(y); } +SINTU Vec<N,M<T>> operator<=(const Vec<N,T>& x, U y) { return x <= Vec<N,T>(y); } +SINTU Vec<N,M<T>> operator>=(const Vec<N,T>& x, U y) { return x >= Vec<N,T>(y); } +SINTU Vec<N,M<T>> operator< (const Vec<N,T>& x, U y) { return x < Vec<N,T>(y); } +SINTU Vec<N,M<T>> operator> (const Vec<N,T>& x, U y) { return x > Vec<N,T>(y); } + +SINT Vec<N,T>& operator+=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x + y); } +SINT Vec<N,T>& operator-=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x - y); } +SINT Vec<N,T>& operator*=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x * y); } +SINT Vec<N,T>& operator/=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x / y); } +SINT Vec<N,T>& operator^=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x ^ y); } +SINT Vec<N,T>& operator&=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x & y); } +SINT Vec<N,T>& operator|=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x | y); } + +SINTU Vec<N,T>& operator+=(Vec<N,T>& x, U y) { return (x = x + Vec<N,T>(y)); } +SINTU Vec<N,T>& operator-=(Vec<N,T>& x, U y) { return (x = x - Vec<N,T>(y)); } +SINTU Vec<N,T>& operator*=(Vec<N,T>& x, U y) { return (x = x * Vec<N,T>(y)); } +SINTU Vec<N,T>& operator/=(Vec<N,T>& x, U y) { return (x = x / Vec<N,T>(y)); } +SINTU Vec<N,T>& operator^=(Vec<N,T>& x, U y) { return (x = x ^ Vec<N,T>(y)); } +SINTU Vec<N,T>& operator&=(Vec<N,T>& x, U y) { return (x = x & Vec<N,T>(y)); } +SINTU Vec<N,T>& operator|=(Vec<N,T>& x, U y) { return (x = x | Vec<N,T>(y)); } + +SINT Vec<N,T>& operator<<=(Vec<N,T>& x, int bits) { return (x = x << bits); } +SINT Vec<N,T>& operator>>=(Vec<N,T>& x, int bits) { return (x = x >> bits); } + +// Some operations we want are not expressible with Clang/GCC vector extensions. 
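+// One example is lane-wise selection, handled by if_then_else() below. As an illustrative
+// sketch (usage assumed): if_then_else(x < 0.0f, -x, x) computes a per-lane absolute value
+// of a Vec<N,float> x.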
+ +// Clang can reason about naive_if_then_else() and optimize through it better +// than if_then_else(), so it's sometimes useful to call it directly when we +// think an entire expression should optimize away, e.g. min()/max(). +SINT Vec<N,T> naive_if_then_else(const Vec<N,M<T>>& cond, const Vec<N,T>& t, const Vec<N,T>& e) { + return bit_pun<Vec<N,T>>(( cond & bit_pun<Vec<N, M<T>>>(t)) | + (~cond & bit_pun<Vec<N, M<T>>>(e)) ); +} + +SIT Vec<1,T> if_then_else(const Vec<1,M<T>>& cond, const Vec<1,T>& t, const Vec<1,T>& e) { + // In practice this scalar implementation is unlikely to be used. See next if_then_else(). + return bit_pun<Vec<1,T>>(( cond & bit_pun<Vec<1, M<T>>>(t)) | + (~cond & bit_pun<Vec<1, M<T>>>(e)) ); +} +SINT Vec<N,T> if_then_else(const Vec<N,M<T>>& cond, const Vec<N,T>& t, const Vec<N,T>& e) { + // Specializations inline here so they can generalize what types the apply to. + // (This header is used in C++14 contexts, so we have to kind of fake constexpr if.) +#if SKVX_USE_SIMD && defined(__AVX2__) + if /*constexpr*/ (N*sizeof(T) == 32) { + return unchecked_bit_pun<Vec<N,T>>(_mm256_blendv_epi8(unchecked_bit_pun<__m256i>(e), + unchecked_bit_pun<__m256i>(t), + unchecked_bit_pun<__m256i>(cond))); + } +#endif +#if SKVX_USE_SIMD && defined(__SSE4_1__) + if /*constexpr*/ (N*sizeof(T) == 16) { + return unchecked_bit_pun<Vec<N,T>>(_mm_blendv_epi8(unchecked_bit_pun<__m128i>(e), + unchecked_bit_pun<__m128i>(t), + unchecked_bit_pun<__m128i>(cond))); + } +#endif +#if SKVX_USE_SIMD && defined(__ARM_NEON) + if /*constexpr*/ (N*sizeof(T) == 16) { + return unchecked_bit_pun<Vec<N,T>>(vbslq_u8(unchecked_bit_pun<uint8x16_t>(cond), + unchecked_bit_pun<uint8x16_t>(t), + unchecked_bit_pun<uint8x16_t>(e))); + } +#endif + // Recurse for large vectors to try to hit the specializations above. + if /*constexpr*/ (N*sizeof(T) > 16) { + return join(if_then_else(cond.lo, t.lo, e.lo), + if_then_else(cond.hi, t.hi, e.hi)); + } + // This default can lead to better code than the recursing onto scalars. + return naive_if_then_else(cond, t, e); +} + +SIT bool any(const Vec<1,T>& x) { return x.val != 0; } +SINT bool any(const Vec<N,T>& x) { +#if SKVX_USE_SIMD && defined(__wasm_simd128__) + if constexpr (N == 4 && sizeof(T) == 4) { + return wasm_i32x4_any_true(unchecked_bit_pun<VExt<4,int>>(x)); + } +#endif + return any(x.lo) + || any(x.hi); +} + +SIT bool all(const Vec<1,T>& x) { return x.val != 0; } +SINT bool all(const Vec<N,T>& x) { +#if SKVX_USE_SIMD && defined(__AVX2__) + if /*constexpr*/ (N*sizeof(T) == 32) { + return _mm256_testc_si256(unchecked_bit_pun<__m256i>(x), + _mm256_set1_epi32(-1)); + } +#endif +#if SKVX_USE_SIMD && defined(__SSE4_1__) + if /*constexpr*/ (N*sizeof(T) == 16) { + return _mm_testc_si128(unchecked_bit_pun<__m128i>(x), + _mm_set1_epi32(-1)); + } +#endif +#if SKVX_USE_SIMD && defined(__wasm_simd128__) + if /*constexpr*/ (N == 4 && sizeof(T) == 4) { + return wasm_i32x4_all_true(unchecked_bit_pun<VExt<4,int>>(x)); + } +#endif + return all(x.lo) + && all(x.hi); +} + +// cast() Vec<N,S> to Vec<N,D>, as if applying a C-cast to each lane. +// TODO: implement with map()? 
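+// For example (illustrative, values assumed): cast<int>(Vec<4,float>{1.5f, -2.5f, 3.0f, 4.9f})
+// yields {1, -2, 3, 4}; each lane is truncated toward zero just like a C-style (int) cast.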
+template <typename D, typename S>
+SI Vec<1,D> cast(const Vec<1,S>& src) { return (D)src.val; }
+
+template <typename D, int N, typename S>
+SI Vec<N,D> cast(const Vec<N,S>& src) {
+#if SKVX_USE_SIMD && defined(__clang__)
+    return to_vec(__builtin_convertvector(to_vext(src), VExt<N,D>));
+#else
+    return join(cast<D>(src.lo), cast<D>(src.hi));
+#endif
+}
+
+// min/max match logic of std::min/std::max, which is important when NaN is involved.
+SIT T min(const Vec<1,T>& x) { return x.val; }
+SIT T max(const Vec<1,T>& x) { return x.val; }
+SINT T min(const Vec<N,T>& x) { return std::min(min(x.lo), min(x.hi)); }
+SINT T max(const Vec<N,T>& x) { return std::max(max(x.lo), max(x.hi)); }
+
+SINT Vec<N,T> min(const Vec<N,T>& x, const Vec<N,T>& y) { return naive_if_then_else(y < x, y, x); }
+SINT Vec<N,T> max(const Vec<N,T>& x, const Vec<N,T>& y) { return naive_if_then_else(x < y, y, x); }
+
+SINTU Vec<N,T> min(const Vec<N,T>& x, U y) { return min(x, Vec<N,T>(y)); }
+SINTU Vec<N,T> max(const Vec<N,T>& x, U y) { return max(x, Vec<N,T>(y)); }
+SINTU Vec<N,T> min(U x, const Vec<N,T>& y) { return min(Vec<N,T>(x), y); }
+SINTU Vec<N,T> max(U x, const Vec<N,T>& y) { return max(Vec<N,T>(x), y); }
+
+// pin matches the logic of SkTPin, which is important when NaN is involved. It always returns
+// values in the range lo..hi, and if x is NaN, it returns lo.
+SINT Vec<N,T> pin(const Vec<N,T>& x, const Vec<N,T>& lo, const Vec<N,T>& hi) {
+    return max(lo, min(x, hi));
+}
+
+// Shuffle values from a vector pretty arbitrarily:
+//    skvx::Vec<4,float> rgba = {R,G,B,A};
+//    shuffle<2,1,0,3>        (rgba) ~> {B,G,R,A}
+//    shuffle<2,1>            (rgba) ~> {B,G}
+//    shuffle<2,1,2,1,2,1,2,1>(rgba) ~> {B,G,B,G,B,G,B,G}
+//    shuffle<3,3,3,3>        (rgba) ~> {A,A,A,A}
+// The only real restriction is that the output also be a legal N=power-of-two skvx::Vec.
+template <int... Ix, int N, typename T>
+SI Vec<sizeof...(Ix),T> shuffle(const Vec<N,T>& x) {
+#if SKVX_USE_SIMD && defined(__clang__)
+    // TODO: can we just always use { x[Ix]... }?
+    return to_vec<sizeof...(Ix),T>(__builtin_shufflevector(to_vext(x), to_vext(x), Ix...));
+#else
+    return { x[Ix]... };
+#endif
+}
+
+// Call map(fn, x) for a vector with fn() applied to each lane of x, { fn(x[0]), fn(x[1]), ... },
+// or map(fn, x,y) for a vector of fn(x[i], y[i]), etc.
+
+template <typename Fn, typename... Args, size_t... I>
+SI auto map(std::index_sequence<I...>,
+            Fn&& fn, const Args&... args) -> skvx::Vec<sizeof...(I), decltype(fn(args[0]...))> {
+    auto lane = [&](size_t i)
+#if defined(__clang__)
+    // CFI, specifically -fsanitize=cfi-icall, seems to give a false positive here,
+    // with errors like "control flow integrity check for type 'float (float)
+    // noexcept' failed during indirect function call... note: sqrtf.cfi_jt defined
+    // here". But we can be quite sure fn is the right type: it's all inferred!
+    // So, stifle CFI in this function.
+    __attribute__((no_sanitize("cfi")))
+#endif
+    { return fn(args[i]...); };
+
+    return { lane(I)... };
+}
+
+template <typename Fn, int N, typename T, typename... Rest>
+auto map(Fn&& fn, const Vec<N,T>& first, const Rest&... rest) {
+    // Derive an {0...N-1} index_sequence from the size of the first arg: N lanes in, N lanes out.
+ return map(std::make_index_sequence<N>{}, fn, first,rest...); +} + +SIN Vec<N,float> ceil(const Vec<N,float>& x) { return map( ceilf, x); } +SIN Vec<N,float> floor(const Vec<N,float>& x) { return map(floorf, x); } +SIN Vec<N,float> trunc(const Vec<N,float>& x) { return map(truncf, x); } +SIN Vec<N,float> round(const Vec<N,float>& x) { return map(roundf, x); } +SIN Vec<N,float> sqrt(const Vec<N,float>& x) { return map( sqrtf, x); } +SIN Vec<N,float> abs(const Vec<N,float>& x) { return map( fabsf, x); } +SIN Vec<N,float> fma(const Vec<N,float>& x, + const Vec<N,float>& y, + const Vec<N,float>& z) { + // I don't understand why Clang's codegen is terrible if we write map(fmaf, x,y,z) directly. + auto fn = [](float x, float y, float z) { return fmaf(x,y,z); }; + return map(fn, x,y,z); +} + +SI Vec<1,int> lrint(const Vec<1,float>& x) { + return (int)lrintf(x.val); +} +SIN Vec<N,int> lrint(const Vec<N,float>& x) { +#if SKVX_USE_SIMD && defined(__AVX__) + if /*constexpr*/ (N == 8) { + return unchecked_bit_pun<Vec<N,int>>(_mm256_cvtps_epi32(unchecked_bit_pun<__m256>(x))); + } +#endif +#if SKVX_USE_SIMD && defined(__SSE__) + if /*constexpr*/ (N == 4) { + return unchecked_bit_pun<Vec<N,int>>(_mm_cvtps_epi32(unchecked_bit_pun<__m128>(x))); + } +#endif + return join(lrint(x.lo), + lrint(x.hi)); +} + +SIN Vec<N,float> fract(const Vec<N,float>& x) { return x - floor(x); } + +// The default logic for to_half/from_half is borrowed from skcms, +// and assumes inputs are finite and treat/flush denorm half floats as/to zero. +// Key constants to watch for: +// - a float is 32-bit, 1-8-23 sign-exponent-mantissa, with 127 exponent bias; +// - a half is 16-bit, 1-5-10 sign-exponent-mantissa, with 15 exponent bias. +SIN Vec<N,uint16_t> to_half_finite_ftz(const Vec<N,float>& x) { + Vec<N,uint32_t> sem = bit_pun<Vec<N,uint32_t>>(x), + s = sem & 0x8000'0000, + em = sem ^ s, + is_denorm = em < 0x3880'0000; + return cast<uint16_t>(if_then_else(is_denorm, Vec<N,uint32_t>(0) + , (s>>16) + (em>>13) - ((127-15)<<10))); +} +SIN Vec<N,float> from_half_finite_ftz(const Vec<N,uint16_t>& x) { + Vec<N,uint32_t> wide = cast<uint32_t>(x), + s = wide & 0x8000, + em = wide ^ s; + auto is_denorm = bit_pun<Vec<N,int32_t>>(em < 0x0400); + return if_then_else(is_denorm, Vec<N,float>(0) + , bit_pun<Vec<N,float>>( (s<<16) + (em<<13) + ((127-15)<<23) )); +} + +// Like if_then_else(), these N=1 base cases won't actually be used unless explicitly called. 
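+// Worked illustration of the bit logic above (input value assumed, not from the header):
+// 1.0f has bits 0x3F80'0000, so s = 0 and em = 0x3F80'0000 (not a denorm, since it is
+// >= 0x3880'0000); then (em>>13) - ((127-15)<<10) = 0x1FC00 - 0x1C000 = 0x3C00, which is
+// exactly the half-float bit pattern of 1.0.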
+SI Vec<1,uint16_t> to_half(const Vec<1,float>& x) { return to_half_finite_ftz(x); } +SI Vec<1,float> from_half(const Vec<1,uint16_t>& x) { return from_half_finite_ftz(x); } + +SIN Vec<N,uint16_t> to_half(const Vec<N,float>& x) { +#if SKVX_USE_SIMD && defined(__F16C__) + if /*constexpr*/ (N == 8) { + return unchecked_bit_pun<Vec<N,uint16_t>>(_mm256_cvtps_ph(unchecked_bit_pun<__m256>(x), + _MM_FROUND_CUR_DIRECTION)); + } +#endif +#if SKVX_USE_SIMD && defined(__aarch64__) + if /*constexpr*/ (N == 4) { + return unchecked_bit_pun<Vec<N,uint16_t>>(vcvt_f16_f32(unchecked_bit_pun<float32x4_t>(x))); + + } +#endif + if /*constexpr*/ (N > 4) { + return join(to_half(x.lo), + to_half(x.hi)); + } + return to_half_finite_ftz(x); +} + +SIN Vec<N,float> from_half(const Vec<N,uint16_t>& x) { +#if SKVX_USE_SIMD && defined(__F16C__) + if /*constexpr*/ (N == 8) { + return unchecked_bit_pun<Vec<N,float>>(_mm256_cvtph_ps(unchecked_bit_pun<__m128i>(x))); + } +#endif +#if SKVX_USE_SIMD && defined(__aarch64__) + if /*constexpr*/ (N == 4) { + return unchecked_bit_pun<Vec<N,float>>(vcvt_f32_f16(unchecked_bit_pun<float16x4_t>(x))); + } +#endif + if /*constexpr*/ (N > 4) { + return join(from_half(x.lo), + from_half(x.hi)); + } + return from_half_finite_ftz(x); +} + +// div255(x) = (x + 127) / 255 is a bit-exact rounding divide-by-255, packing down to 8-bit. +SIN Vec<N,uint8_t> div255(const Vec<N,uint16_t>& x) { + return cast<uint8_t>( (x+127)/255 ); +} + +// approx_scale(x,y) approximates div255(cast<uint16_t>(x)*cast<uint16_t>(y)) within a bit, +// and is always perfect when x or y is 0 or 255. +SIN Vec<N,uint8_t> approx_scale(const Vec<N,uint8_t>& x, const Vec<N,uint8_t>& y) { + // All of (x*y+x)/256, (x*y+y)/256, and (x*y+255)/256 meet the criteria above. + // We happen to have historically picked (x*y+x)/256. + auto X = cast<uint16_t>(x), + Y = cast<uint16_t>(y); + return cast<uint8_t>( (X*Y+X)/256 ); +} + +// The ScaledDividerU32 takes a divisor > 1, and creates a function divide(numerator) that +// calculates a numerator / denominator. For this to be rounded properly, numerator should have +// half added in: +// divide(numerator + half) == floor(numerator/denominator + 1/2). +// +// This gives an answer within +/- 1 from the true value. +// +// Derivation of half: +// numerator/denominator + 1/2 = (numerator + half) / d +// numerator + denominator / 2 = numerator + half +// half = denominator / 2. +// +// Because half is divided by 2, that division must also be rounded. +// half == denominator / 2 = (denominator + 1) / 2. +// +// The divisorFactor is just a scaled value: +// divisorFactor = (1 / divisor) * 2 ^ 32. +// The maximum that can be divided and rounded is UINT_MAX - half. 
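+//
+// Worked example (divisor chosen for illustration): with divisor = 3,
+//   divisorFactor = round((1.0 / 3) * 2^32) = 1431655765 and half = (3 + 1) / 2 = 2, so
+//   divide(8 + half) = floor(10 * 1431655765 / 2^32) = 3, matching round(8 / 3).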
+class ScaledDividerU32 { +public: + explicit ScaledDividerU32(uint32_t divisor) + : fDivisorFactor{(uint32_t)(std::round((1.0 / divisor) * (1ull << 32)))} + , fHalf{(divisor + 1) >> 1} { + assert(divisor > 1); + } + + Vec<4, uint32_t> divide(const Vec<4, uint32_t>& numerator) const { +#if SKVX_USE_SIMD && defined(__ARM_NEON) + uint64x2_t hi = vmull_n_u32(vget_high_u32(to_vext(numerator)), fDivisorFactor); + uint64x2_t lo = vmull_n_u32(vget_low_u32(to_vext(numerator)), fDivisorFactor); + + return to_vec<4, uint32_t>(vcombine_u32(vshrn_n_u64(lo,32), vshrn_n_u64(hi,32))); +#else + return cast<uint32_t>((cast<uint64_t>(numerator) * fDivisorFactor) >> 32); +#endif + } + + uint32_t half() const { return fHalf; } + +private: + const uint32_t fDivisorFactor; + const uint32_t fHalf; +}; + +#if SKVX_USE_SIMD && defined(__ARM_NEON) +// With NEON we can do eight u8*u8 -> u16 in one instruction, vmull_u8 (read, mul-long). +SI Vec<8,uint16_t> mull(const Vec<8,uint8_t>& x, + const Vec<8,uint8_t>& y) { + return to_vec<8,uint16_t>(vmull_u8(to_vext(x), + to_vext(y))); +} + +SIN std::enable_if_t<(N < 8), Vec<N,uint16_t>> mull(const Vec<N,uint8_t>& x, + const Vec<N,uint8_t>& y) { + // N < 8 --> double up data until N == 8, returning the part we need. + return mull(join(x,x), + join(y,y)).lo; +} + +SIN std::enable_if_t<(N > 8), Vec<N,uint16_t>> mull(const Vec<N,uint8_t>& x, + const Vec<N,uint8_t>& y) { + // N > 8 --> usual join(lo,hi) strategy to recurse down to N == 8. + return join(mull(x.lo, y.lo), + mull(x.hi, y.hi)); +} + +#else + +// Nothing special when we don't have NEON... just cast up to 16-bit and multiply. +SIN Vec<N,uint16_t> mull(const Vec<N,uint8_t>& x, + const Vec<N,uint8_t>& y) { + return cast<uint16_t>(x) + * cast<uint16_t>(y); +} +#endif + +// Allow floating point contraction. e.g., allow a*x + y to be compiled to a single FMA even though +// it introduces LSB differences on platforms that don't have an FMA instruction. +#if defined(__clang__) +#pragma STDC FP_CONTRACT ON +#endif + +// Approximates the inverse cosine of x within 0.96 degrees using the rational polynomial: +// +// acos(x) ~= (bx^3 + ax) / (dx^4 + cx^2 + 1) + pi/2 +// +// See: https://stackoverflow.com/a/36387954 +// +// For a proof of max error, see the "SkVx_approx_acos" unit test. +// +// NOTE: This function deviates immediately from pi and 0 outside -1 and 1. (The derivatives are +// infinite at -1 and 1). So the input must still be clamped between -1 and 1. +#define SKVX_APPROX_ACOS_MAX_ERROR SkDegreesToRadians(.96f) +SIN Vec<N,float> approx_acos(Vec<N,float> x) { + constexpr static float a = -0.939115566365855f; + constexpr static float b = 0.9217841528914573f; + constexpr static float c = -1.2845906244690837f; + constexpr static float d = 0.295624144969963174f; + constexpr static float pi_over_2 = 1.5707963267948966f; + auto xx = x*x; + auto numer = b*xx + a; + auto denom = xx*(d*xx + c) + 1; + return x * (numer/denom) + pi_over_2; +} + +#if defined(__clang__) +#pragma STDC FP_CONTRACT DEFAULT +#endif + +// De-interleaving load of 4 vectors. +// +// WARNING: These are really only supported well on NEON. Consider restructuring your data before +// resorting to these methods. 
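+//
+// Usage sketch (data layout and names assumed for illustration): given floats stored as
+// interleaved r0,g0,b0,a0, r1,g1,b1,a1, ...,
+//   skvx::Vec<4,float> r,g,b,a;
+//   strided_load4(interleaved_ptr, r,g,b,a);
+// leaves r = {r0,r1,r2,r3}, g = {g0,g1,g2,g3}, and so on.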
+SIT void strided_load4(const T* v, + skvx::Vec<1,T>& a, + skvx::Vec<1,T>& b, + skvx::Vec<1,T>& c, + skvx::Vec<1,T>& d) { + a.val = v[0]; + b.val = v[1]; + c.val = v[2]; + d.val = v[3]; +} +SINT void strided_load4(const T* v, + skvx::Vec<N,T>& a, + skvx::Vec<N,T>& b, + skvx::Vec<N,T>& c, + skvx::Vec<N,T>& d) { + strided_load4(v, a.lo, b.lo, c.lo, d.lo); + strided_load4(v + 4*(N/2), a.hi, b.hi, c.hi, d.hi); +} +#if SKVX_USE_SIMD && defined(__ARM_NEON) +#define IMPL_LOAD4_TRANSPOSED(N, T, VLD) \ +SI void strided_load4(const T* v, \ + skvx::Vec<N,T>& a, \ + skvx::Vec<N,T>& b, \ + skvx::Vec<N,T>& c, \ + skvx::Vec<N,T>& d) { \ + auto mat = VLD(v); \ + a = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[0]); \ + b = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[1]); \ + c = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[2]); \ + d = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[3]); \ +} +IMPL_LOAD4_TRANSPOSED(2, uint32_t, vld4_u32); +IMPL_LOAD4_TRANSPOSED(4, uint16_t, vld4_u16); +IMPL_LOAD4_TRANSPOSED(8, uint8_t, vld4_u8); +IMPL_LOAD4_TRANSPOSED(2, int32_t, vld4_s32); +IMPL_LOAD4_TRANSPOSED(4, int16_t, vld4_s16); +IMPL_LOAD4_TRANSPOSED(8, int8_t, vld4_s8); +IMPL_LOAD4_TRANSPOSED(2, float, vld4_f32); +IMPL_LOAD4_TRANSPOSED(4, uint32_t, vld4q_u32); +IMPL_LOAD4_TRANSPOSED(8, uint16_t, vld4q_u16); +IMPL_LOAD4_TRANSPOSED(16, uint8_t, vld4q_u8); +IMPL_LOAD4_TRANSPOSED(4, int32_t, vld4q_s32); +IMPL_LOAD4_TRANSPOSED(8, int16_t, vld4q_s16); +IMPL_LOAD4_TRANSPOSED(16, int8_t, vld4q_s8); +IMPL_LOAD4_TRANSPOSED(4, float, vld4q_f32); +#undef IMPL_LOAD4_TRANSPOSED + +#elif SKVX_USE_SIMD && defined(__SSE__) + +SI void strided_load4(const float* v, + Vec<4,float>& a, + Vec<4,float>& b, + Vec<4,float>& c, + Vec<4,float>& d) { + using skvx::bit_pun; + __m128 a_ = _mm_loadu_ps(v); + __m128 b_ = _mm_loadu_ps(v+4); + __m128 c_ = _mm_loadu_ps(v+8); + __m128 d_ = _mm_loadu_ps(v+12); + _MM_TRANSPOSE4_PS(a_, b_, c_, d_); + a = bit_pun<Vec<4,float>>(a_); + b = bit_pun<Vec<4,float>>(b_); + c = bit_pun<Vec<4,float>>(c_); + d = bit_pun<Vec<4,float>>(d_); +} +#endif + +// De-interleaving load of 2 vectors. +// +// WARNING: These are really only supported well on NEON. Consider restructuring your data before +// resorting to these methods. 
+SIT void strided_load2(const T* v, skvx::Vec<1,T>& a, skvx::Vec<1,T>& b) { + a.val = v[0]; + b.val = v[1]; +} +SINT void strided_load2(const T* v, skvx::Vec<N,T>& a, skvx::Vec<N,T>& b) { + strided_load2(v, a.lo, b.lo); + strided_load2(v + 2*(N/2), a.hi, b.hi); +} +#if SKVX_USE_SIMD && defined(__ARM_NEON) +#define IMPL_LOAD2_TRANSPOSED(N, T, VLD) \ +SI void strided_load2(const T* v, skvx::Vec<N,T>& a, skvx::Vec<N,T>& b) { \ + auto mat = VLD(v); \ + a = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[0]); \ + b = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[1]); \ +} +IMPL_LOAD2_TRANSPOSED(2, uint32_t, vld2_u32); +IMPL_LOAD2_TRANSPOSED(4, uint16_t, vld2_u16); +IMPL_LOAD2_TRANSPOSED(8, uint8_t, vld2_u8); +IMPL_LOAD2_TRANSPOSED(2, int32_t, vld2_s32); +IMPL_LOAD2_TRANSPOSED(4, int16_t, vld2_s16); +IMPL_LOAD2_TRANSPOSED(8, int8_t, vld2_s8); +IMPL_LOAD2_TRANSPOSED(2, float, vld2_f32); +IMPL_LOAD2_TRANSPOSED(4, uint32_t, vld2q_u32); +IMPL_LOAD2_TRANSPOSED(8, uint16_t, vld2q_u16); +IMPL_LOAD2_TRANSPOSED(16, uint8_t, vld2q_u8); +IMPL_LOAD2_TRANSPOSED(4, int32_t, vld2q_s32); +IMPL_LOAD2_TRANSPOSED(8, int16_t, vld2q_s16); +IMPL_LOAD2_TRANSPOSED(16, int8_t, vld2q_s8); +IMPL_LOAD2_TRANSPOSED(4, float, vld2q_f32); +#undef IMPL_LOAD2_TRANSPOSED +#endif + +} // namespace skvx + +#undef SINTU +#undef SINT +#undef SIN +#undef SIT +#undef SI +#undef SKVX_ALWAYS_INLINE +#undef SKVX_USE_SIMD + +#endif//SKVX_DEFINED diff --git a/src/deps/skia/include/private/SkWeakRefCnt.h b/src/deps/skia/include/private/SkWeakRefCnt.h new file mode 100644 index 000000000..2b577342f --- /dev/null +++ b/src/deps/skia/include/private/SkWeakRefCnt.h @@ -0,0 +1,170 @@ +/* + * Copyright 2012 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkWeakRefCnt_DEFINED +#define SkWeakRefCnt_DEFINED + +#include "include/core/SkRefCnt.h" +#include <atomic> + +/** \class SkWeakRefCnt + + SkWeakRefCnt is the base class for objects that may be shared by multiple + objects. When an existing strong owner wants to share a reference, it calls + ref(). When a strong owner wants to release its reference, it calls + unref(). When the shared object's strong reference count goes to zero as + the result of an unref() call, its (virtual) weak_dispose method is called. + It is an error for the destructor to be called explicitly (or via the + object going out of scope on the stack or calling delete) if + getRefCnt() > 1. + + In addition to strong ownership, an owner may instead obtain a weak + reference by calling weak_ref(). A call to weak_ref() must be balanced by a + call to weak_unref(). To obtain a strong reference from a weak reference, + call try_ref(). If try_ref() returns true, the owner's pointer is now also + a strong reference on which unref() must be called. Note that this does not + affect the original weak reference, weak_unref() must still be called. When + the weak reference count goes to zero, the object is deleted. While the + weak reference count is positive and the strong reference count is zero the + object still exists, but will be in the disposed state. It is up to the + object to define what this means. + + Note that a strong reference implicitly implies a weak reference. As a + result, it is allowable for the owner of a strong ref to call try_ref(). + This will have the same effect as calling ref(), but may be more expensive. + + Example: + + SkWeakRefCnt myRef = strongRef.weak_ref(); + ... 
// strongRef.unref() may or may not be called + if (myRef.try_ref()) { + ... // use myRef + myRef.unref(); + } else { + // myRef is in the disposed state + } + myRef.weak_unref(); +*/ +class SK_API SkWeakRefCnt : public SkRefCnt { +public: + /** Default construct, initializing the reference counts to 1. + The strong references collectively hold one weak reference. When the + strong reference count goes to zero, the collectively held weak + reference is released. + */ + SkWeakRefCnt() : SkRefCnt(), fWeakCnt(1) {} + + /** Destruct, asserting that the weak reference count is 1. + */ + ~SkWeakRefCnt() override { +#ifdef SK_DEBUG + SkASSERT(getWeakCnt() == 1); + fWeakCnt.store(0, std::memory_order_relaxed); +#endif + } + +#ifdef SK_DEBUG + /** Return the weak reference count. */ + int32_t getWeakCnt() const { + return fWeakCnt.load(std::memory_order_relaxed); + } +#endif + +private: + /** If fRefCnt is 0, returns 0. + * Otherwise increments fRefCnt, acquires, and returns the old value. + */ + int32_t atomic_conditional_acquire_strong_ref() const { + int32_t prev = fRefCnt.load(std::memory_order_relaxed); + do { + if (0 == prev) { + break; + } + } while(!fRefCnt.compare_exchange_weak(prev, prev+1, std::memory_order_acquire, + std::memory_order_relaxed)); + return prev; + } + +public: + /** Creates a strong reference from a weak reference, if possible. The + caller must already be an owner. If try_ref() returns true the owner + is in posession of an additional strong reference. Both the original + reference and new reference must be properly unreferenced. If try_ref() + returns false, no strong reference could be created and the owner's + reference is in the same state as before the call. + */ + bool SK_WARN_UNUSED_RESULT try_ref() const { + if (atomic_conditional_acquire_strong_ref() != 0) { + // Acquire barrier (L/SL), if not provided above. + // Prevents subsequent code from happening before the increment. + return true; + } + return false; + } + + /** Increment the weak reference count. Must be balanced by a call to + weak_unref(). + */ + void weak_ref() const { + SkASSERT(getRefCnt() > 0); + SkASSERT(getWeakCnt() > 0); + // No barrier required. + (void)fWeakCnt.fetch_add(+1, std::memory_order_relaxed); + } + + /** Decrement the weak reference count. If the weak reference count is 1 + before the decrement, then call delete on the object. Note that if this + is the case, then the object needs to have been allocated via new, and + not on the stack. + */ + void weak_unref() const { + SkASSERT(getWeakCnt() > 0); + // A release here acts in place of all releases we "should" have been doing in ref(). + if (1 == fWeakCnt.fetch_add(-1, std::memory_order_acq_rel)) { + // Like try_ref(), the acquire is only needed on success, to make sure + // code in internal_dispose() doesn't happen before the decrement. +#ifdef SK_DEBUG + // so our destructor won't complain + fWeakCnt.store(1, std::memory_order_relaxed); +#endif + this->INHERITED::internal_dispose(); + } + } + + /** Returns true if there are no strong references to the object. When this + is the case all future calls to try_ref() will return false. + */ + bool weak_expired() const { + return fRefCnt.load(std::memory_order_relaxed) == 0; + } + +protected: + /** Called when the strong reference count goes to zero. This allows the + object to free any resources it may be holding. Weak references may + still exist and their level of allowed access to the object is defined + by the object's class. 
+ */ + virtual void weak_dispose() const { + } + +private: + /** Called when the strong reference count goes to zero. Calls weak_dispose + on the object and releases the implicit weak reference held + collectively by the strong references. + */ + void internal_dispose() const override { + weak_dispose(); + weak_unref(); + } + + /* Invariant: fWeakCnt = #weak + (fRefCnt > 0 ? 1 : 0) */ + mutable std::atomic<int32_t> fWeakCnt; + + using INHERITED = SkRefCnt; +}; + +#endif diff --git a/src/deps/skia/include/private/chromium/BUILD.bazel b/src/deps/skia/include/private/chromium/BUILD.bazel new file mode 100644 index 000000000..8633eae54 --- /dev/null +++ b/src/deps/skia/include/private/chromium/BUILD.bazel @@ -0,0 +1,22 @@ +load("//bazel:macros.bzl", "generated_cc_atom") + +generated_cc_atom( + name = "GrSlug_hdr", + hdrs = ["GrSlug.h"], + visibility = ["//:__subpackages__"], + deps = [ + "//include/core:SkRect_hdr", + "//include/core:SkRefCnt_hdr", + ], +) + +generated_cc_atom( + name = "SkChromeRemoteGlyphCache_hdr", + hdrs = ["SkChromeRemoteGlyphCache.h"], + visibility = ["//:__subpackages__"], + deps = [ + "//include/core:SkData_hdr", + "//include/core:SkRefCnt_hdr", + "//include/utils:SkNoDrawCanvas_hdr", + ], +) diff --git a/src/deps/skia/include/private/chromium/GrSlug.h b/src/deps/skia/include/private/chromium/GrSlug.h new file mode 100644 index 000000000..8adbff45c --- /dev/null +++ b/src/deps/skia/include/private/chromium/GrSlug.h @@ -0,0 +1,41 @@ +/* + * Copyright 2021 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef GrSlug_DEFINED +#define GrSlug_DEFINED + +#include "include/core/SkRect.h" +#include "include/core/SkRefCnt.h" + +class SkCanvas; +class SkPaint; +class SkTextBlob; + +// You can use GrSlug to simulate drawTextBlob by defining the following at compile time. +// SK_EXPERIMENTAL_SIMULATE_DRAWGLYPHRUNLIST_WITH_SLUG +// For Skia, add this to your args.gn file. +// extra_cflags = ["-D", "SK_EXPERIMENTAL_SIMULATE_DRAWGLYPHRUNLIST_WITH_SLUG"] + +// GrSlug encapsulates an SkTextBlob at a specific origin, using a specific paint. It can be +// manipulated using matrix and clip changes to the canvas. If the canvas is transformed, then +// the GrSlug will also transform with smaller glyphs using bi-linear interpolation to render. You +// can think of a GrSlug as making a rubber stamp out of a SkTextBlob. +class SK_API GrSlug : public SkRefCnt { +public: + ~GrSlug() override; + // Return nullptr if the blob would not draw. This is not because of clipping, but because of + // some paint optimization. The GrSlug is captured as if drawn using drawTextBlob. + static sk_sp<GrSlug> ConvertBlob( + SkCanvas* canvas, const SkTextBlob& blob, SkPoint origin, const SkPaint& paint); + + // Draw the GrSlug obeying the canvas's mapping and clipping. + void draw(SkCanvas* canvas); + + virtual SkRect sourceBounds() const = 0; + virtual const SkPaint& paint() const = 0; +}; +#endif // GrSlug_DEFINED diff --git a/src/deps/skia/include/private/chromium/SkChromeRemoteGlyphCache.h b/src/deps/skia/include/private/chromium/SkChromeRemoteGlyphCache.h new file mode 100644 index 000000000..033b03fe6 --- /dev/null +++ b/src/deps/skia/include/private/chromium/SkChromeRemoteGlyphCache.h @@ -0,0 +1,143 @@ +/* + * Copyright 2021 Google LLC. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#ifndef SkChromeRemoteGlyphCache_DEFINED +#define SkChromeRemoteGlyphCache_DEFINED + +#include <memory> +#include <vector> + +#include "include/core/SkData.h" +#include "include/core/SkRefCnt.h" +#include "include/utils/SkNoDrawCanvas.h" + +class SkAutoDescriptor; +struct SkPackedGlyphID; +class SkStrikeCache; +class SkStrikeClientImpl; +class SkStrikeServer; +class SkStrikeServerImpl; +class SkTypeface; + +using SkDiscardableHandleId = uint32_t; +// This class is not thread-safe. +class SkStrikeServer { +public: + // An interface used by the server to create handles for pinning SkStrike + // entries on the remote client. + class DiscardableHandleManager { + public: + SK_SPI virtual ~DiscardableHandleManager() = default; + + // Creates a new *locked* handle and returns a unique ID that can be used to identify + // it on the remote client. + SK_SPI virtual SkDiscardableHandleId createHandle() = 0; + + // Returns true if the handle could be successfully locked. The server can + // assume it will remain locked until the next set of serialized entries is + // pulled from the SkStrikeServer. + // If returns false, the cache entry mapped to the handle has been deleted + // on the client. Any subsequent attempts to lock the same handle are not + // allowed. + SK_SPI virtual bool lockHandle(SkDiscardableHandleId) = 0; + + // Returns true if a handle has been deleted on the remote client. It is + // invalid to use a handle id again with this manager once this returns true. + SK_SPI virtual bool isHandleDeleted(SkDiscardableHandleId) = 0; + }; + + SK_SPI explicit SkStrikeServer(DiscardableHandleManager* discardableHandleManager); + SK_SPI ~SkStrikeServer(); + + // Create an analysis SkCanvas used to populate the SkStrikeServer with ops + // which will be serialized and rendered using the SkStrikeClient. + SK_API std::unique_ptr<SkCanvas> makeAnalysisCanvas(int width, int height, + const SkSurfaceProps& props, + sk_sp<SkColorSpace> colorSpace, + bool DFTSupport); + + // Serializes the typeface to be transmitted using this server. + SK_SPI sk_sp<SkData> serializeTypeface(SkTypeface*); + + // Serializes the strike data captured using a canvas returned by ::makeAnalysisCanvas. Any + // handles locked using the DiscardableHandleManager will be assumed to be + // unlocked after this call. + SK_SPI void writeStrikeData(std::vector<uint8_t>* memory); + + // Testing helpers + void setMaxEntriesInDescriptorMapForTesting(size_t count); + size_t remoteStrikeMapSizeForTesting() const; + +private: + SkStrikeServerImpl* impl(); + + std::unique_ptr<SkStrikeServerImpl> fImpl; +}; + +class SkStrikeClient { +public: + // This enum is used in histogram reporting in chromium. Please don't re-order the list of + // entries, and consider it to be append-only. + enum CacheMissType : uint32_t { + // Hard failures where no fallback could be found. + kFontMetrics = 0, + kGlyphMetrics = 1, + kGlyphImage = 2, + kGlyphPath = 3, + + // (DEPRECATED) The original glyph could not be found and a fallback was used. + kGlyphMetricsFallback = 4, + kGlyphPathFallback = 5, + + kLast = kGlyphPath + }; + + // An interface to delete handles that may be pinned by the remote server. + class DiscardableHandleManager : public SkRefCnt { + public: + ~DiscardableHandleManager() override = default; + + // Returns true if the handle was unlocked and can be safely deleted. Once + // successful, subsequent attempts to delete the same handle are invalid. 
+ virtual bool deleteHandle(SkDiscardableHandleId) = 0; + + virtual void notifyCacheMiss(CacheMissType type, int fontSize) = 0; + + struct ReadFailureData { + size_t memorySize; + size_t bytesRead; + uint64_t typefaceSize; + uint64_t strikeCount; + uint64_t glyphImagesCount; + uint64_t glyphPathsCount; + }; + virtual void notifyReadFailure(const ReadFailureData& data) {} + }; + + SK_SPI explicit SkStrikeClient(sk_sp<DiscardableHandleManager>, + bool isLogging = true, + SkStrikeCache* strikeCache = nullptr); + SK_SPI ~SkStrikeClient(); + + // Deserializes the typeface previously serialized using the SkStrikeServer. Returns null if the + // data is invalid. + SK_SPI sk_sp<SkTypeface> deserializeTypeface(const void* data, size_t length); + + // Deserializes the strike data from a SkStrikeServer. All messages generated + // from a server when serializing the ops must be deserialized before the op + // is rasterized. + // Returns false if the data is invalid. + SK_SPI bool readStrikeData(const volatile void* memory, size_t memorySize); + +private: + std::unique_ptr<SkStrikeClientImpl> fImpl; +}; + +// For exposure to fuzzing only. +bool SkFuzzDeserializeSkDescriptor(sk_sp<SkData> bytes, SkAutoDescriptor* ad); + +#endif // SkChromeRemoteGlyphCache_DEFINED |
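+// Illustrative flow for the remote glyph cache API above (a sketch with assumed objects,
+// not code from this header): a renderer-side SkStrikeServer draws text into the canvas
+// returned by makeAnalysisCanvas() and then serializes the glyphs it touched, e.g.
+//   std::vector<uint8_t> glyphData;
+//   server.writeStrikeData(&glyphData);
+// while the GPU-side SkStrikeClient applies that data before rasterizing the recorded ops:
+//   client.readStrikeData(glyphData.data(), glyphData.size());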