Update astcenc to the upstream 5.3.0 release

This is mostly a maintenance update that brings the compressor inline with the recently published Khronos Data Format Specification 1.4 release which clarified some ambiguity in the specification. This update also gives minor codec optimizations, bug fixes, and image quality improvements. The biggest improvement for Godot is that builds using MSVC cl.exe will now correctly default to the SSE2-optimized backend rather than the reference C backend. This makes compression more than 3 times faster. Builds using other compilers (GCC, LLVM/Clang) were not impacted by the underlying issue, and see no performance uplift.
2025-03-21 16:02:50 -07:00
parent 2303ce843a
commit 75ce42d463
24 changed files with 2068 additions and 813 deletions
--- a/thirdparty/astcenc/astcenc_mathlib.h
+++ b/thirdparty/astcenc/astcenc_mathlib.h
@ -1,6 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
-// Copyright 2011-2024 Arm Limited
+// Copyright 2011-2025 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
@ -48,7 +48,7 @@
    #define ASTCENC_SSE 42
  #elif defined(__SSE4_1__)
    #define ASTCENC_SSE 41
-  #elif defined(__SSE2__)
+  #elif defined(__SSE2__) || (defined(_M_AMD64) && !defined(_M_ARM64EC))
    #define ASTCENC_SSE 20
  #else
    #define ASTCENC_SSE 0
@ -58,25 +58,42 @@
 #ifndef ASTCENC_AVX
  #if defined(__AVX2__)
    #define ASTCENC_AVX 2
+    #define ASTCENC_X86_GATHERS 1
  #elif defined(__AVX__)
    #define ASTCENC_AVX 1
+    #define ASTCENC_X86_GATHERS 1
  #else
    #define ASTCENC_AVX 0
  #endif
 #endif

 #ifndef ASTCENC_NEON
-  #if defined(__aarch64__)
+  #if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
    #define ASTCENC_NEON 1
  #else
    #define ASTCENC_NEON 0
  #endif
 #endif

+#ifndef ASTCENC_SVE
+  #if defined(__ARM_FEATURE_SVE)
+    #if defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS == 256
+      #define ASTCENC_SVE 8
+    // Auto-detected SVE can only assume vector width of 4 is available, but
+    // must also allow for hardware being longer and so all use of intrinsics
+    // must explicitly use predicate masks to limit to 4-wide.
+    #else
+      #define ASTCENC_SVE 4
+    #endif
+    #else
+    #define ASTCENC_SVE 0
+  #endif
+#endif
+
 // Force vector-sized SIMD alignment
-#if ASTCENC_AVX
+#if ASTCENC_AVX || ASTCENC_SVE == 8
  #define ASTCENC_VECALIGN 32
-#elif ASTCENC_SSE || ASTCENC_NEON
+#elif ASTCENC_SSE || ASTCENC_NEON || ASTCENC_SVE == 4
  #define ASTCENC_VECALIGN 16
 // Use default alignment for non-SIMD builds
 #else