public inbox for git-commits@fedoraproject.org
help / color / mirror / Atom feed
* [rpms/firefox] rawhide: Revert "add ppc64le JIT"
@ 2026-06-16 13:11
0 siblings, 0 replies; only message in thread
From: @ 2026-06-16 13:11 UTC (permalink / raw)
To: git-commits
A new commit has been pushed.
Repo : rpms/firefox
Branch : rawhide
Commit : 430380f3f64c47eba746c7e67fa2160ef8865644
Author : Dan Horák <dan@danny.cz>
Date : 2026-06-16T13:11:26+00:00
Stats : +0/-42086 in 4 file(s)
URL : https://src.fedoraproject.org/rpms/firefox/c/430380f3f64c47eba746c7e67fa2160ef8865644?branch=rawhide
Log:
Revert "add ppc64le JIT"
This reverts commit e99f0d4925ac596ad75f2ae084620d36c44a85c2.
---
diff --git a/0001-Add-VSX-instructions-for-SKIA.patch b/0001-Add-VSX-instructions-for-SKIA.patch
deleted file mode 100644
index ac3a0d8..0000000
--- a/0001-Add-VSX-instructions-for-SKIA.patch
+++ /dev/null
@@ -1,1347 +0,0 @@
-From a47c991dbbfb709134737a54e8bbe7e0b1bce800 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Trung=20L=C3=AA?= <8@tle.id.au>
-Date: Fri, 12 Jun 2026 15:23:10 +1000
-Subject: [PATCH 1/3] Add VSX instructions for SKIA
-
-Adapted from work done by Raptor Engineering for chromium's vendored
-SKIA
-
-Co-authored-by: Timothy Pearson <tpearson@raptorengineering.com>
----
- gfx/skia/skia/src/base/SkVx.h | 58 +++-
- gfx/skia/skia/src/core/SkBlitRow_D32.cpp | 98 ++++++
- gfx/skia/skia/src/core/SkBlitter_ARGB32.cpp | 268 ++++++++++++++++
- .../skia/src/opts/SkBitmapProcState_opts.h | 164 ++++++++++
- gfx/skia/skia/src/opts/SkBlitRow_opts.h | 48 +++
- .../skia/src/opts/SkRasterPipeline_opts.h | 237 ++++++++++++++
- gfx/skia/skia/src/opts/SkSwizzler_opts.inc | 289 ++++++++++++++++++
- 7 files changed, 1160 insertions(+), 2 deletions(-)
-
-diff --git a/gfx/skia/skia/src/base/SkVx.h b/gfx/skia/skia/src/base/SkVx.h
-index f87ca44d4af0..ed80c91fd38e 100644
---- a/gfx/skia/skia/src/base/SkVx.h
-+++ b/gfx/skia/skia/src/base/SkVx.h
-@@ -52,6 +52,8 @@
- #include <arm_neon.h>
- #elif defined(__wasm_simd128__)
- #include <wasm_simd128.h>
-+ #elif defined(SK_CPU_PPC) && defined(__VSX__)
-+ #include <altivec.h>
- #elif SK_CPU_LSX_LEVEL >= SK_CPU_LSX_LEVEL_LASX
- #include <lasxintrin.h>
- #include <lsxintrin.h>
-@@ -509,6 +511,14 @@ SINT Vec<N,T> if_then_else(const Vec<N,M<T>>& cond, const Vec<N,T>& t, const Vec
- sk_bit_cast<uint8x16_t>(e)));
- }
- #endif
-+#if SKVX_USE_SIMD && defined(SK_CPU_PPC) && defined(__VSX__)
-+ if constexpr (N*sizeof(T) == 16) {
-+ return sk_bit_cast<Vec<N,T>>(
-+ vec_sel(sk_bit_cast<__vector unsigned char>(e),
-+ sk_bit_cast<__vector unsigned char>(t),
-+ sk_bit_cast<__vector unsigned char>(cond)));
-+ }
-+#endif
- #if SKVX_USE_SIMD && SK_CPU_LSX_LEVEL >= SK_CPU_LSX_LEVEL_LASX
- if constexpr (N*sizeof(T) == 32) {
- return sk_bit_cast<Vec<N,T>>(__lasx_xvbitsel_v(sk_bit_cast<__m256i>(e),
-@@ -579,6 +589,11 @@ SINT bool any(const Vec<N,T>& x) {
- sk_bit_cast<__m128i>(x)));
- return retv[0] != 0b0000;
- }
-+#endif
-+#if SKVX_USE_SIMD && defined(SK_CPU_PPC) && defined(__VSX__)
-+ if constexpr (N*sizeof(T) == 16) {
-+ return vec_any_ne(sk_bit_cast<__vector unsigned int>(x), vec_splats(0u));
-+ }
- #endif
- return any(x.lo)
- || any(x.hi);
-@@ -622,6 +637,11 @@ SINT bool all(const Vec<N,T>& x) {
- sk_bit_cast<__m128i>(x)));
- return retv[0] == 0b1111;
- }
-+#endif
-+#if SKVX_USE_SIMD && defined(SK_CPU_PPC) && defined(__VSX__)
-+ if constexpr (N*sizeof(T) == 16) {
-+ return vec_all_ne(sk_bit_cast<__vector unsigned int>(x), vec_splats(0u));
-+ }
- #endif
- return all(x.lo)
- && all(x.hi);
-@@ -647,8 +667,22 @@ SIT T max(const Vec<1,T>& x) { return x.val; }
- SINT T min(const Vec<N,T>& x) { return std::min(min(x.lo), min(x.hi)); }
- SINT T max(const Vec<N,T>& x) { return std::max(max(x.lo), max(x.hi)); }
-
--SINT Vec<N,T> min(const Vec<N,T>& x, const Vec<N,T>& y) { return naive_if_then_else(y < x, y, x); }
--SINT Vec<N,T> max(const Vec<N,T>& x, const Vec<N,T>& y) { return naive_if_then_else(x < y, y, x); }
-+SINT Vec<N,T> min(const Vec<N,T>& x, const Vec<N,T>& y) {
-+#if SKVX_USE_SIMD && defined(SK_CPU_PPC) && defined(__VSX__)
-+ if constexpr (N*sizeof(T) == 16) {
-+ return sk_bit_cast<Vec<N,T>>(vec_min(to_vext(x), to_vext(y)));
-+ }
-+#endif
-+ return naive_if_then_else(y < x, y, x);
-+}
-+SINT Vec<N,T> max(const Vec<N,T>& x, const Vec<N,T>& y) {
-+#if SKVX_USE_SIMD && defined(SK_CPU_PPC) && defined(__VSX__)
-+ if constexpr (N*sizeof(T) == 16) {
-+ return sk_bit_cast<Vec<N,T>>(vec_max(to_vext(x), to_vext(y)));
-+ }
-+#endif
-+ return naive_if_then_else(x < y, y, x);
-+}
-
- SINTU Vec<N,T> min(const Vec<N,T>& x, U y) { return min(x, Vec<N,T>(y)); }
- SINTU Vec<N,T> max(const Vec<N,T>& x, U y) { return max(x, Vec<N,T>(y)); }
-@@ -960,6 +994,26 @@ SIN Vec<N,uint16_t> mulhi(const Vec<N,uint16_t>& x,
- } else { // N > 8
- return join(mulhi(x.lo, y.lo), mulhi(x.hi, y.hi));
- }
-+#elif SKVX_USE_SIMD && defined(SK_CPU_PPC) && defined(__VSX__)
-+ if constexpr (N == 8) {
-+ // u16*u16 -> u32 even/odd products (vmuleuh/vmulouh), then gather the
-+ // high 16 bits of each back into sequential lanes. Same idiom as the
-+ // VSX scale() in SkSwizzler_opts.
-+ __vector unsigned short xs = sk_bit_cast<__vector unsigned short>(x);
-+ __vector unsigned short ys = sk_bit_cast<__vector unsigned short>(y);
-+ __vector unsigned int even = vec_vmuleuh(xs, ys);
-+ __vector unsigned int odd = vec_vmulouh(xs, ys);
-+ const __vector unsigned char hi = {
-+ 0x02,0x03, 0x12,0x13, 0x06,0x07, 0x16,0x17,
-+ 0x0A,0x0B, 0x1A,0x1B, 0x0E,0x0F, 0x1E,0x1F
-+ };
-+ return sk_bit_cast<Vec<8,uint16_t>>(
-+ vec_perm((__vector unsigned char)even, (__vector unsigned char)odd, hi));
-+ } else if constexpr (N < 8) {
-+ return mulhi(join(x,x), join(y,y)).lo;
-+ } else { // N > 8
-+ return join(mulhi(x.lo, y.lo), mulhi(x.hi, y.hi));
-+ }
- #else
- return skvx::cast<uint16_t>(mull(x, y) >> 16);
- #endif
-diff --git a/gfx/skia/skia/src/core/SkBlitRow_D32.cpp b/gfx/skia/skia/src/core/SkBlitRow_D32.cpp
-index bcbf2e66bd46..920d6a9b2366 100644
---- a/gfx/skia/skia/src/core/SkBlitRow_D32.cpp
-+++ b/gfx/skia/skia/src/core/SkBlitRow_D32.cpp
-@@ -517,6 +517,104 @@ static void blit_row_s32_opaque(SkPMColor* dst,
- }
- }
-
-+#elif defined(SK_CPU_PPC) && defined(__VSX__)
-+ #include <altivec.h>
-+
-+ // dst + (((src - dst) * src_scale) >> 8), splayed into 16-bit lanes; the
-+ // vec_* transcription of SkPMLerp_SSE2.
-+ static inline __vector unsigned char SkPMLerp_VSX(__vector unsigned char src,
-+ __vector unsigned char dst,
-+ unsigned src_scale) {
-+ const __vector unsigned int mask = vec_splats(0x00FF00FFu);
-+ const __vector unsigned short eight = vec_splats((unsigned short)8);
-+ __vector unsigned short src_rb = (__vector unsigned short)vec_and((__vector unsigned int)src, mask);
-+ __vector unsigned short src_ag = vec_sr((__vector unsigned short)src, eight);
-+ __vector unsigned short dst_rb = (__vector unsigned short)vec_and((__vector unsigned int)dst, mask);
-+ __vector unsigned short dst_ag = vec_sr((__vector unsigned short)dst, eight);
-+ __vector unsigned short s = vec_splats((unsigned short)src_scale);
-+ __vector unsigned short diff_rb = vec_mul(vec_sub(src_rb, dst_rb), s);
-+ __vector unsigned short diff_ag = vec_mul(vec_sub(src_ag, dst_ag), s);
-+ diff_rb = vec_sr(diff_rb, eight);
-+ __vector unsigned int diff = vec_or((__vector unsigned int)diff_rb,
-+ vec_andc((__vector unsigned int)diff_ag, mask));
-+ return vec_add(dst, (__vector unsigned char)diff);
-+ }
-+
-+ static void blit_row_s32_blend(SkPMColor* dst, const SkPMColor* src, int count, U8CPU alpha) {
-+ SkASSERT(alpha <= 255);
-+ unsigned src_scale = SkAlpha255To256(alpha);
-+ while (count >= 4) {
-+ __vector unsigned char s = vec_xl(0, (const unsigned char*)src);
-+ __vector unsigned char d = vec_xl(0, (const unsigned char*)dst);
-+ vec_xst(SkPMLerp_VSX(s, d, src_scale), 0, (unsigned char*)dst);
-+ src += 4; dst += 4; count -= 4;
-+ }
-+ while (count --> 0) {
-+ *dst = SkPMLerp(*src, *dst, src_scale);
-+ src++;
-+ dst++;
-+ }
-+ }
-+
-+ // The vec_* transcription of SkBlendARGB32_SSE2: scale src by aa and dst by
-+ // SkAlphaMulInv256(srcA, aa), then add the splayed halves.
-+ static inline __vector unsigned char SkBlendARGB32_VSX(__vector unsigned char src,
-+ __vector unsigned char dst,
-+ unsigned aa) {
-+ unsigned alpha = SkAlpha255To256(aa);
-+ __vector unsigned short src_scale = vec_splats((unsigned short)alpha);
-+ const __vector unsigned int mask = vec_splats(0x00FF00FFu);
-+ const __vector unsigned short eight = vec_splats((unsigned short)8);
-+
-+ // dst_scale = SkAlphaMulInv256(SkGetPackedA32(src), alpha), per 32-bit lane.
-+ __vector unsigned int srcA = vec_sr((__vector unsigned int)src, vec_splats(24u));
-+ __vector unsigned int ds = (__vector unsigned int)vec_mul((__vector unsigned short)srcA, src_scale);
-+ ds = vec_sub(vec_splats((unsigned int)0xFFFF), ds);
-+ ds = vec_add(ds, vec_sr(ds, vec_splats(8u)));
-+ ds = vec_sr(ds, vec_splats(8u));
-+ // Duplicate the low 16-bit word of each 32-bit lane into both halves
-+ // (the SSE shufflelo/shufflehi _MM_SHUFFLE(2,2,0,0)).
-+ const __vector unsigned char dup = (__vector unsigned char){
-+ 0,1,0,1, 4,5,4,5, 8,9,8,9, 12,13,12,13
-+ };
-+ __vector unsigned short dst_scale =
-+ (__vector unsigned short)vec_perm((__vector unsigned char)ds,
-+ (__vector unsigned char)ds, dup);
-+
-+ __vector unsigned short src_rb = (__vector unsigned short)vec_and((__vector unsigned int)src, mask);
-+ __vector unsigned short src_ag = vec_sr((__vector unsigned short)src, eight);
-+ __vector unsigned short dst_rb = (__vector unsigned short)vec_and((__vector unsigned int)dst, mask);
-+ __vector unsigned short dst_ag = vec_sr((__vector unsigned short)dst, eight);
-+
-+ src_rb = vec_mul(src_rb, src_scale);
-+ src_ag = vec_mul(src_ag, src_scale);
-+ dst_rb = vec_mul(dst_rb, dst_scale);
-+ dst_ag = vec_mul(dst_ag, dst_scale);
-+
-+ dst_rb = vec_add(src_rb, dst_rb);
-+ dst_ag = vec_add(src_ag, dst_ag);
-+
-+ dst_rb = vec_sr(dst_rb, eight);
-+ __vector unsigned int out = vec_or((__vector unsigned int)dst_rb,
-+ vec_andc((__vector unsigned int)dst_ag, mask));
-+ return (__vector unsigned char)out;
-+ }
-+
-+ static void blit_row_s32a_blend(SkPMColor* dst, const SkPMColor* src, int count, U8CPU alpha) {
-+ SkASSERT(alpha <= 255);
-+ while (count >= 4) {
-+ __vector unsigned char s = vec_xl(0, (const unsigned char*)src);
-+ __vector unsigned char d = vec_xl(0, (const unsigned char*)dst);
-+ vec_xst(SkBlendARGB32_VSX(s, d, alpha), 0, (unsigned char*)dst);
-+ src += 4; dst += 4; count -= 4;
-+ }
-+ while (count --> 0) {
-+ *dst = SkBlendARGB32(*src, *dst, alpha);
-+ src++;
-+ dst++;
-+ }
-+ }
-+
- #else
- static void blit_row_s32_blend(SkPMColor* dst, const SkPMColor* src, int count, U8CPU alpha) {
- SkASSERT(alpha <= 255);
-diff --git a/gfx/skia/skia/src/core/SkBlitter_ARGB32.cpp b/gfx/skia/skia/src/core/SkBlitter_ARGB32.cpp
-index a7538027b85d..9669431292b6 100644
---- a/gfx/skia/skia/src/core/SkBlitter_ARGB32.cpp
-+++ b/gfx/skia/skia/src/core/SkBlitter_ARGB32.cpp
-@@ -480,6 +480,274 @@ static inline SkPMColor blend_lcd16_opaque(int srcR, int srcG, int srcB,
- }
- }
-
-+#elif defined(SK_CPU_PPC) && defined(__VSX__)
-+ #include <altivec.h>
-+
-+ // Native VSX/AltiVec port of the SSE2 LCD-subpixel blend block below.
-+ // Same algorithm — only the intrinsics change. Translations follow the
-+ // GCC ppc_wrappers pattern (vec_mergeh/l, vec_packsu, etc.).
-+
-+ // The following (left) shifts cause the top 5 bits of the mask components to
-+ // line up with the corresponding components in an SkPMColor.
-+ // Note that the mask's RGB16 order may differ from the SkPMColor order.
-+ #define SK_R16x5_R32x5_SHIFT (SK_R32_SHIFT - SK_R16_SHIFT - SK_R16_BITS + 5)
-+ #define SK_G16x5_G32x5_SHIFT (SK_G32_SHIFT - SK_G16_SHIFT - SK_G16_BITS + 5)
-+ #define SK_B16x5_B32x5_SHIFT (SK_B32_SHIFT - SK_B16_SHIFT - SK_B16_BITS + 5)
-+
-+ // Each macro must always return __vector unsigned int so the surrounding
-+ // vec_and gets matching element types. The pass-through case (SHIFT == 0)
-+ // still needs an explicit reinterpret-cast since `mask` is __vector
-+ // unsigned char in our function signature.
-+ #if SK_R16x5_R32x5_SHIFT == 0
-+ #define SkPackedR16x5ToUnmaskedR32x5_VSX(x) ((__vector unsigned int)(x))
-+ #elif SK_R16x5_R32x5_SHIFT > 0
-+ #define SkPackedR16x5ToUnmaskedR32x5_VSX(x) \
-+ vec_sl((__vector unsigned int)(x), vec_splats((unsigned int)SK_R16x5_R32x5_SHIFT))
-+ #else
-+ #define SkPackedR16x5ToUnmaskedR32x5_VSX(x) \
-+ vec_sr((__vector unsigned int)(x), vec_splats((unsigned int)(-SK_R16x5_R32x5_SHIFT)))
-+ #endif
-+
-+ #if SK_G16x5_G32x5_SHIFT == 0
-+ #define SkPackedG16x5ToUnmaskedG32x5_VSX(x) ((__vector unsigned int)(x))
-+ #elif SK_G16x5_G32x5_SHIFT > 0
-+ #define SkPackedG16x5ToUnmaskedG32x5_VSX(x) \
-+ vec_sl((__vector unsigned int)(x), vec_splats((unsigned int)SK_G16x5_G32x5_SHIFT))
-+ #else
-+ #define SkPackedG16x5ToUnmaskedG32x5_VSX(x) \
-+ vec_sr((__vector unsigned int)(x), vec_splats((unsigned int)(-SK_G16x5_G32x5_SHIFT)))
-+ #endif
-+
-+ #if SK_B16x5_B32x5_SHIFT == 0
-+ #define SkPackedB16x5ToUnmaskedB32x5_VSX(x) ((__vector unsigned int)(x))
-+ #elif SK_B16x5_B32x5_SHIFT > 0
-+ #define SkPackedB16x5ToUnmaskedB32x5_VSX(x) \
-+ vec_sl((__vector unsigned int)(x), vec_splats((unsigned int)SK_B16x5_B32x5_SHIFT))
-+ #else
-+ #define SkPackedB16x5ToUnmaskedB32x5_VSX(x) \
-+ vec_sr((__vector unsigned int)(x), vec_splats((unsigned int)(-SK_B16x5_B32x5_SHIFT)))
-+ #endif
-+
-+ static __vector unsigned char blend_lcd16_vsx(__vector unsigned char& src,
-+ __vector unsigned char& dst,
-+ __vector unsigned char& mask,
-+ __vector unsigned char& srcA) {
-+ // Get the R,G,B of each 16bit mask pixel, all aligned to 5-bit positions.
-+ __vector unsigned int r = vec_and(SkPackedR16x5ToUnmaskedR32x5_VSX(mask),
-+ vec_splats((unsigned int)(0x1F << SK_R32_SHIFT)));
-+ __vector unsigned int g = vec_and(SkPackedG16x5ToUnmaskedG32x5_VSX(mask),
-+ vec_splats((unsigned int)(0x1F << SK_G32_SHIFT)));
-+ __vector unsigned int b = vec_and(SkPackedB16x5ToUnmaskedB32x5_VSX(mask),
-+ vec_splats((unsigned int)(0x1F << SK_B32_SHIFT)));
-+
-+ // a needs to be either the min or the max of the LCD coverages, depending on srcA < dstA.
-+ __vector unsigned int rA = vec_sl(r, vec_splats((unsigned int)(SK_A32_SHIFT - SK_R32_SHIFT)));
-+ __vector unsigned int gA = vec_sl(g, vec_splats((unsigned int)(SK_A32_SHIFT - SK_G32_SHIFT)));
-+ __vector unsigned int bA = vec_sl(b, vec_splats((unsigned int)(SK_A32_SHIFT - SK_B32_SHIFT)));
-+ __vector unsigned char aMin = vec_min(vec_min((__vector unsigned char)rA,
-+ (__vector unsigned char)gA),
-+ (__vector unsigned char)bA);
-+ __vector unsigned char aMax = vec_max(vec_max((__vector unsigned char)rA,
-+ (__vector unsigned char)gA),
-+ (__vector unsigned char)bA);
-+ // srcA has been biased to [0-256]; compare srcA against (dstA+1).
-+ __vector unsigned int dstA = vec_and(vec_add((__vector unsigned int)dst,
-+ vec_splats((unsigned int)(1 << SK_A32_SHIFT))),
-+ vec_splats((unsigned int)SK_A32_MASK));
-+ __vector __bool int aLT = vec_cmplt((__vector signed int)srcA, (__vector signed int)dstA);
-+ // a = (aMin & aLT) | (aMax & ~aLT)
-+ __vector unsigned char a = vec_or(vec_and(aMin, (__vector unsigned char)aLT),
-+ vec_andc(aMax, (__vector unsigned char)aLT));
-+
-+ // Pack the 4 16-bit mask pixels into 4 32-bit pixels (m0A, m0R, m0G, m0B, ...).
-+ mask = vec_or(vec_or(a, (__vector unsigned char)r),
-+ vec_or((__vector unsigned char)g, (__vector unsigned char)b));
-+
-+ // Interleave into 16-bit words.
-+ const __vector unsigned char zeros = vec_splats((unsigned char)0);
-+ __vector unsigned short maskLo = (__vector unsigned short)vec_mergeh(mask, zeros);
-+ __vector unsigned short maskHi = (__vector unsigned short)vec_mergel(mask, zeros);
-+
-+ // Upscale 0..31 -> 0..32 by adding (mask >> 4).
-+ const __vector unsigned short v4 = vec_splats((unsigned short)4);
-+ const __vector unsigned short v8 = vec_splats((unsigned short)8);
-+ const __vector unsigned short v5 = vec_splats((unsigned short)5);
-+ maskLo = vec_add(maskLo, vec_sr(maskLo, v4));
-+ maskHi = vec_add(maskHi, vec_sr(maskHi, v4));
-+
-+ // Multiply by srcA per 16-bit lane.
-+ maskLo = vec_mul(maskLo, (__vector unsigned short)srcA);
-+ maskHi = vec_mul(maskHi, (__vector unsigned short)srcA);
-+ // Divide by 256 (right-shift 8).
-+ maskLo = vec_sr(maskLo, v8);
-+ maskHi = vec_sr(maskHi, v8);
-+
-+ // Unpack dst into 16-bit words.
-+ __vector signed short dstLo = (__vector signed short)vec_mergeh(dst, zeros);
-+ __vector signed short dstHi = (__vector signed short)vec_mergel(dst, zeros);
-+ // mask = (src - dst) * mask
-+ __vector signed short srcS = (__vector signed short)src;
-+ __vector signed short mLoS = vec_mul((__vector signed short)maskLo, vec_sub(srcS, dstLo));
-+ __vector signed short mHiS = vec_mul((__vector signed short)maskHi, vec_sub(srcS, dstHi));
-+ // arithmetic shift right by 5
-+ mLoS = vec_sra(mLoS, (__vector unsigned short)v5);
-+ mHiS = vec_sra(mHiS, (__vector unsigned short)v5);
-+ // result = dst + ((src - dst) * mask >> 5)
-+ __vector signed short resLo = vec_add(dstLo, mLoS);
-+ __vector signed short resHi = vec_add(dstHi, mHiS);
-+ // Pack 16-bit signed -> 8-bit unsigned with saturation.
-+ return vec_packsu(resLo, resHi);
-+ }
-+
-+ static __vector unsigned char blend_lcd16_opaque_vsx(__vector unsigned char& src,
-+ __vector unsigned char& dst,
-+ __vector unsigned char& mask) {
-+ __vector unsigned int r = vec_and(SkPackedR16x5ToUnmaskedR32x5_VSX(mask),
-+ vec_splats((unsigned int)(0x1F << SK_R32_SHIFT)));
-+ __vector unsigned int g = vec_and(SkPackedG16x5ToUnmaskedG32x5_VSX(mask),
-+ vec_splats((unsigned int)(0x1F << SK_G32_SHIFT)));
-+ __vector unsigned int b = vec_and(SkPackedB16x5ToUnmaskedB32x5_VSX(mask),
-+ vec_splats((unsigned int)(0x1F << SK_B32_SHIFT)));
-+
-+ // Opaque src: a = max(r, g, b) shifted to alpha lane.
-+ __vector unsigned int rA = vec_sl(r, vec_splats((unsigned int)(SK_A32_SHIFT - SK_R32_SHIFT)));
-+ __vector unsigned int gA = vec_sl(g, vec_splats((unsigned int)(SK_A32_SHIFT - SK_G32_SHIFT)));
-+ __vector unsigned int bA = vec_sl(b, vec_splats((unsigned int)(SK_A32_SHIFT - SK_B32_SHIFT)));
-+ __vector unsigned char a = vec_max(vec_max((__vector unsigned char)rA,
-+ (__vector unsigned char)gA),
-+ (__vector unsigned char)bA);
-+
-+ mask = vec_or(vec_or(a, (__vector unsigned char)r),
-+ vec_or((__vector unsigned char)g, (__vector unsigned char)b));
-+
-+ const __vector unsigned char zeros = vec_splats((unsigned char)0);
-+ __vector unsigned short maskLo = (__vector unsigned short)vec_mergeh(mask, zeros);
-+ __vector unsigned short maskHi = (__vector unsigned short)vec_mergel(mask, zeros);
-+
-+ const __vector unsigned short v4 = vec_splats((unsigned short)4);
-+ const __vector unsigned short v5 = vec_splats((unsigned short)5);
-+ maskLo = vec_add(maskLo, vec_sr(maskLo, v4));
-+ maskHi = vec_add(maskHi, vec_sr(maskHi, v4));
-+
-+ __vector signed short dstLo = (__vector signed short)vec_mergeh(dst, zeros);
-+ __vector signed short dstHi = (__vector signed short)vec_mergel(dst, zeros);
-+ __vector signed short srcS = (__vector signed short)src;
-+ __vector signed short mLoS = vec_mul((__vector signed short)maskLo, vec_sub(srcS, dstLo));
-+ __vector signed short mHiS = vec_mul((__vector signed short)maskHi, vec_sub(srcS, dstHi));
-+ mLoS = vec_sra(mLoS, (__vector unsigned short)v5);
-+ mHiS = vec_sra(mHiS, (__vector unsigned short)v5);
-+ __vector signed short resLo = vec_add(dstLo, mLoS);
-+ __vector signed short resHi = vec_add(dstHi, mHiS);
-+ return vec_packsu(resLo, resHi);
-+ }
-+
-+ void blit_row_lcd16(SkPMColor dst[], const uint16_t mask[], SkColor src,
-+ int width, SkPMColor) {
-+ if (width <= 0) {
-+ return;
-+ }
-+ int srcA = SkColorGetA(src);
-+ int srcR = SkColorGetR(src);
-+ int srcG = SkColorGetG(src);
-+ int srcB = SkColorGetB(src);
-+ srcA = SkAlpha255To256(srcA);
-+
-+ if (width >= 4) {
-+ SkASSERT(SkIsAlign4((uintptr_t) dst));
-+ while (!SkIsAlign16((uintptr_t) dst)) {
-+ *dst = blend_lcd16(srcA, srcR, srcG, srcB, *dst, *mask);
-+ mask++; dst++; width--;
-+ }
-+
-+ // Replicate source across 4 lanes, then unpack low half to interleaved 16-bit.
-+ uint32_t srcPM = SkPackARGB32(0xFF, srcR, srcG, srcB);
-+ __vector unsigned int src_v32 = vec_splats(srcPM);
-+ const __vector unsigned char zeros = vec_splats((unsigned char)0);
-+ __vector unsigned char src_v = vec_mergeh((__vector unsigned char)src_v32, zeros);
-+ __vector unsigned char srcA_v = (__vector unsigned char)vec_splats((unsigned short)srcA);
-+
-+ while (width >= 4) {
-+ __vector unsigned char dst_v = vec_xl(0, (const unsigned char*)dst);
-+ // Load 8 bytes (4x uint16 mask) into low half of vector.
-+ uint64_t mlo;
-+ memcpy(&mlo, mask, sizeof(mlo));
-+ __vector unsigned long long mask_low =
-+ (__vector unsigned long long){mlo, 0};
-+ __vector unsigned char mask_v = (__vector unsigned char)mask_low;
-+
-+ // Check if all mask values are zero (skip blending if so).
-+ if (!vec_all_eq((__vector unsigned long long)mask_v,
-+ vec_splats((unsigned long long)0))) {
-+ // Unpack low 8 bytes of mask (4x uint16) into 4x uint32 (with zeros).
-+ // Zero-extend the 4 uint16 masks to 4 uint32 (16-bit-granularity
-+ // merge, matching SSE2's _mm_unpacklo_epi16); a char-granularity
-+ // merge would byte-stretch the RGB565 value and misplace the shifts.
-+ mask_v = (__vector unsigned char)vec_mergeh((__vector unsigned short)mask_v,
-+ (__vector unsigned short)zeros);
-+ __vector unsigned char result =
-+ blend_lcd16_vsx(src_v, dst_v, mask_v, srcA_v);
-+ vec_xst(result, 0, (unsigned char*)dst);
-+ }
-+ dst += 4; mask += 4; width -= 4;
-+ }
-+ }
-+
-+ while (width > 0) {
-+ *dst = blend_lcd16(srcA, srcR, srcG, srcB, *dst, *mask);
-+ mask++; dst++; width--;
-+ }
-+ }
-+
-+ void blit_row_lcd16_opaque(SkPMColor dst[], const uint16_t mask[],
-+ SkColor src, int width, SkPMColor opaqueDst) {
-+ if (width <= 0) {
-+ return;
-+ }
-+ int srcR = SkColorGetR(src);
-+ int srcG = SkColorGetG(src);
-+ int srcB = SkColorGetB(src);
-+
-+ if (width >= 4) {
-+ SkASSERT(SkIsAlign4((uintptr_t) dst));
-+ while (!SkIsAlign16((uintptr_t) dst)) {
-+ *dst = blend_lcd16_opaque(srcR, srcG, srcB, *dst, *mask, opaqueDst);
-+ mask++; dst++; width--;
-+ }
-+
-+ uint32_t srcPM = SkPackARGB32(0xFF, srcR, srcG, srcB);
-+ __vector unsigned int src_v32 = vec_splats(srcPM);
-+ const __vector unsigned char zeros = vec_splats((unsigned char)0);
-+ __vector unsigned char src_v = vec_mergeh((__vector unsigned char)src_v32, zeros);
-+
-+ while (width >= 4) {
-+ __vector unsigned char dst_v = vec_xl(0, (const unsigned char*)dst);
-+ uint64_t mlo;
-+ memcpy(&mlo, mask, sizeof(mlo));
-+ __vector unsigned long long mask_low =
-+ (__vector unsigned long long){mlo, 0};
-+ __vector unsigned char mask_v = (__vector unsigned char)mask_low;
-+
-+ if (!vec_all_eq((__vector unsigned long long)mask_v,
-+ vec_splats((unsigned long long)0))) {
-+ // Zero-extend the 4 uint16 masks to 4 uint32 (16-bit-granularity
-+ // merge, matching SSE2's _mm_unpacklo_epi16); a char-granularity
-+ // merge would byte-stretch the RGB565 value and misplace the shifts.
-+ mask_v = (__vector unsigned char)vec_mergeh((__vector unsigned short)mask_v,
-+ (__vector unsigned short)zeros);
-+ __vector unsigned char result =
-+ blend_lcd16_opaque_vsx(src_v, dst_v, mask_v);
-+ vec_xst(result, 0, (unsigned char*)dst);
-+ }
-+ dst += 4; mask += 4; width -= 4;
-+ }
-+ }
-+
-+ while (width > 0) {
-+ *dst = blend_lcd16_opaque(srcR, srcG, srcB, *dst, *mask, opaqueDst);
-+ mask++; dst++; width--;
-+ }
-+ }
-+
- #elif defined(SK_ARM_HAS_NEON)
- #include <arm_neon.h>
-
-diff --git a/gfx/skia/skia/src/opts/SkBitmapProcState_opts.h b/gfx/skia/skia/src/opts/SkBitmapProcState_opts.h
-index 6d01a2f4458f..87b160ed7a1e 100644
---- a/gfx/skia/skia/src/opts/SkBitmapProcState_opts.h
-+++ b/gfx/skia/skia/src/opts/SkBitmapProcState_opts.h
-@@ -29,6 +29,8 @@
- #include <lasxintrin.h>
- #elif SK_CPU_LSX_LEVEL >= SK_CPU_LSX_LEVEL_LSX
- #include <lsxintrin.h>
-+#elif defined(SK_CPU_PPC) && defined(__VSX__)
-+ #include <altivec.h>
- #endif
-
- namespace SK_OPTS_NS {
-@@ -260,6 +262,168 @@ static void decode_packed_coordinates_and_weight(U32 packed, Out* v0, Out* v1, O
- }
- }
-
-+#elif defined(SK_CPU_PPC) && defined(__VSX__)
-+
-+ // Helper: scalar uint32_t -> 16-byte vector with x in low 32 bits, zero elsewhere.
-+ // Equivalent of x86's _mm_cvtsi32_si128.
-+ static inline __vector unsigned char vsx_cvt_u32_to_vec(uint32_t x) {
-+ __vector unsigned int v = (__vector unsigned int){x, 0, 0, 0};
-+ return (__vector unsigned char)v;
-+ }
-+
-+ // Helper: PPC64 VSX equivalent of x86's _mm_maddubs_epi16. Multiplies pairs of
-+ // (unsigned byte, signed byte) and adds adjacent pairs to produce 16-bit signed
-+ // values, saturating to int16. Implementation transcribes the GCC ppc_wrappers
-+ // tmmintrin.h sequence for endianness correctness on LE PPC64.
-+ static inline __vector signed short vsx_maddubs_epi16(__vector unsigned char A,
-+ __vector signed char B) {
-+ __vector signed short __ff = vec_splats((signed short)0x00FF);
-+ __vector signed short __C = vec_and(vec_unpackh((__vector signed char)A), __ff);
-+ __vector signed short __D = vec_and(vec_unpackl((__vector signed char)A), __ff);
-+ __vector signed short __E = vec_unpackh(B);
-+ __vector signed short __F = vec_unpackl(B);
-+ __C = vec_mul(__C, __E);
-+ __D = vec_mul(__D, __F);
-+ const __vector unsigned char __odds = (__vector unsigned char){
-+ 0,1, 4,5, 8,9, 12,13, 16,17, 20,21, 24,25, 28,29
-+ };
-+ const __vector unsigned char __evens = (__vector unsigned char){
-+ 2,3, 6,7, 10,11, 14,15, 18,19, 22,23, 26,27, 30,31
-+ };
-+ __E = (__vector signed short)vec_perm((__vector unsigned char)__C,
-+ (__vector unsigned char)__D, __odds);
-+ __F = (__vector signed short)vec_perm((__vector unsigned char)__C,
-+ (__vector unsigned char)__D, __evens);
-+ return vec_adds(__E, __F);
-+ }
-+
-+ /*not static*/ inline
-+ void S32_alpha_D32_filter_DX(const SkBitmapProcState& s,
-+ const uint32_t* xy, int count, uint32_t* colors) {
-+ SkASSERT(count > 0 && colors != nullptr);
-+ SkASSERT(s.fBilerp);
-+ SkASSERT(kN32_SkColorType == s.fPixmap.colorType());
-+ SkASSERT(s.fAlphaScale <= 256);
-+
-+ // interpolate_in_x() is the crux of the implementation, interpolating in X
-+ // for up to two output pixels (A and B) using vsx_maddubs_epi16().
-+ auto interpolate_in_x = [](uint32_t A0, uint32_t A1,
-+ uint32_t B0, uint32_t B1,
-+ __vector signed char interlaced_x_weights) {
-+ // _mm_unpacklo_epi8(_mm_cvtsi32_si128(A0), _mm_cvtsi32_si128(A1))
-+ // = vec_mergeh on uchar, since the input vectors have only the low 32 bits set.
-+ __vector unsigned char interlaced_A = vec_mergeh(vsx_cvt_u32_to_vec(A0),
-+ vsx_cvt_u32_to_vec(A1));
-+ __vector unsigned char interlaced_B = vec_mergeh(vsx_cvt_u32_to_vec(B0),
-+ vsx_cvt_u32_to_vec(B1));
-+ // _mm_unpacklo_epi64 = vec_mergeh on long long.
-+ __vector long long lo64 = vec_mergeh((__vector long long)interlaced_A,
-+ (__vector long long)interlaced_B);
-+ return vsx_maddubs_epi16((__vector unsigned char)lo64, interlaced_x_weights);
-+ };
-+
-+ // Interpolate {A0..A3} --> output pixel A, and {B0..B3} --> output pixel B.
-+ // Returns two pixels, with each color channel in a 16-bit lane of the result.
-+ auto interpolate_in_x_and_y = [&](uint32_t A0, uint32_t A1,
-+ uint32_t A2, uint32_t A3,
-+ uint32_t B0, uint32_t B1,
-+ uint32_t B2, uint32_t B3,
-+ __vector signed char interlaced_x_weights,
-+ int wy) {
-+ __vector signed short top = interpolate_in_x(A0,A1, B0,B1, interlaced_x_weights);
-+ __vector signed short bot = interpolate_in_x(A2,A3, B2,B3, interlaced_x_weights);
-+
-+ // 16*top + (bot-top)*wy, mirroring the SSE2 form (saves one multiply vs. the
-+ // straightforward top*(16-wy) + bot*wy).
-+ __vector unsigned short v4 = vec_splats((unsigned short)4);
-+ __vector signed short wy_v = vec_splats((signed short)wy);
-+ __vector signed short px = vec_add(vec_sl(top, v4), vec_mul(vec_sub(bot, top), wy_v));
-+
-+ // Scale down by total max weight 16x16 = 256.
-+ px = (__vector signed short)vec_sr((__vector unsigned short)px, vec_splats((unsigned short)8));
-+
-+ // Scale by alpha if needed.
-+ if (s.fAlphaScale < 256) {
-+ __vector signed short scale_v = vec_splats((signed short)s.fAlphaScale);
-+ px = (__vector signed short)vec_sr((__vector unsigned short)vec_mul(px, scale_v),
-+ vec_splats((unsigned short)8));
-+ }
-+ return px;
-+ };
-+
-+ // We're in _DX mode here, so we're only varying in X.
-+ // That means the first entry of xy is our constant pair of Y coordinates and weight in Y.
-+ int y0, y1, wy;
-+ decode_packed_coordinates_and_weight(*xy++, &y0, &y1, &wy);
-+
-+ auto row0 = (const uint32_t*)((const uint8_t*)s.fPixmap.addr() + y0 * s.fPixmap.rowBytes()),
-+ row1 = (const uint32_t*)((const uint8_t*)s.fPixmap.addr() + y1 * s.fPixmap.rowBytes());
-+
-+ while (count >= 4) {
-+ // We can really get going, loading 4 X-pairs at a time to produce 4 output pixels.
-+ int x0[4],
-+ x1[4];
-+ __vector unsigned int wx;
-+
-+ // decode_packed_coordinates_and_weight(), 4x.
-+ __vector unsigned int packed = (__vector unsigned int)vec_xl(0, (const unsigned char*)xy);
-+ __vector unsigned int x0_v = vec_sr(packed, vec_splats(18u));
-+ __vector unsigned int x1_v = vec_and(packed, vec_splats(0x3fffu));
-+ vec_xst((__vector unsigned char)x0_v, 0, (unsigned char*)x0);
-+ vec_xst((__vector unsigned char)x1_v, 0, (unsigned char*)x1);
-+ wx = vec_and(vec_sr(packed, vec_splats(14u)), vec_splats(0xfu)); // [0,15]
-+
-+ // Splat each x weight 4x (for each color channel) as wr for pixels on the right at x1,
-+ // and sixteen minus that as wl for pixels on the left at x0.
-+ const __vector unsigned char wr_mask = (__vector unsigned char){
-+ 0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12
-+ };
-+ __vector unsigned char wr = vec_perm((__vector unsigned char)wx,
-+ (__vector unsigned char)wx, wr_mask);
-+ __vector unsigned char wl = vec_sub(vec_splats((unsigned char)16), wr);
-+
-+ // Interlace wl and wr for vsx_maddubs_epi16().
-+ __vector signed char interlaced_x_weights_AB = (__vector signed char)vec_mergeh(wl, wr);
-+ __vector signed char interlaced_x_weights_CD = (__vector signed char)vec_mergel(wl, wr);
-+
-+ enum { A,B,C,D };
-+
-+ __vector signed short AB = interpolate_in_x_and_y(
-+ row0[x0[A]], row0[x1[A]], row1[x0[A]], row1[x1[A]],
-+ row0[x0[B]], row0[x1[B]], row1[x0[B]], row1[x1[B]],
-+ interlaced_x_weights_AB, wy);
-+ __vector signed short CD = interpolate_in_x_and_y(
-+ row0[x0[C]], row0[x1[C]], row1[x0[C]], row1[x1[C]],
-+ row0[x0[D]], row0[x1[D]], row1[x0[D]], row1[x1[D]],
-+ interlaced_x_weights_CD, wy);
-+
-+ // Pack 16-bit signed -> 8-bit unsigned with saturation, write 4 pixels.
-+ __vector unsigned char packed_out = vec_packsu(AB, CD);
-+ vec_xst(packed_out, 0, (unsigned char*)colors);
-+ xy += 4;
-+ colors += 4;
-+ count -= 4;
-+ }
-+
-+ while (count --> 0) {
-+ // Same flow as the count >= 4 loop, but writing one pixel.
-+ int x0, x1, wx;
-+ decode_packed_coordinates_and_weight(*xy++, &x0, &x1, &wx);
-+
-+ __vector unsigned char wr = vec_splats((unsigned char)wx);
-+ __vector unsigned char wl = vec_sub(vec_splats((unsigned char)16), wr);
-+ __vector signed char interlaced_x_weights = (__vector signed char)vec_mergeh(wl, wr);
-+
-+ __vector signed short Av = interpolate_in_x_and_y(
-+ row0[x0], row0[x1], row1[x0], row1[x1],
-+ 0, 0, 0, 0,
-+ interlaced_x_weights, wy);
-+ __vector unsigned char packed_out = vec_packsu(Av,
-+ (__vector signed short)(__vector unsigned char){0});
-+ *colors++ = ((__vector unsigned int)packed_out)[0];
-+ }
-+ }
-+
- #elif SK_CPU_LSX_LEVEL >= SK_CPU_LSX_LEVEL_LASX
- /*not static*/ inline
- void S32_alpha_D32_filter_DX(const SkBitmapProcState& s,
-diff --git a/gfx/skia/skia/src/opts/SkBlitRow_opts.h b/gfx/skia/skia/src/opts/SkBlitRow_opts.h
-index d1de5681a72e..d03908a03a32 100644
---- a/gfx/skia/skia/src/opts/SkBlitRow_opts.h
-+++ b/gfx/skia/skia/src/opts/SkBlitRow_opts.h
-@@ -68,6 +68,43 @@
- }
- #endif
-
-+#if defined(SK_CPU_PPC) && defined(__VSX__)
-+ #include <altivec.h>
-+
-+ // Native VSX/AltiVec port of SkPMSrcOver_SSE2.
-+ // Same algorithm: src + dst*(256-srcAlpha)/256.
-+ static inline __vector unsigned char SkPMSrcOver_VSX(__vector unsigned char src,
-+ __vector unsigned char dst) {
-+ __vector unsigned int src_u32 = (__vector unsigned int)src;
-+ __vector unsigned int dst_u32 = (__vector unsigned int)dst;
-+
-+ // scale = 256 - (src >> 24) (per 32-bit lane)
-+ __vector unsigned int scale = vec_sub(vec_splats((unsigned int)256),
-+ vec_sr(src_u32, vec_splats(24u)));
-+ // scale_x2 = (scale << 16) | scale -- splat the scale into both 16-bit halves
-+ __vector unsigned int scale_x2 = vec_or(vec_sl(scale, vec_splats(16u)), scale);
-+
-+ const __vector unsigned int rb_mask = vec_splats(0x00FF00FFu);
-+
-+ // rb = (dst & 0x00FF00FF) * scale_x2 >> 8 (R and B channels in 16-bit lanes)
-+ __vector unsigned short rb = (__vector unsigned short)vec_and(rb_mask, dst_u32);
-+ rb = vec_mul(rb, (__vector unsigned short)scale_x2);
-+ rb = vec_sr(rb, vec_splats((unsigned short)8));
-+
-+ // ga = (dst >> 8) * scale_x2 then mask out the rb channels
-+ __vector unsigned short ga = vec_sr((__vector unsigned short)dst_u32,
-+ vec_splats((unsigned short)8));
-+ ga = vec_mul(ga, (__vector unsigned short)scale_x2);
-+ // andc(ga, rb_mask) = ga & ~rb_mask -- keep only G and A channels in 16-bit lanes
-+ __vector unsigned int ga_u32 = vec_andc((__vector unsigned int)ga, rb_mask);
-+
-+ // result = src + adds_epu8(rb | ga)
-+ __vector unsigned char merged =
-+ (__vector unsigned char)vec_or((__vector unsigned int)rb, ga_u32);
-+ return vec_adds(src, merged);
-+ }
-+#endif
-+
- #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
- #include <immintrin.h>
-
-@@ -176,6 +213,17 @@ inline void blit_row_s32a_opaque(SkPMColor* dst, const SkPMColor* src, int len,
- }
- #endif
-
-+#if defined(SK_CPU_PPC) && defined(__VSX__)
-+ while (len >= 4) {
-+ __vector unsigned char vsrc = vec_xl(0, (const unsigned char*)src);
-+ __vector unsigned char vdst = vec_xl(0, (const unsigned char*)dst);
-+ vec_xst(SkPMSrcOver_VSX(vsrc, vdst), 0, (unsigned char*)dst);
-+ src += 4;
-+ dst += 4;
-+ len -= 4;
-+ }
-+#endif
-+
- #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
- while (len >= 4) {
- _mm_storeu_si128((__m128i*)dst, SkPMSrcOver_SSE2(_mm_loadu_si128((const __m128i*)src),
-diff --git a/gfx/skia/skia/src/opts/SkRasterPipeline_opts.h b/gfx/skia/skia/src/opts/SkRasterPipeline_opts.h
-index 695b71434f8c..e2af0b94f392 100644
---- a/gfx/skia/skia/src/opts/SkRasterPipeline_opts.h
-+++ b/gfx/skia/skia/src/opts/SkRasterPipeline_opts.h
-@@ -87,6 +87,8 @@ using NoCtx = const void*;
- #define SKRP_CPU_SCALAR
- #elif defined(SK_ARM_HAS_NEON)
- #define SKRP_CPU_NEON
-+#elif defined(SK_CPU_PPC) && defined(__VSX__)
-+ #define SKRP_CPU_VSX
- #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SKX
- #define SKRP_CPU_SKX
- #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
-@@ -109,6 +111,8 @@ using NoCtx = const void*;
- #include <math.h>
- #elif defined(SKRP_CPU_NEON)
- #include <arm_neon.h>
-+#elif defined(SKRP_CPU_VSX)
-+ #include <altivec.h>
- #elif defined(SKRP_CPU_LASX)
- #include <lasxintrin.h>
- #include <lsxintrin.h>
-@@ -337,6 +341,239 @@ namespace SK_OPTS_NS {
- vst4q_f32(ptr, (float32x4x4_t{{r,g,b,a}}));
- }
-
-+#elif defined(SKRP_CPU_VSX)
-+ // Reuse the file-scope Vec<N,T> defined above. It already handles the
-+ // GCC-vs-Clang divergence (ext_vector_type on Clang; vector_size via
-+ // VecHelper on GCC) and produces the right vector-register-passing ABI
-+ // on PPC64. The vec_* intrinsics in <altivec.h> accept either form.
-+ template <typename T> using V = Vec<4, T>;
-+ using F = V<float >;
-+ using I32 = V< int32_t>;
-+ using U64 = V<uint64_t>;
-+ using U32 = V<uint32_t>;
-+ using U16 = V<uint16_t>;
-+ using U8 = V<uint8_t >;
-+
-+ // We polyfill a few routines that Clang doesn't build into ext_vector_types.
-+ SI F min(F a, F b) { return vec_min(a,b); }
-+ SI I32 min(I32 a, I32 b) { return vec_min(a,b); }
-+ SI U32 min(U32 a, U32 b) { return vec_min(a,b); }
-+ SI F max(F a, F b) { return vec_max(a,b); }
-+ SI I32 max(I32 a, I32 b) { return vec_max(a,b); }
-+ SI U32 max(U32 a, U32 b) { return vec_max(a,b); }
-+
-+ SI F abs_ (F v) { return vec_abs(v); }
-+ SI I32 abs_ (I32 v) { return vec_abs(v); }
-+ SI F rcp_approx(F v) { return vec_re(v); }
-+ SI F rcp_precise (F v) { F e = rcp_approx(v); return e * (2.0f - v * e); }
-+ SI F rsqrt_approx (F v) { return vec_rsqrte(v); }
-+
-+ SI U16 pack(U32 v) { return __builtin_convertvector(v, U16); }
-+ SI U8 pack(U16 v) { return __builtin_convertvector(v, U8); }
-+
-+ SI F if_then_else(I32 c, F t, F e) {
-+ return vec_or((__vector float)vec_and((__vector float)c, (__vector float)t), (__vector float)vec_andc((__vector float)e, (__vector float)c));
-+ }
-+ SI I32 if_then_else(I32 c, I32 t, I32 e) {
-+ return (I32)vec_or((__vector unsigned int)vec_and((__vector unsigned int)c, (__vector unsigned int)t), (__vector unsigned int)vec_andc((__vector unsigned int)e, (__vector unsigned int)c));
-+ }
-+
-+ // In both AltiVec and SSE there is no horizontal element compare, unlike ARM. Fall back to scalar operations here...
-+ SI bool any(I32 c) {
-+ if (vec_extract((U32)c, 0) != 0) return 1;
-+ if (vec_extract((U32)c, 1) != 0) return 1;
-+ if (vec_extract((U32)c, 2) != 0) return 1;
-+ if (vec_extract((U32)c, 3) != 0) return 1;
-+ return 0;
-+ }
-+ SI bool all(I32 c) {
-+ if (vec_extract((U32)c, 0) == 0) return 0;
-+ if (vec_extract((U32)c, 1) == 0) return 0;
-+ if (vec_extract((U32)c, 2) == 0) return 0;
-+ if (vec_extract((U32)c, 3) == 0) return 0;
-+ return 1;
-+ }
-+
-+ SI F mad(F f, F m, F a) { return vec_madd(f,m,a); }
-+ SI F nmad(F f, F m, F a) { return vec_nmsub(f,m,a); }
-+ SI F floor_(F v) { return vec_floor(v); }
-+ SI F ceil_(F v) { return vec_ceil(v); }
-+ SI F sqrt_(F v) { return vec_sqrt(v); }
-+ SI I32 iround(F v) { return vec_cts((__vector float)vec_rint(v), 0); }
-+ SI U32 round(F v) { return vec_ctu((__vector float)vec_rint(v), 0); }
-+ SI U32 round(F v, F scale) { return (U32)vec_cts((__vector float)vec_rint(v*scale), 0); }
-+
-+ template <typename T>
-+ SI V<T> gather(const T* p, U32 ix) {
-+ return V<T>{p[ix[0]], p[ix[1]], p[ix[2]], p[ix[3]]};
-+ }
-+ template <typename T>
-+ SI V<T> gather_unaligned(const T* ptr, U32 ix) {
-+ // This tells the compiler ptr might not be aligned appropriately, so
-+ // it generates better assembly.
-+ typedef T __attribute__ ((aligned (1))) unaligned_ptr;
-+ const unaligned_ptr* uptr = static_cast<const unaligned_ptr*>(ptr);
-+ return V<T>{uptr[ix[0]], uptr[ix[1]], uptr[ix[2]], uptr[ix[3]]};
-+ }
-+ template <typename V, typename S>
-+ SI void scatter_masked(V src, S* dst, U32 ix, I32 mask) {
-+ V before = gather(dst, ix);
-+ V after = if_then_else(mask, src, before);
-+ dst[ix[0]] = after[0];
-+ dst[ix[1]] = after[1];
-+ dst[ix[2]] = after[2];
-+ dst[ix[3]] = after[3];
-+ }
-+
-+ // Native VSX/AltiVec ports of the load2/store2/load3/load4/store4 helpers.
-+ // Each uses vec_xl/vec_xst for unaligned 16-byte loads/stores, vec_mergeh/
-+ // vec_mergel for SSE-style epi16/epi32/ps unpack ops, and vec_perm with a
-+ // byte-mask for the SSE shufflelo/shufflehi/shuffle/srli_si128 ops. The
-+ // PPC64 LE register-to-memory byte order matches x86 LE, so the byte-mask
-+ // patterns are identical to the corresponding _mm_setr_epi8 forms.
-+
-+ SI void load2(const uint16_t* ptr, U16* r, U16* g) {
-+ // Load 8 uint16: r0 g0 r1 g1 r2 g2 r3 g3 (in LE memory order).
-+ __vector unsigned char v = vec_xl(0, (const unsigned char*)ptr);
-+ // Extract every-other 16-bit value via vec_perm (high half of result is unused
-+ // but written; sk_unaligned_load below picks up the low 8 bytes).
-+ const __vector unsigned char r_mask = (__vector unsigned char){
-+ 0,1, 4,5, 8,9, 12,13, 0,0,0,0,0,0,0,0
-+ };
-+ const __vector unsigned char g_mask = (__vector unsigned char){
-+ 2,3, 6,7, 10,11, 14,15, 0,0,0,0,0,0,0,0
-+ };
-+ __vector unsigned char R_v = vec_perm(v, v, r_mask);
-+ __vector unsigned char G_v = vec_perm(v, v, g_mask);
-+ *r = sk_unaligned_load<U16>(&R_v);
-+ *g = sk_unaligned_load<U16>(&G_v);
-+ }
-+
-+ SI void store2(uint16_t* ptr, U16 r, U16 g) {
-+ // Interleave: rg = r0 g0 r1 g1 r2 g2 r3 g3.
-+ // r and g are 8-byte vectors; widen to 16 and vec_mergeh on ushort takes
-+ // the low 4 lanes of each.
-+ __vector unsigned short rw = widen_cast<__vector unsigned short>(r);
-+ __vector unsigned short gw = widen_cast<__vector unsigned short>(g);
-+ __vector unsigned short rg = vec_mergeh(rw, gw);
-+ vec_xst((__vector unsigned char)rg, 0, (unsigned char*)ptr);
-+ }
-+
-+ SI void load3(const uint16_t* ptr, U16* r, U16* g, U16* b) {
-+ // 4 pixels x 3 channels x 2 bytes = 24 bytes. Two 16-byte loads with overlap
-+ // avoid reading past the 24-byte source.
-+ __vector unsigned char v01 = vec_xl(0, (const unsigned char*)(ptr + 0));
-+ __vector unsigned char v23_raw = vec_xl(0, (const unsigned char*)(ptr + 4));
-+ const __vector unsigned char zero = vec_splats((unsigned char)0);
-+ // v23 = v23_raw >> 4 bytes (drops the overlapping pixel-1 trailing R).
-+ const __vector unsigned char shift4 = (__vector unsigned char){
-+ 4,5,6,7, 8,9,10,11, 12,13,14,15, 16,16,16,16
-+ };
-+ __vector unsigned char v23 = vec_perm(v23_raw, zero, shift4);
-+ // _N holds R,G,B for pixel N in its lower 3 lanes. shift6 advances to the next pixel.
-+ const __vector unsigned char shift6 = (__vector unsigned char){
-+ 6,7,8,9, 10,11,12,13, 14,15, 16,16, 16,16, 16,16
-+ };
-+ __vector unsigned char _0 = v01;
-+ __vector unsigned char _1 = vec_perm(v01, zero, shift6);
-+ __vector unsigned char _2 = v23;
-+ __vector unsigned char _3 = vec_perm(v23, zero, shift6);
-+ // De-interlace to R,G,B per the SSE flow.
-+ __vector unsigned short _02 = vec_mergeh((__vector unsigned short)_0,
-+ (__vector unsigned short)_2);
-+ __vector unsigned short _13 = vec_mergeh((__vector unsigned short)_1,
-+ (__vector unsigned short)_3);
-+ __vector unsigned short R_v = vec_mergeh(_02, _13);
-+ const __vector unsigned char shift8 = (__vector unsigned char){
-+ 8,9,10,11, 12,13,14,15, 16,16,16,16, 16,16,16,16
-+ };
-+ __vector unsigned char G_v = vec_perm((__vector unsigned char)R_v, zero, shift8);
-+ __vector unsigned short B_v = vec_mergel(_02, _13);
-+ *r = sk_unaligned_load<U16>(&R_v);
-+ *g = sk_unaligned_load<U16>(&G_v);
-+ *b = sk_unaligned_load<U16>(&B_v);
-+ }
-+
-+ SI void load4(const uint16_t* ptr, U16* r, U16* g, U16* b, U16* a) {
-+ __vector unsigned short v01 = (__vector unsigned short)
-+ vec_xl(0, (const unsigned char*)ptr); // r0 g0 b0 a0 r1 g1 b1 a1
-+ __vector unsigned short v23 = (__vector unsigned short)
-+ vec_xl(0, (const unsigned char*)(ptr + 8)); // r2 g2 b2 a2 r3 g3 b3 a3
-+ __vector unsigned short _02 = vec_mergeh(v01, v23); // r0 r2 g0 g2 b0 b2 a0 a2
-+ __vector unsigned short _13 = vec_mergel(v01, v23); // r1 r3 g1 g3 b1 b3 a1 a3
-+ __vector unsigned short rg = vec_mergeh(_02, _13); // r0 r1 r2 r3 g0 g1 g2 g3
-+ __vector unsigned short ba = vec_mergel(_02, _13); // b0 b1 b2 b3 a0 a1 a2 a3
-+ *r = sk_unaligned_load<U16>((const uint16_t*)&rg + 0);
-+ *g = sk_unaligned_load<U16>((const uint16_t*)&rg + 4);
-+ *b = sk_unaligned_load<U16>((const uint16_t*)&ba + 0);
-+ *a = sk_unaligned_load<U16>((const uint16_t*)&ba + 4);
-+ }
-+
-+ SI void store4(uint16_t* ptr, U16 r, U16 g, U16 b, U16 a) {
-+ __vector unsigned short rw = widen_cast<__vector unsigned short>(r);
-+ __vector unsigned short gw = widen_cast<__vector unsigned short>(g);
-+ __vector unsigned short bw = widen_cast<__vector unsigned short>(b);
-+ __vector unsigned short aw = widen_cast<__vector unsigned short>(a);
-+ __vector unsigned short rg = vec_mergeh(rw, gw); // r0 g0 r1 g1 r2 g2 r3 g3
-+ __vector unsigned short ba = vec_mergeh(bw, aw); // b0 a0 b1 a1 b2 a2 b3 a3
-+ // Now interleave 32-bit lanes (each rg pair = 1 lane, each ba pair = 1 lane).
-+ __vector unsigned int rgba_lo = vec_mergeh((__vector unsigned int)rg,
-+ (__vector unsigned int)ba);
-+ __vector unsigned int rgba_hi = vec_mergel((__vector unsigned int)rg,
-+ (__vector unsigned int)ba);
-+ vec_xst((__vector unsigned char)rgba_lo, 0, (unsigned char*)ptr);
-+ vec_xst((__vector unsigned char)rgba_hi, 0, (unsigned char*)(ptr + 8));
-+ }
-+
-+ SI void load2(const float* ptr, F* r, F* g) {
-+ __vector float _01 = vec_xl(0, ptr); // r0 g0 r1 g1
-+ __vector float _23 = vec_xl(0, ptr + 4); // r2 g2 r3 g3
-+ // r = lanes {_01[0], _01[2], _23[0], _23[2]}; g = {_01[1], _01[3], _23[1], _23[3]}.
-+ const __vector unsigned char r_mask = (__vector unsigned char){
-+ 0,1,2,3, 8,9,10,11, 16,17,18,19, 24,25,26,27
-+ };
-+ const __vector unsigned char g_mask = (__vector unsigned char){
-+ 4,5,6,7, 12,13,14,15, 20,21,22,23, 28,29,30,31
-+ };
-+ *r = (F)vec_perm((__vector unsigned char)_01, (__vector unsigned char)_23, r_mask);
-+ *g = (F)vec_perm((__vector unsigned char)_01, (__vector unsigned char)_23, g_mask);
-+ }
-+
-+ SI void store2(float* ptr, F r, F g) {
-+ __vector float _01 = vec_mergeh((__vector float)r, (__vector float)g); // r0 g0 r1 g1
-+ __vector float _23 = vec_mergel((__vector float)r, (__vector float)g); // r2 g2 r3 g3
-+ vec_xst((__vector unsigned char)_01, 0, (unsigned char*)ptr);
-+ vec_xst((__vector unsigned char)_23, 0, (unsigned char*)(ptr + 4));
-+ }
-+
-+ SI void load4(const float* ptr, F* r, F* g, F* b, F* a) {
-+ // 4x4 float matrix transpose: rows -> columns.
-+ __vector float row0 = vec_xl(0, ptr + 0);
-+ __vector float row1 = vec_xl(0, ptr + 4);
-+ __vector float row2 = vec_xl(0, ptr + 8);
-+ __vector float row3 = vec_xl(0, ptr + 12);
-+ __vector float T0 = vec_mergeh(row0, row2); // {row0[0], row2[0], row0[1], row2[1]}
-+ __vector float T1 = vec_mergeh(row1, row3);
-+ __vector float T2 = vec_mergel(row0, row2);
-+ __vector float T3 = vec_mergel(row1, row3);
-+ *r = (F)vec_mergeh(T0, T1); // {row0[0], row1[0], row2[0], row3[0]}
-+ *g = (F)vec_mergel(T0, T1);
-+ *b = (F)vec_mergeh(T2, T3);
-+ *a = (F)vec_mergel(T2, T3);
-+ }
-+
-+ SI void store4(float* ptr, F r, F g, F b, F a) {
-+ // 4x4 float matrix transpose, then store rows.
-+ __vector float T0 = vec_mergeh((__vector float)r, (__vector float)b);
-+ __vector float T1 = vec_mergeh((__vector float)g, (__vector float)a);
-+ __vector float T2 = vec_mergel((__vector float)r, (__vector float)b);
-+ __vector float T3 = vec_mergel((__vector float)g, (__vector float)a);
-+ vec_xst((__vector unsigned char)vec_mergeh(T0, T1), 0, (unsigned char*)(ptr + 0));
-+ vec_xst((__vector unsigned char)vec_mergel(T0, T1), 0, (unsigned char*)(ptr + 4));
-+ vec_xst((__vector unsigned char)vec_mergeh(T2, T3), 0, (unsigned char*)(ptr + 8));
-+ vec_xst((__vector unsigned char)vec_mergel(T2, T3), 0, (unsigned char*)(ptr + 12));
-+ }
-+
- #elif defined(SKRP_CPU_SKX)
- template <typename T> using V = Vec<16, T>;
- using F = V<float >;
-diff --git a/gfx/skia/skia/src/opts/SkSwizzler_opts.inc b/gfx/skia/skia/src/opts/SkSwizzler_opts.inc
-index 671db3f05f61..c578238a9e58 100644
---- a/gfx/skia/skia/src/opts/SkSwizzler_opts.inc
-+++ b/gfx/skia/skia/src/opts/SkSwizzler_opts.inc
-@@ -84,6 +84,29 @@ SI float reciprocal_alpha(float a) {
- auto q = F4{1.0f} / vA;
- return _mm_and_ps(sk_bit_cast<__m128>(vA != F4{0.0f}), q)[0];
- }
-+#elif defined(SK_CPU_PPC) && defined(__VSX__)
-+// -- VSX -- Harden against timing attacks.
-+// vec_splats / vec_div / vec_cmpgt / vec_and each map to a single VSX op on
-+// both GCC and Clang. vec_cmpgt(vA, 0) is exact for the non-negative-alpha
-+// contract (0 <= a) and avoids Clang's static_cast<float>(vector) extension
-+// that GCC does not support.
-+SK_NO_SANITIZE("float-divide-by-zero")
-+SI float reciprocal_alpha_times_255(float a) {
-+ SkASSERT(0 <= a && a <= 255);
-+ __vector float vA = vec_splats(a);
-+ __vector float q = vec_div(vec_splats(255.0f), vA);
-+ __vector float vMask = (__vector float)vec_cmpgt(vA, vec_splats(0.0f));
-+ return vec_and(vMask, q)[0];
-+}
-+
-+SK_NO_SANITIZE("float-divide-by-zero")
-+SI float reciprocal_alpha(float a) {
-+ SkASSERT(0 <= a && a <= 1);
-+ __vector float vA = vec_splats(a);
-+ __vector float q = vec_div(vec_splats(1.0f), vA);
-+ __vector float vMask = (__vector float)vec_cmpgt(vA, vec_splats(0.0f));
-+ return vec_and(vMask, q)[0];
-+}
- #else
- // -- Portable -- *Not* hardened against timing attacks
- SI float reciprocal_alpha_times_255(float a) {
-@@ -1085,6 +1108,208 @@ void rgbA_to_BGRA(uint32_t* dst, const uint32_t* src, int count) {
- rgbA_to_BGRA_portable(dst, src, count);
- }
-
-+#elif defined(SK_CPU_PPC) && defined(__VSX__)
-+// -- VSX -- Native Power VSX/AltiVec ports of the SSSE3 swizzlers below.
-+// Each _mm_* operation is replaced by the corresponding vec_* sequence per
-+// the GCC ppc_wrappers translation pattern (vec_mergeh/l, vec_perm, and the
-+// vec_vmuleuh/vmulouh + permute idiom for _mm_mulhi_epu16). The permute
-+// masks for byte-shuffles use the same byte-order layout as the SSE
-+// _mm_setr_epi8 forms because PPC64 LE register-to-memory byte order is the
-+// same as x86 LE.
-+
-+// Scale: ((x*y) + 128) * 257 >> 16, per 16-bit lane (matches the SSSE3 form).
-+static inline __vector unsigned short scale(__vector unsigned short x, __vector unsigned short y) {
-+ const __vector unsigned short v128 = vec_splats((unsigned short)128);
-+ const __vector unsigned short v257 = vec_splats((unsigned short)257);
-+ __vector unsigned short summ = (__vector unsigned short)((__vector unsigned short)(x * y) + v128);
-+ // _mm_mulhi_epu16 equivalent: 16x16 -> high 16 bits, via mule+mulo+permute.
-+ __vector unsigned int even = vec_vmuleuh(summ, v257);
-+ __vector unsigned int odd = vec_vmulouh(summ, v257);
-+ const __vector unsigned char xform = (__vector unsigned char){
-+ 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17,
-+ 0x0A, 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F
-+ };
-+ return (__vector unsigned short)vec_perm((__vector unsigned char)even,
-+ (__vector unsigned char)odd, xform);
-+}
-+
-+static void premul_should_swapRB(bool kSwapRB, uint32_t* dst, const uint32_t* src, int count) {
-+ auto premul8 = [=](__vector unsigned char* lo, __vector unsigned char* hi) {
-+ const __vector unsigned char zeros = (__vector unsigned char){0};
-+ const __vector unsigned char planar = kSwapRB
-+ ? (__vector unsigned char){2,6,10,14, 1,5,9,13, 0,4,8,12, 3,7,11,15}
-+ : (__vector unsigned char){0,4,8,12, 1,5,9,13, 2,6,10,14, 3,7,11,15};
-+
-+ // Swizzle each 16-byte chunk into 8-bit planar layout.
-+ *lo = vec_perm(*lo, *lo, planar); // rrrrgggg bbbbaaaa
-+ *hi = vec_perm(*hi, *hi, planar); // RRRRGGGG BBBBAAAA
-+
-+ // Interleave the two halves at 32-bit granularity.
-+ __vector unsigned char rg = (__vector unsigned char)
-+ vec_mergeh((__vector unsigned int)*lo, (__vector unsigned int)*hi); // rrrrRRRR ggggGGGG
-+ __vector unsigned char ba = (__vector unsigned char)
-+ vec_mergel((__vector unsigned int)*lo, (__vector unsigned int)*hi); // bbbbBBBB aaaaAAAA
-+
-+ // Unpack to 16-bit planar.
-+ __vector unsigned short r = (__vector unsigned short)vec_mergeh(rg, zeros);
-+ __vector unsigned short g = (__vector unsigned short)vec_mergel(rg, zeros);
-+ __vector unsigned short b = (__vector unsigned short)vec_mergeh(ba, zeros);
-+ __vector unsigned short a = (__vector unsigned short)vec_mergel(ba, zeros);
-+
-+ // Premultiply each colour channel by alpha.
-+ r = scale(r, a);
-+ g = scale(g, a);
-+ b = scale(b, a);
-+
-+ // Repack into interlaced pixels.
-+ const __vector unsigned short v8 = vec_splats((unsigned short)8);
-+ __vector unsigned short rg2 = vec_or(r, vec_sl(g, v8));
-+ __vector unsigned short ba2 = vec_or(b, vec_sl(a, v8));
-+ *lo = (__vector unsigned char)vec_mergeh(rg2, ba2);
-+ *hi = (__vector unsigned char)vec_mergel(rg2, ba2);
-+ };
-+
-+ while (count >= 8) {
-+ __vector unsigned char lo = vec_xl(0, (const unsigned char*)(src + 0));
-+ __vector unsigned char hi = vec_xl(0, (const unsigned char*)(src + 4));
-+ premul8(&lo, &hi);
-+ vec_xst(lo, 0, (unsigned char*)(dst + 0));
-+ vec_xst(hi, 0, (unsigned char*)(dst + 4));
-+ src += 8; dst += 8; count -= 8;
-+ }
-+
-+ if (count >= 4) {
-+ __vector unsigned char lo = vec_xl(0, (const unsigned char*)src);
-+ __vector unsigned char hi = (__vector unsigned char){0};
-+ premul8(&lo, &hi);
-+ vec_xst(lo, 0, (unsigned char*)dst);
-+ src += 4; dst += 4; count -= 4;
-+ }
-+
-+ auto proc = kSwapRB ? RGBA_to_bgrA_portable : RGBA_to_rgbA_portable;
-+ proc(dst, src, count);
-+}
-+
-+void RGBA_to_rgbA(uint32_t* dst, const uint32_t* src, int count) {
-+ premul_should_swapRB(false, dst, src, count);
-+}
-+
-+void RGBA_to_bgrA(uint32_t* dst, const uint32_t* src, int count) {
-+ premul_should_swapRB(true, dst, src, count);
-+}
-+
-+void RGBA_to_BGRA(uint32_t* dst, const uint32_t* src, int count) {
-+ const __vector unsigned char swapRB = (__vector unsigned char){
-+ 2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15
-+ };
-+ while (count >= 4) {
-+ __vector unsigned char rgba = vec_xl(0, (const unsigned char*)src);
-+ __vector unsigned char bgra = vec_perm(rgba, rgba, swapRB);
-+ vec_xst(bgra, 0, (unsigned char*)dst);
-+ src += 4; dst += 4; count -= 4;
-+ }
-+ RGBA_to_BGRA_portable(dst, src, count);
-+}
-+
-+void grayA_to_RGBA(uint32_t dst[], const uint8_t* src, int count) {
-+ while (count >= 8) {
-+ __vector unsigned short ga = (__vector unsigned short)vec_xl(0, src);
-+ __vector unsigned short gg = vec_or(
-+ vec_and(ga, vec_splats((unsigned short)0x00FF)),
-+ vec_sl (ga, vec_splats((unsigned short)8)));
-+ __vector unsigned short ggga_lo = vec_mergeh(gg, ga);
-+ __vector unsigned short ggga_hi = vec_mergel(gg, ga);
-+ vec_xst((__vector unsigned char)ggga_lo, 0, (unsigned char*)(dst + 0));
-+ vec_xst((__vector unsigned char)ggga_hi, 0, (unsigned char*)(dst + 4));
-+ src += 8 * 2; dst += 8; count -= 8;
-+ }
-+ grayA_to_RGBA_portable(dst, src, count);
-+}
-+
-+void grayA_to_rgbA(uint32_t dst[], const uint8_t* src, int count) {
-+ while (count >= 8) {
-+ __vector unsigned short grayA = (__vector unsigned short)vec_xl(0, src);
-+ __vector unsigned short g0 = vec_and(grayA, vec_splats((unsigned short)0x00FF));
-+ __vector unsigned short a0 = vec_sr (grayA, vec_splats((unsigned short)8));
-+ g0 = scale(g0, a0);
-+ const __vector unsigned short v8 = vec_splats((unsigned short)8);
-+ __vector unsigned short gg = vec_or(g0, vec_sl(g0, v8));
-+ __vector unsigned short ga = vec_or(g0, vec_sl(a0, v8));
-+ __vector unsigned short ggga_lo = vec_mergeh(gg, ga);
-+ __vector unsigned short ggga_hi = vec_mergel(gg, ga);
-+ vec_xst((__vector unsigned char)ggga_lo, 0, (unsigned char*)(dst + 0));
-+ vec_xst((__vector unsigned char)ggga_hi, 0, (unsigned char*)(dst + 4));
-+ src += 8 * 2; dst += 8; count -= 8;
-+ }
-+ grayA_to_rgbA_portable(dst, src, count);
-+}
-+
-+enum Format { kRGB1, kBGR1 };
-+static void inverted_cmyk_to(Format format, uint32_t* dst, const uint32_t* src, int count) {
-+ auto convert8 = [=](__vector unsigned char* lo, __vector unsigned char* hi) {
-+ const __vector unsigned char zeros = (__vector unsigned char){0};
-+ const __vector unsigned char planar = (kBGR1 == format)
-+ ? (__vector unsigned char){2,6,10,14, 1,5,9,13, 0,4,8,12, 3,7,11,15}
-+ : (__vector unsigned char){0,4,8,12, 1,5,9,13, 2,6,10,14, 3,7,11,15};
-+
-+ *lo = vec_perm(*lo, *lo, planar); // ccccmmmm yyyykkkk
-+ *hi = vec_perm(*hi, *hi, planar); // CCCCMMMM YYYYKKKK
-+ __vector unsigned char cm = (__vector unsigned char)
-+ vec_mergeh((__vector unsigned int)*lo, (__vector unsigned int)*hi);
-+ __vector unsigned char yk = (__vector unsigned char)
-+ vec_mergel((__vector unsigned int)*lo, (__vector unsigned int)*hi);
-+
-+ __vector unsigned short c = (__vector unsigned short)vec_mergeh(cm, zeros);
-+ __vector unsigned short m = (__vector unsigned short)vec_mergel(cm, zeros);
-+ __vector unsigned short y = (__vector unsigned short)vec_mergeh(yk, zeros);
-+ __vector unsigned short k = (__vector unsigned short)vec_mergel(yk, zeros);
-+
-+ __vector unsigned short r = scale(c, k);
-+ __vector unsigned short g = scale(m, k);
-+ __vector unsigned short b = scale(y, k);
-+
-+ const __vector unsigned short v8 = vec_splats((unsigned short)8);
-+ __vector unsigned short rg = vec_or(r, vec_sl(g, v8));
-+ __vector unsigned short ba = vec_or(b, vec_splats((unsigned short)0xFF00));
-+ *lo = (__vector unsigned char)vec_mergeh(rg, ba);
-+ *hi = (__vector unsigned char)vec_mergel(rg, ba);
-+ };
-+
-+ while (count >= 8) {
-+ __vector unsigned char lo = vec_xl(0, (const unsigned char*)(src + 0));
-+ __vector unsigned char hi = vec_xl(0, (const unsigned char*)(src + 4));
-+ convert8(&lo, &hi);
-+ vec_xst(lo, 0, (unsigned char*)(dst + 0));
-+ vec_xst(hi, 0, (unsigned char*)(dst + 4));
-+ src += 8; dst += 8; count -= 8;
-+ }
-+ if (count >= 4) {
-+ __vector unsigned char lo = vec_xl(0, (const unsigned char*)src);
-+ __vector unsigned char hi = (__vector unsigned char){0};
-+ convert8(&lo, &hi);
-+ vec_xst(lo, 0, (unsigned char*)dst);
-+ src += 4; dst += 4; count -= 4;
-+ }
-+ auto proc = (kBGR1 == format) ? inverted_CMYK_to_BGR1_portable : inverted_CMYK_to_RGB1_portable;
-+ proc(dst, src, count);
-+}
-+
-+void inverted_CMYK_to_RGB1(uint32_t dst[], const uint32_t* src, int count) {
-+ inverted_cmyk_to(kRGB1, dst, src, count);
-+}
-+
-+void inverted_CMYK_to_BGR1(uint32_t dst[], const uint32_t* src, int count) {
-+ inverted_cmyk_to(kBGR1, dst, src, count);
-+}
-+
-+void rgbA_to_RGBA(uint32_t* dst, const uint32_t* src, int count) {
-+ rgbA_to_RGBA_portable(dst, src, count);
-+}
-+
-+void rgbA_to_BGRA(uint32_t* dst, const uint32_t* src, int count) {
-+ rgbA_to_BGRA_portable(dst, src, count);
-+}
-+
- #elif SK_CPU_LSX_LEVEL >= SK_CPU_LSX_LEVEL_LASX
- // -- LASX ----------------------------------------------------------------------------------------
-
-@@ -1736,6 +1961,39 @@ static void gray_to_RGB1_portable(uint32_t dst[], const uint8_t* src, int count)
- }
- gray_to_RGB1_portable(dst, src, count);
- }
-+#elif defined(SK_CPU_PPC) && defined(__VSX__)
-+ void gray_to_RGB1(uint32_t dst[], const uint8_t* src, int count) {
-+ const __vector unsigned char alphas = vec_splats((unsigned char)0xFF);
-+ while (count >= 16) {
-+ __vector unsigned char grays = vec_xl(0, src);
-+
-+ // Replicate gray byte: gg = unpack(gray, gray) per 8-bit lane.
-+ __vector unsigned char gg_lo = vec_mergeh(grays, grays);
-+ __vector unsigned char gg_hi = vec_mergel(grays, grays);
-+ __vector unsigned char ga_lo = vec_mergeh(grays, alphas);
-+ __vector unsigned char ga_hi = vec_mergel(grays, alphas);
-+
-+ // Interleave g-pairs and ga-pairs at 16-bit granularity.
-+ __vector unsigned short ggga0 = vec_mergeh((__vector unsigned short)gg_lo,
-+ (__vector unsigned short)ga_lo);
-+ __vector unsigned short ggga1 = vec_mergel((__vector unsigned short)gg_lo,
-+ (__vector unsigned short)ga_lo);
-+ __vector unsigned short ggga2 = vec_mergeh((__vector unsigned short)gg_hi,
-+ (__vector unsigned short)ga_hi);
-+ __vector unsigned short ggga3 = vec_mergel((__vector unsigned short)gg_hi,
-+ (__vector unsigned short)ga_hi);
-+
-+ vec_xst((__vector unsigned char)ggga0, 0, (unsigned char*)(dst + 0));
-+ vec_xst((__vector unsigned char)ggga1, 0, (unsigned char*)(dst + 4));
-+ vec_xst((__vector unsigned char)ggga2, 0, (unsigned char*)(dst + 8));
-+ vec_xst((__vector unsigned char)ggga3, 0, (unsigned char*)(dst + 12));
-+
-+ src += 16;
-+ dst += 16;
-+ count -= 16;
-+ }
-+ gray_to_RGB1_portable(dst, src, count);
-+ }
- #elif SK_CPU_LSX_LEVEL >= SK_CPU_LSX_LEVEL_LASX
- /*not static*/ inline void gray_to_RGB1(uint32_t dst[], const uint8_t* src, int count) {
- const __m256i alphas = __lasx_xvreplgr2vr_b(0xFF);
-@@ -1920,6 +2178,37 @@ static void RGB_to_BGR1_portable(uint32_t dst[], const uint8_t* src, int count)
- proc(dst, src, count);
- }
-
-+ void RGB_to_RGB1(uint32_t dst[], const uint8_t* src, int count) {
-+ insert_alpha_should_swaprb(false, dst, src, count);
-+ }
-+ void RGB_to_BGR1(uint32_t dst[], const uint8_t* src, int count) {
-+ insert_alpha_should_swaprb(true, dst, src, count);
-+ }
-+#elif defined(SK_CPU_PPC) && defined(__VSX__)
-+ static void insert_alpha_should_swaprb(bool kSwapRB,
-+ uint32_t dst[], const uint8_t* src, int count) {
-+ // alphaMask = 0xFF000000 per 32-bit lane -> bytes (in LE memory layout) are
-+ // {00,00,00,FF, 00,00,00,FF, ...}.
-+ const __vector unsigned char alphaMask = (__vector unsigned char){
-+ 0,0,0,0xFF, 0,0,0,0xFF, 0,0,0,0xFF, 0,0,0,0xFF
-+ };
-+ // 'X' (= 0) is irrelevant: the alphaMask OR overwrites those lanes with FF.
-+ const __vector unsigned char expand = kSwapRB
-+ ? (__vector unsigned char){2,1,0,0, 5,4,3,0, 8,7,6,0, 11,10,9,0}
-+ : (__vector unsigned char){0,1,2,0, 3,4,5,0, 6,7,8,0, 9,10,11,0};
-+
-+ while (count >= 6) {
-+ __vector unsigned char rgb = vec_xl(0, src);
-+ __vector unsigned char rgba = vec_or(vec_perm(rgb, rgb, expand), alphaMask);
-+ vec_xst(rgba, 0, (unsigned char*)dst);
-+ src += 4*3;
-+ dst += 4;
-+ count -= 4;
-+ }
-+ auto proc = kSwapRB ? RGB_to_BGR1_portable : RGB_to_RGB1_portable;
-+ proc(dst, src, count);
-+ }
-+
- void RGB_to_RGB1(uint32_t dst[], const uint8_t* src, int count) {
- insert_alpha_should_swaprb(false, dst, src, count);
- }
-
-base-commit: a8d530ac13f0ce7e937c047f01f0d36764f5d34e
---
-2.52.0
-
diff --git a/0002-Add-VSX-instructions-for-libwebp.patch b/0002-Add-VSX-instructions-for-libwebp.patch
deleted file mode 100644
index 1f857a7..0000000
--- a/0002-Add-VSX-instructions-for-libwebp.patch
+++ /dev/null
@@ -1,2524 +0,0 @@
-From b9e116898830a0f9edd1b0566651ce2d4989618d Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Trung=20L=C3=AA?= <8@tle.id.au>
-Date: Fri, 12 Jun 2026 15:30:13 +1000
-Subject: [PATCH 2/3] Add VSX instructions for libwebp
-
-Assisted-by: Lance Albertson <lance@osuosl.org>
-Assisted-by: Thushan Fernando <thushan@thushanfernando.com>
----
- media/libwebp/src/dsp/alpha_processing.c | 6 +
- media/libwebp/src/dsp/alpha_processing_vsx.c | 246 +++++++
- media/libwebp/src/dsp/cpu.h | 14 +-
- media/libwebp/src/dsp/dec.c | 6 +
- media/libwebp/src/dsp/dec_vsx.c | 737 +++++++++++++++++++
- media/libwebp/src/dsp/filters.c | 6 +
- media/libwebp/src/dsp/filters_vsx.c | 162 ++++
- media/libwebp/src/dsp/lossless.c | 6 +
- media/libwebp/src/dsp/lossless_vsx.c | 449 +++++++++++
- media/libwebp/src/dsp/moz.build | 14 +
- media/libwebp/src/dsp/rescaler.c | 6 +
- media/libwebp/src/dsp/rescaler_vsx.c | 201 +++++
- media/libwebp/src/dsp/upsampling.c | 12 +
- media/libwebp/src/dsp/upsampling_vsx.c | 151 ++++
- media/libwebp/src/dsp/yuv.c | 6 +
- media/libwebp/src/dsp/yuv.h | 21 +
- media/libwebp/src/dsp/yuv_vsx.c | 206 ++++++
- media/libwebp/src/moz/cpu.cpp | 4 +
- 18 files changed, 2252 insertions(+), 1 deletion(-)
- create mode 100644 media/libwebp/src/dsp/alpha_processing_vsx.c
- create mode 100644 media/libwebp/src/dsp/dec_vsx.c
- create mode 100644 media/libwebp/src/dsp/filters_vsx.c
- create mode 100644 media/libwebp/src/dsp/lossless_vsx.c
- create mode 100644 media/libwebp/src/dsp/rescaler_vsx.c
- create mode 100644 media/libwebp/src/dsp/upsampling_vsx.c
- create mode 100644 media/libwebp/src/dsp/yuv_vsx.c
-
-diff --git a/media/libwebp/src/dsp/alpha_processing.c b/media/libwebp/src/dsp/alpha_processing.c
-index 4927e73e81bf..5f9152bf701a 100644
---- a/media/libwebp/src/dsp/alpha_processing.c
-+++ b/media/libwebp/src/dsp/alpha_processing.c
-@@ -434,6 +434,7 @@ extern void WebPInitAlphaProcessingMIPSdspR2(void);
- extern void WebPInitAlphaProcessingSSE2(void);
- extern void WebPInitAlphaProcessingSSE41(void);
- extern void WebPInitAlphaProcessingNEON(void);
-+extern void WebPInitAlphaProcessingVSX(void);
-
- WEBP_DSP_INIT_FUNC(WebPInitAlphaProcessing) {
- WebPMultARGBRow = WebPMultARGBRow_C;
-@@ -472,6 +473,11 @@ WEBP_DSP_INIT_FUNC(WebPInitAlphaProcessing) {
- if (VP8GetCPUInfo(kMIPSdspR2)) {
- WebPInitAlphaProcessingMIPSdspR2();
- }
-+#endif
-+#if defined(WEBP_HAVE_VSX)
-+ if (VP8GetCPUInfo(kVSX)) {
-+ WebPInitAlphaProcessingVSX();
-+ }
- #endif
- }
-
-diff --git a/media/libwebp/src/dsp/alpha_processing_vsx.c b/media/libwebp/src/dsp/alpha_processing_vsx.c
-new file mode 100644
-index 000000000000..2aad1cd8b648
---- /dev/null
-+++ b/media/libwebp/src/dsp/alpha_processing_vsx.c
-@@ -0,0 +1,246 @@
-+// Copyright 2014 Google Inc. All Rights Reserved.
-+//
-+// Use of this source code is governed by a BSD-style license
-+// that can be found in the COPYING file in the root of the source
-+// tree. An additional intellectual property rights grant can be found
-+// in the file PATENTS. All contributing project authors may
-+// be found in the AUTHORS file in the root of the source tree.
-+// -----------------------------------------------------------------------------
-+//
-+// VSX (PowerPC) version of alpha processing functions.
-+
-+#include "src/dsp/dsp.h"
-+
-+#if defined(WEBP_USE_VSX)
-+
-+#include <altivec.h>
-+#include <string.h>
-+
-+#include "src/dsp/cpu.h"
-+#include "src/webp/types.h"
-+
-+typedef __vector unsigned char u8x16;
-+typedef __vector unsigned short u16x8;
-+typedef __vector signed short i16x8;
-+typedef __vector unsigned int u32x4;
-+typedef __vector signed int i32x4;
-+
-+//------------------------------------------------------------------------------
-+// Alpha dispatch / extraction.
-+
-+static int DispatchAlpha_VSX(const uint8_t* WEBP_RESTRICT alpha,
-+ int alpha_stride, int width, int height,
-+ uint8_t* WEBP_RESTRICT dst, int dst_stride) {
-+ uint32_t alpha_and = 0xff;
-+ int i, j, k;
-+ const u8x16 zero = vec_splats((unsigned char)0);
-+ const u16x8 z16 = vec_splats((unsigned short)0);
-+ const u32x4 a_mask = vec_splats((uint32_t)0xff); // selects the low byte
-+ u8x16 all_and = vec_splats((unsigned char)0xff);
-+ const int limit = width & ~15;
-+
-+ for (j = 0; j < height; ++j) {
-+ uint8_t* ptr = dst;
-+ for (i = 0; i < limit; i += 16) {
-+ const u8x16 a0 = vec_xl(0, (unsigned char*)&alpha[i]);
-+ // Spread the 16 alpha bytes to the low byte of 16 32-bit lanes.
-+ const u16x8 a1_lo = (u16x8)vec_mergeh(a0, zero);
-+ const u16x8 a1_hi = (u16x8)vec_mergel(a0, zero);
-+ const u32x4 s0 = (u32x4)vec_mergeh(a1_lo, z16);
-+ const u32x4 s1 = (u32x4)vec_mergel(a1_lo, z16);
-+ const u32x4 s2 = (u32x4)vec_mergeh(a1_hi, z16);
-+ const u32x4 s3 = (u32x4)vec_mergel(a1_hi, z16);
-+ const u32x4* spread[4] = {&s0, &s1, &s2, &s3};
-+ for (k = 0; k < 4; ++k) {
-+ const u32x4 d = vec_xl(0, (uint32_t*)(ptr + 16 * k));
-+ vec_xst(vec_sel(d, *spread[k], a_mask), 0, (uint32_t*)(ptr + 16 * k));
-+ }
-+ all_and = vec_and(all_and, a0);
-+ ptr += 64;
-+ }
-+ for (; i < width; ++i) {
-+ const uint32_t alpha_value = alpha[i];
-+ dst[4 * i] = alpha_value;
-+ alpha_and &= alpha_value;
-+ }
-+ alpha += alpha_stride;
-+ dst += dst_stride;
-+ }
-+ {
-+ unsigned char tmp[16];
-+ memcpy(tmp, &all_and, 16);
-+ for (k = 0; k < 16; ++k) alpha_and &= tmp[k];
-+ }
-+ return (alpha_and != 0xff);
-+}
-+
-+static void DispatchAlphaToGreen_VSX(const uint8_t* WEBP_RESTRICT alpha,
-+ int alpha_stride, int width, int height,
-+ uint32_t* WEBP_RESTRICT dst,
-+ int dst_stride) {
-+ int i, j;
-+ const u8x16 zero = vec_splats((unsigned char)0);
-+ const u16x8 z16 = vec_splats((unsigned short)0);
-+ const int limit = width & ~15;
-+ for (j = 0; j < height; ++j) {
-+ for (i = 0; i < limit; i += 16) {
-+ const u8x16 a0 = vec_xl(0, (unsigned char*)&alpha[i]);
-+ // Place each alpha byte into the green slot (<< 8) of a 32-bit lane.
-+ const u16x8 a1_lo = (u16x8)vec_mergeh(zero, a0); // note the 'zero' first
-+ const u16x8 a1_hi = (u16x8)vec_mergel(zero, a0);
-+ const u32x4 g0 = (u32x4)vec_mergeh(a1_lo, z16);
-+ const u32x4 g1 = (u32x4)vec_mergel(a1_lo, z16);
-+ const u32x4 g2 = (u32x4)vec_mergeh(a1_hi, z16);
-+ const u32x4 g3 = (u32x4)vec_mergel(a1_hi, z16);
-+ vec_xst(g0, 0, &dst[i + 0]);
-+ vec_xst(g1, 0, &dst[i + 4]);
-+ vec_xst(g2, 0, &dst[i + 8]);
-+ vec_xst(g3, 0, &dst[i + 12]);
-+ }
-+ for (; i < width; ++i) dst[i] = alpha[i] << 8;
-+ alpha += alpha_stride;
-+ dst += dst_stride;
-+ }
-+}
-+
-+static int ExtractAlpha_VSX(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
-+ int width, int height, uint8_t* WEBP_RESTRICT alpha,
-+ int alpha_stride) {
-+ uint32_t alpha_and = 0xff;
-+ int i, j, k;
-+ const u32x4 a_mask = vec_splats((uint32_t)0xff); // keeps the low byte
-+ u8x16 all_and = vec_splats((unsigned char)0xff);
-+ const int limit = width & ~7;
-+
-+ for (j = 0; j < height; ++j) {
-+ const uint32_t* src = (const uint32_t*)argb;
-+ for (i = 0; i < limit; i += 8) {
-+ const u32x4 a0 = vec_and(vec_xl(0, (uint32_t*)(src + 0)), a_mask);
-+ const u32x4 a1 = vec_and(vec_xl(0, (uint32_t*)(src + 4)), a_mask);
-+ const i16x8 c0 = vec_packs((i32x4)a0, (i32x4)a1);
-+ const u8x16 d0 = vec_packsu(c0, c0); // 8 alpha bytes in the low half
-+ memcpy(&alpha[i], &d0, 8);
-+ all_and = vec_and(all_and, d0);
-+ src += 8;
-+ }
-+ for (; i < width; ++i) {
-+ const uint32_t alpha_value = argb[4 * i];
-+ alpha[i] = alpha_value;
-+ alpha_and &= alpha_value;
-+ }
-+ argb += argb_stride;
-+ alpha += alpha_stride;
-+ }
-+ {
-+ unsigned char tmp[16];
-+ memcpy(tmp, &all_and, 16);
-+ for (k = 0; k < 8; ++k) alpha_and &= tmp[k];
-+ }
-+ return (alpha_and == 0xff);
-+}
-+
-+static void ExtractGreen_VSX(const uint32_t* WEBP_RESTRICT argb,
-+ uint8_t* WEBP_RESTRICT alpha, int size) {
-+ int i;
-+ const u32x4 mask = vec_splats((uint32_t)0xff);
-+ const u32x4 sh8 = vec_splats((uint32_t)8);
-+ for (i = 0; i + 16 <= size; i += 16) {
-+ const u32x4 a0 =
-+ vec_and(vec_sr(vec_xl(0, (uint32_t*)(argb + i + 0)), sh8), mask);
-+ const u32x4 a1 =
-+ vec_and(vec_sr(vec_xl(0, (uint32_t*)(argb + i + 4)), sh8), mask);
-+ const u32x4 a2 =
-+ vec_and(vec_sr(vec_xl(0, (uint32_t*)(argb + i + 8)), sh8), mask);
-+ const u32x4 a3 =
-+ vec_and(vec_sr(vec_xl(0, (uint32_t*)(argb + i + 12)), sh8), mask);
-+ const i16x8 d0 = vec_packs((i32x4)a0, (i32x4)a1);
-+ const i16x8 d1 = vec_packs((i32x4)a2, (i32x4)a3);
-+ const u8x16 e = vec_packsu(d0, d1);
-+ vec_xst(e, 0, &alpha[i]);
-+ }
-+ for (; i < size; ++i) alpha[i] = argb[i] >> 8;
-+}
-+
-+//------------------------------------------------------------------------------
-+// Premultiply.
-+
-+#define MULTIPLIER(a) ((a) * 32897U)
-+#define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
-+
-+// Spreads the alpha lane across r/g/b and inserts 0xff in the alpha lane, for
-+// the two pixels packed in a 16-bit-per-channel vector. Built against the
-+// little-endian byte order; src is the channel vector, the second operand is
-+// an all-0xff vector.
-+static const u8x16 kSpreadAlphaLast = {6, 7, 6, 7, 6, 7, 16, 7,
-+ 14, 15, 14, 15, 14, 15, 16, 15};
-+static const u8x16 kSpreadAlphaFirst = {16, 1, 0, 1, 0, 1, 0, 1,
-+ 16, 9, 8, 9, 8, 9, 8, 9};
-+
-+static WEBP_INLINE u16x8 MulHi16(u16x8 a, u16x8 b) {
-+ const u32x4 sh = vec_splats((unsigned int)16);
-+ const u32x4 e = vec_sr(vec_mule(a, b), sh);
-+ const u32x4 o = vec_sr(vec_mulo(a, b), sh);
-+ return vec_pack(vec_mergeh(e, o), vec_mergel(e, o));
-+}
-+
-+static void ApplyAlphaMultiply_VSX(uint8_t* rgba, int alpha_first, int w, int h,
-+ int stride) {
-+ const u8x16 zero = vec_splats((unsigned char)0);
-+ const u8x16 allff = vec_splats((unsigned char)0xff);
-+ const u16x8 z16 = vec_splats((unsigned short)0);
-+ const u16x8 kMult = vec_splats((unsigned short)0x8081);
-+ const u16x8 sh7 = vec_splats((unsigned short)7);
-+ const u8x16 ctrl = alpha_first ? kSpreadAlphaFirst : kSpreadAlphaLast;
-+ const int kSpan = 4;
-+ while (h-- > 0) {
-+ uint8_t* const rgbx = rgba;
-+ int i;
-+ for (i = 0; i + kSpan <= w; i += kSpan) {
-+ const u8x16 argb0 = vec_xl(0, (unsigned char*)(rgbx + 4 * i));
-+ const u16x8 lo = (u16x8)vec_mergeh(argb0, zero);
-+ const u16x8 hi = (u16x8)vec_mergel(argb0, zero);
-+ const u16x8 a_lo = (u16x8)vec_perm((u8x16)lo, allff, ctrl);
-+ const u16x8 a_hi = (u16x8)vec_perm((u8x16)hi, allff, ctrl);
-+ const u16x8 A0lo = vec_mladd(a_lo, lo, z16);
-+ const u16x8 A0hi = vec_mladd(a_hi, hi, z16);
-+ const u16x8 A2lo = vec_sr(MulHi16(A0lo, kMult), sh7);
-+ const u16x8 A2hi = vec_sr(MulHi16(A0hi, kMult), sh7);
-+ const u8x16 out = vec_packsu((i16x8)A2lo, (i16x8)A2hi);
-+ vec_xst(out, 0, (unsigned char*)(rgbx + 4 * i));
-+ }
-+ // Finish with left-overs.
-+ for (; i < w; ++i) {
-+ uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
-+ const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
-+ const uint32_t a = alpha[4 * i];
-+ if (a != 0xff) {
-+ const uint32_t mult = MULTIPLIER(a);
-+ rgb[4 * i + 0] = PREMULTIPLY(rgb[4 * i + 0], mult);
-+ rgb[4 * i + 1] = PREMULTIPLY(rgb[4 * i + 1], mult);
-+ rgb[4 * i + 2] = PREMULTIPLY(rgb[4 * i + 2], mult);
-+ }
-+ }
-+ rgba += stride;
-+ }
-+}
-+
-+#undef MULTIPLIER
-+#undef PREMULTIPLY
-+
-+//------------------------------------------------------------------------------
-+
-+extern void WebPInitAlphaProcessingVSX(void);
-+
-+WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingVSX(void) {
-+ WebPApplyAlphaMultiply = ApplyAlphaMultiply_VSX;
-+ WebPDispatchAlpha = DispatchAlpha_VSX;
-+ WebPDispatchAlphaToGreen = DispatchAlphaToGreen_VSX;
-+ WebPExtractAlpha = ExtractAlpha_VSX;
-+ WebPExtractGreen = ExtractGreen_VSX;
-+}
-+
-+#else // !WEBP_USE_VSX
-+
-+WEBP_DSP_INIT_STUB(WebPInitAlphaProcessingVSX)
-+
-+#endif // WEBP_USE_VSX
-diff --git a/media/libwebp/src/dsp/cpu.h b/media/libwebp/src/dsp/cpu.h
-index 17c4db971c7f..d1d4b3127c84 100644
---- a/media/libwebp/src/dsp/cpu.h
-+++ b/media/libwebp/src/dsp/cpu.h
-@@ -154,6 +154,17 @@
- #define WEBP_USE_MSA
- #endif
-
-+//------------------------------------------------------------------------------
-+// PPC64 / Power VSX (ISA 2.07 / POWER8 baseline).
-+
-+#if defined(__powerpc64__) && defined(__VSX__)
-+#define WEBP_USE_VSX
-+#endif
-+
-+#if defined(WEBP_USE_VSX) && !defined(WEBP_HAVE_VSX)
-+#define WEBP_HAVE_VSX
-+#endif
-+
- //------------------------------------------------------------------------------
-
- #ifndef WEBP_DSP_OMIT_C_CODE
-@@ -308,7 +319,8 @@ typedef enum {
- kNEON,
- kMIPS32,
- kMIPSdspR2,
-- kMSA
-+ kMSA,
-+ kVSX
- } CPUFeature;
-
- // returns true if the CPU supports the feature.
-diff --git a/media/libwebp/src/dsp/dec.c b/media/libwebp/src/dsp/dec.c
-index 4f38309980ce..f34276ba7316 100644
---- a/media/libwebp/src/dsp/dec.c
-+++ b/media/libwebp/src/dsp/dec.c
-@@ -752,6 +752,7 @@ extern void VP8DspInitNEON(void);
- extern void VP8DspInitMIPS32(void);
- extern void VP8DspInitMIPSdspR2(void);
- extern void VP8DspInitMSA(void);
-+extern void VP8DspInitVSX(void);
-
- WEBP_DSP_INIT_FUNC(VP8DspInit) {
- VP8InitClipTables();
-@@ -843,6 +844,11 @@ WEBP_DSP_INIT_FUNC(VP8DspInit) {
- if (VP8GetCPUInfo(kMSA)) {
- VP8DspInitMSA();
- }
-+#endif
-+#if defined(WEBP_HAVE_VSX)
-+ if (VP8GetCPUInfo(kVSX)) {
-+ VP8DspInitVSX();
-+ }
- #endif
- }
-
-diff --git a/media/libwebp/src/dsp/dec_vsx.c b/media/libwebp/src/dsp/dec_vsx.c
-new file mode 100644
-index 000000000000..e0c1cbc3b71b
---- /dev/null
-+++ b/media/libwebp/src/dsp/dec_vsx.c
-@@ -0,0 +1,737 @@
-+// Copyright 2011 Google Inc. All Rights Reserved.
-+//
-+// Use of this source code is governed by a BSD-style license
-+// that can be found in the COPYING file in the root of the source
-+// tree. An additional intellectual property rights grant can be found
-+// in the file PATENTS. All contributing project authors may
-+// be found in the AUTHORS file in the root of the source tree.
-+// -----------------------------------------------------------------------------
-+//
-+// VSX (PowerPC) version of decoding functions.
-+
-+#include "src/dsp/dsp.h"
-+
-+#if defined(WEBP_USE_VSX)
-+
-+#include <altivec.h>
-+#include <string.h>
-+
-+typedef __vector signed short i16x8;
-+typedef __vector unsigned short u16x8;
-+typedef __vector signed int i32x4;
-+typedef __vector unsigned int u32x4;
-+typedef __vector unsigned char u8x16;
-+typedef __vector signed char i8x16;
-+typedef __vector signed long long i64x2;
-+
-+// Signed multiply-high of packed 16-bit lanes (POWER8 has no vmulhsh).
-+static WEBP_INLINE i16x8 MulHi16_S(i16x8 a, i16x8 b) {
-+ const u32x4 sh = vec_splats((unsigned int)16);
-+ const i32x4 e = vec_sra(vec_mule(a, b), sh);
-+ const i32x4 o = vec_sra(vec_mulo(a, b), sh);
-+ return (i16x8)vec_pack(vec_mergeh(e, o), vec_mergel(e, o));
-+}
-+
-+// Transpose two interleaved 4x4 blocks of 16-bit values.
-+static WEBP_INLINE void Transpose2_4x4(i16x8 in0, i16x8 in1, i16x8 in2,
-+ i16x8 in3, i16x8* out0, i16x8* out1,
-+ i16x8* out2, i16x8* out3) {
-+ const i16x8 t0 = (i16x8)vec_mergeh(in0, in1);
-+ const i16x8 t1 = (i16x8)vec_mergeh(in2, in3);
-+ const i16x8 t2 = (i16x8)vec_mergel(in0, in1);
-+ const i16x8 t3 = (i16x8)vec_mergel(in2, in3);
-+ const i32x4 u0 = vec_mergeh((i32x4)t0, (i32x4)t1);
-+ const i32x4 u1 = vec_mergeh((i32x4)t2, (i32x4)t3);
-+ const i32x4 u2 = vec_mergel((i32x4)t0, (i32x4)t1);
-+ const i32x4 u3 = vec_mergel((i32x4)t2, (i32x4)t3);
-+ *out0 = (i16x8)vec_mergeh((i64x2)u0, (i64x2)u1);
-+ *out1 = (i16x8)vec_mergel((i64x2)u0, (i64x2)u1);
-+ *out2 = (i16x8)vec_mergeh((i64x2)u2, (i64x2)u3);
-+ *out3 = (i16x8)vec_mergel((i64x2)u2, (i64x2)u3);
-+}
-+
-+// Bounded 4-coefficient load into the low half of a 16-bit vector.
-+static WEBP_INLINE i16x8 Load4Coeffs(const int16_t* WEBP_RESTRICT p) {
-+ int16_t tmp[8] = {0};
-+ memcpy(tmp, p, 4 * sizeof(int16_t));
-+ return *(const i16x8*)tmp;
-+}
-+
-+// Bounded load of n pixels, zero-extended to 16-bit lanes.
-+static WEBP_INLINE i16x8 LoadDst(const uint8_t* WEBP_RESTRICT p, int n) {
-+ unsigned char tmp[16] = {0};
-+ memcpy(tmp, p, n);
-+ return (i16x8)vec_mergeh(vec_xl(0, tmp), vec_splats((unsigned char)0));
-+}
-+
-+static void Transform_VSX(const int16_t* WEBP_RESTRICT in,
-+ uint8_t* WEBP_RESTRICT dst, int do_two) {
-+ const i16x8 k1 = vec_splats((short)20091);
-+ const i16x8 k2 = vec_splats((short)-30068);
-+ const u16x8 three = vec_splats((unsigned short)3);
-+ i16x8 in0 = Load4Coeffs(in + 0), in1 = Load4Coeffs(in + 4);
-+ i16x8 in2 = Load4Coeffs(in + 8), in3 = Load4Coeffs(in + 12);
-+ i16x8 T0, T1, T2, T3;
-+
-+ if (do_two) {
-+ in0 = (i16x8)vec_mergeh((i64x2)in0, (i64x2)Load4Coeffs(in + 16));
-+ in1 = (i16x8)vec_mergeh((i64x2)in1, (i64x2)Load4Coeffs(in + 20));
-+ in2 = (i16x8)vec_mergeh((i64x2)in2, (i64x2)Load4Coeffs(in + 24));
-+ in3 = (i16x8)vec_mergeh((i64x2)in3, (i64x2)Load4Coeffs(in + 28));
-+ }
-+
-+ { // Vertical pass + transpose.
-+ const i16x8 a = vec_add(in0, in2);
-+ const i16x8 b = vec_sub(in0, in2);
-+ const i16x8 c = vec_add(vec_sub(in1, in3),
-+ vec_sub(MulHi16_S(in1, k2), MulHi16_S(in3, k1)));
-+ const i16x8 d = vec_add(vec_add(in1, in3),
-+ vec_add(MulHi16_S(in1, k1), MulHi16_S(in3, k2)));
-+ Transpose2_4x4(vec_add(a, d), vec_add(b, c), vec_sub(b, c), vec_sub(a, d),
-+ &T0, &T1, &T2, &T3);
-+ }
-+ { // Horizontal pass + transpose.
-+ const i16x8 dc = vec_add(T0, vec_splats((short)4));
-+ const i16x8 a = vec_add(dc, T2);
-+ const i16x8 b = vec_sub(dc, T2);
-+ const i16x8 c = vec_add(vec_sub(T1, T3),
-+ vec_sub(MulHi16_S(T1, k2), MulHi16_S(T3, k1)));
-+ const i16x8 d = vec_add(vec_add(T1, T3),
-+ vec_add(MulHi16_S(T1, k1), MulHi16_S(T3, k2)));
-+ const i16x8 s0 = vec_sra(vec_add(a, d), three);
-+ const i16x8 s1 = vec_sra(vec_add(b, c), three);
-+ const i16x8 s2 = vec_sra(vec_sub(b, c), three);
-+ const i16x8 s3 = vec_sra(vec_sub(a, d), three);
-+ Transpose2_4x4(s0, s1, s2, s3, &T0, &T1, &T2, &T3);
-+ }
-+ { // Add to the reference pixels and store with saturation.
-+ const int n = do_two ? 8 : 4;
-+ const i16x8 d0 = LoadDst(dst + 0 * BPS, n);
-+ const i16x8 d1 = LoadDst(dst + 1 * BPS, n);
-+ const i16x8 d2 = LoadDst(dst + 2 * BPS, n);
-+ const i16x8 d3 = LoadDst(dst + 3 * BPS, n);
-+ const u8x16 r0 = vec_packsu(vec_add(d0, T0), vec_add(d0, T0));
-+ const u8x16 r1 = vec_packsu(vec_add(d1, T1), vec_add(d1, T1));
-+ const u8x16 r2 = vec_packsu(vec_add(d2, T2), vec_add(d2, T2));
-+ const u8x16 r3 = vec_packsu(vec_add(d3, T3), vec_add(d3, T3));
-+ unsigned char b0[16], b1[16], b2[16], b3[16];
-+ memcpy(b0, &r0, 16); memcpy(b1, &r1, 16);
-+ memcpy(b2, &r2, 16); memcpy(b3, &r3, 16);
-+ memcpy(dst + 0 * BPS, b0, n); memcpy(dst + 1 * BPS, b1, n);
-+ memcpy(dst + 2 * BPS, b2, n); memcpy(dst + 3 * BPS, b3, n);
-+ }
-+}
-+
-+//------------------------------------------------------------------------------
-+// Simple in-loop edge filtering.
-+
-+#define ABSU(p, q) vec_or(vec_subs((u8x16)(q), (u8x16)(p)), \
-+ vec_subs((u8x16)(p), (u8x16)(q)))
-+
-+// Per-byte signed arithmetic >>3, packed with saturation.
-+static WEBP_INLINE i8x16 SignedShift3(i8x16 x) {
-+ const u8x16 z = vec_splats((unsigned char)0);
-+ const u16x8 sh = vec_splats((unsigned short)(3 + 8));
-+ const i16x8 lo = vec_sra((i16x8)vec_mergeh(z, (u8x16)x), sh);
-+ const i16x8 hi = vec_sra((i16x8)vec_mergel(z, (u8x16)x), sh);
-+ return (i8x16)vec_packs(lo, hi);
-+}
-+
-+static WEBP_INLINE void DoFilter2_VSX(u8x16* WEBP_RESTRICT p1,
-+ u8x16* WEBP_RESTRICT p0,
-+ u8x16* WEBP_RESTRICT q0,
-+ u8x16* WEBP_RESTRICT q1, int thresh) {
-+ const u8x16 sign = vec_splats((unsigned char)0x80);
-+ const u8x16 t1 = ABSU(*p1, *q1);
-+ const u8x16 t2 = vec_and(t1, vec_splats((unsigned char)0xFE));
-+ const u8x16 t3 = (u8x16)vec_sr((u16x8)t2, vec_splats((unsigned short)1));
-+ const u8x16 t4 = ABSU(*p0, *q0);
-+ const u8x16 t6 = vec_adds(vec_adds(t4, t4), t3);
-+ const u8x16 t7 = vec_subs(t6, vec_splats((unsigned char)thresh));
-+ const u8x16 mask = (u8x16)vec_cmpeq(t7, vec_splats((unsigned char)0));
-+
-+ const i8x16 p1s = (i8x16)vec_xor(*p1, sign);
-+ const i8x16 q1s = (i8x16)vec_xor(*q1, sign);
-+ i8x16 P0 = (i8x16)vec_xor(*p0, sign);
-+ i8x16 Q0 = (i8x16)vec_xor(*q0, sign);
-+
-+ const i8x16 d0 = vec_subs(Q0, P0);
-+ const i8x16 s1 = vec_adds(vec_subs(p1s, q1s), d0);
-+ i8x16 a = vec_adds(d0, vec_adds(d0, s1));
-+ a = vec_and(a, (i8x16)mask);
-+ const i8x16 v3 = SignedShift3(vec_adds(a, vec_splats((signed char)3)));
-+ const i8x16 v4 = SignedShift3(vec_adds(a, vec_splats((signed char)4)));
-+ Q0 = vec_subs(Q0, v4);
-+ P0 = vec_adds(P0, v3);
-+ *p0 = vec_xor((u8x16)P0, sign);
-+ *q0 = vec_xor((u8x16)Q0, sign);
-+}
-+
-+static void SimpleVFilter16_VSX(uint8_t* p, int stride, int thresh) {
-+ u8x16 p1 = vec_xl(0, p - 2 * stride);
-+ u8x16 p0 = vec_xl(0, p - stride);
-+ u8x16 q0 = vec_xl(0, p);
-+ u8x16 q1 = vec_xl(0, p + stride);
-+ DoFilter2_VSX(&p1, &p0, &q0, &q1, thresh);
-+ vec_xst(p0, 0, p - stride);
-+ vec_xst(q0, 0, p);
-+}
-+
-+static void SimpleVFilter16i_VSX(uint8_t* p, int stride, int thresh) {
-+ int k;
-+ for (k = 3; k > 0; --k) {
-+ p += 4 * stride;
-+ SimpleVFilter16_VSX(p, stride, thresh);
-+ }
-+}
-+
-+// Transpose four columns out of / into 16 rows for horizontal-edge filtering.
-+static WEBP_INLINE void Load8x4(const uint8_t* WEBP_RESTRICT b, int s,
-+ u8x16* WEBP_RESTRICT p, u8x16* WEBP_RESTRICT q) {
-+ uint32_t a0[4], a1[4];
-+ memcpy(&a0[0], b + 0 * s, 4); memcpy(&a0[1], b + 4 * s, 4);
-+ memcpy(&a0[2], b + 2 * s, 4); memcpy(&a0[3], b + 6 * s, 4);
-+ memcpy(&a1[0], b + 1 * s, 4); memcpy(&a1[1], b + 5 * s, 4);
-+ memcpy(&a1[2], b + 3 * s, 4); memcpy(&a1[3], b + 7 * s, 4);
-+ const u8x16 A0 = vec_xl(0, (unsigned char*)a0);
-+ const u8x16 A1 = vec_xl(0, (unsigned char*)a1);
-+ const u8x16 B0 = vec_mergeh(A0, A1), B1 = vec_mergel(A0, A1);
-+ const u16x8 C0 = vec_mergeh((u16x8)B0, (u16x8)B1);
-+ const u16x8 C1 = vec_mergel((u16x8)B0, (u16x8)B1);
-+ *p = (u8x16)vec_mergeh((u32x4)C0, (u32x4)C1);
-+ *q = (u8x16)vec_mergel((u32x4)C0, (u32x4)C1);
-+}
-+
-+static WEBP_INLINE void Load16x4(const uint8_t* WEBP_RESTRICT r0,
-+ const uint8_t* WEBP_RESTRICT r8, int s,
-+ u8x16* p1, u8x16* p0, u8x16* q0, u8x16* q1) {
-+ Load8x4(r0, s, p1, q0);
-+ Load8x4(r8, s, p0, q1);
-+ const u8x16 t1 = *p1, t2 = *q0;
-+ *p1 = (u8x16)vec_mergeh((i64x2)t1, (i64x2)*p0);
-+ *p0 = (u8x16)vec_mergel((i64x2)t1, (i64x2)*p0);
-+ *q0 = (u8x16)vec_mergeh((i64x2)t2, (i64x2)*q1);
-+ *q1 = (u8x16)vec_mergel((i64x2)t2, (i64x2)*q1);
-+}
-+
-+static WEBP_INLINE void Store4x4(u8x16 x, uint8_t* WEBP_RESTRICT dst, int s) {
-+ unsigned char b[16];
-+ int i;
-+ memcpy(b, &x, 16);
-+ for (i = 0; i < 4; ++i) memcpy(dst + i * s, b + 4 * i, 4);
-+}
-+
-+static WEBP_INLINE void Store16x4(u8x16 p1, u8x16 p0, u8x16 q0, u8x16 q1,
-+ uint8_t* WEBP_RESTRICT r0,
-+ uint8_t* WEBP_RESTRICT r8, int s) {
-+ u8x16 t = p0;
-+ u8x16 p0s = vec_mergeh(p1, t), p1s = vec_mergel(p1, t);
-+ t = q0;
-+ u8x16 q0s = vec_mergeh(t, q1), q1s = vec_mergel(t, q1);
-+ t = p0s;
-+ p0s = (u8x16)vec_mergeh((u16x8)t, (u16x8)q0s);
-+ q0s = (u8x16)vec_mergel((u16x8)t, (u16x8)q0s);
-+ t = p1s;
-+ p1s = (u8x16)vec_mergeh((u16x8)t, (u16x8)q1s);
-+ q1s = (u8x16)vec_mergel((u16x8)t, (u16x8)q1s);
-+ Store4x4(p0s, r0, s); Store4x4(q0s, r0 + 4 * s, s);
-+ Store4x4(p1s, r8, s); Store4x4(q1s, r8 + 4 * s, s);
-+}
-+
-+static void SimpleHFilter16_VSX(uint8_t* p, int stride, int thresh) {
-+ u8x16 p1, p0, q0, q1;
-+ p -= 2; // beginning of p1
-+ Load16x4(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1);
-+ DoFilter2_VSX(&p1, &p0, &q0, &q1, thresh);
-+ Store16x4(p1, p0, q0, q1, p, p + 8 * stride, stride);
-+}
-+
-+static void SimpleHFilter16i_VSX(uint8_t* p, int stride, int thresh) {
-+ int k;
-+ for (k = 3; k > 0; --k) {
-+ p += 4;
-+ SimpleHFilter16_VSX(p, stride, thresh);
-+ }
-+}
-+
-+//------------------------------------------------------------------------------
-+// Complex in-loop edge filtering (vertical/luma).
-+
-+static const u8x16 kSignBit = {
-+ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-+ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80};
-+#define FLIPB(x) ((x) = (i8x16)vec_xor((u8x16)(x), kSignBit))
-+
-+static WEBP_INLINE u8x16 GetNotHEV(u8x16 p1, u8x16 p0, u8x16 q0, u8x16 q1,
-+ int hev_thresh) {
-+ const u8x16 d = vec_subs(vec_max(ABSU(p1, p0), ABSU(q1, q0)),
-+ vec_splats((unsigned char)hev_thresh));
-+ return (u8x16)vec_cmpeq(d, vec_splats((unsigned char)0));
-+}
-+
-+static WEBP_INLINE i8x16 GetBaseDelta(i8x16 p1, i8x16 p0, i8x16 q0, i8x16 q1) {
-+ const i8x16 d = vec_subs(q0, p0);
-+ const i8x16 s1 = vec_adds(vec_subs(p1, q1), d);
-+ return vec_adds(d, vec_adds(d, s1));
-+}
-+
-+static WEBP_INLINE void DoSimpleFilterS(i8x16* p0, i8x16* q0, i8x16 f) {
-+ *q0 = vec_subs(*q0, SignedShift3(vec_adds(f, vec_splats((signed char)4))));
-+ *p0 = vec_adds(*p0, SignedShift3(vec_adds(f, vec_splats((signed char)3))));
-+}
-+
-+static WEBP_INLINE void Update2Pixels(i8x16* pi, i8x16* qi, i16x8 lo, i16x8 hi) {
-+ const u16x8 s7 = vec_splats((unsigned short)7);
-+ const i8x16 d = (i8x16)vec_packs(vec_sra(lo, s7), vec_sra(hi, s7));
-+ *pi = vec_adds(*pi, d);
-+ *qi = vec_subs(*qi, d);
-+ FLIPB(*pi);
-+ FLIPB(*qi);
-+}
-+
-+// mask = (max inner abs-diff <= ithresh) && NeedsFilter(thresh).
-+static WEBP_INLINE u8x16 ComplexMask(u8x16 p3, u8x16 p2, u8x16 p1, u8x16 p0,
-+ u8x16 q0, u8x16 q1, u8x16 q2, u8x16 q3,
-+ int thresh, int ithresh) {
-+ u8x16 m = ABSU(p1, p0);
-+ m = vec_max(m, ABSU(p3, p2));
-+ m = vec_max(m, ABSU(p2, p1));
-+ m = vec_max(m, ABSU(q1, q0));
-+ m = vec_max(m, ABSU(q3, q2));
-+ m = vec_max(m, ABSU(q2, q1));
-+ const u8x16 tm = (u8x16)vec_cmpeq(
-+ vec_subs(m, vec_splats((unsigned char)ithresh)),
-+ vec_splats((unsigned char)0));
-+ const u8x16 t2 = vec_and(ABSU(p1, q1), vec_splats((unsigned char)0xFE));
-+ const u8x16 t3 = (u8x16)vec_sr((u16x8)t2, vec_splats((unsigned short)1));
-+ const u8x16 t6 = vec_adds(vec_adds(ABSU(p0, q0), ABSU(p0, q0)), t3);
-+ const u8x16 fm = (u8x16)vec_cmpeq(
-+ vec_subs(t6, vec_splats((unsigned char)thresh)),
-+ vec_splats((unsigned char)0));
-+ return vec_and(tm, fm);
-+}
-+
-+static WEBP_INLINE void DoFilter4(u8x16* p1u, u8x16* p0u, u8x16* q0u,
-+ u8x16* q1u, u8x16 mask, int hev_thresh) {
-+ const u8x16 zero = vec_splats((unsigned char)0);
-+ const u8x16 not_hev = GetNotHEV(*p1u, *p0u, *q0u, *q1u, hev_thresh);
-+ i8x16 p1 = (i8x16)vec_xor(*p1u, kSignBit), p0 = (i8x16)vec_xor(*p0u, kSignBit);
-+ i8x16 q0 = (i8x16)vec_xor(*q0u, kSignBit), q1 = (i8x16)vec_xor(*q1u, kSignBit);
-+ i8x16 t1 = vec_andc(vec_subs(p1, q1), (i8x16)not_hev);
-+ const i8x16 t2 = vec_subs(q0, p0);
-+ t1 = vec_adds(t1, t2); t1 = vec_adds(t1, t2); t1 = vec_adds(t1, t2);
-+ t1 = vec_and(t1, (i8x16)mask);
-+ const i8x16 a3 = SignedShift3(vec_adds(t1, vec_splats((signed char)4)));
-+ p0 = vec_adds(p0, SignedShift3(vec_adds(t1, vec_splats((signed char)3))));
-+ q0 = vec_subs(q0, a3);
-+ FLIPB(p0); FLIPB(q0);
-+ const i8x16 t = vec_add(a3, (i8x16)kSignBit);
-+ i8x16 t3 = vec_sub((i8x16)vec_avg((u8x16)t, zero), vec_splats((signed char)64));
-+ t3 = vec_and((i8x16)not_hev, t3);
-+ q1 = vec_subs(q1, t3); p1 = vec_adds(p1, t3);
-+ FLIPB(p1); FLIPB(q1);
-+ *p1u = (u8x16)p1; *p0u = (u8x16)p0; *q0u = (u8x16)q0; *q1u = (u8x16)q1;
-+}
-+
-+static WEBP_INLINE void DoFilter6(u8x16* p2u, u8x16* p1u, u8x16* p0u,
-+ u8x16* q0u, u8x16* q1u, u8x16* q2u,
-+ u8x16 mask, int hev_thresh) {
-+ const u8x16 zero = vec_splats((unsigned char)0);
-+ const u8x16 not_hev = GetNotHEV(*p1u, *p0u, *q0u, *q1u, hev_thresh);
-+ i8x16 p2 = (i8x16)vec_xor(*p2u, kSignBit), p1 = (i8x16)vec_xor(*p1u, kSignBit);
-+ i8x16 p0 = (i8x16)vec_xor(*p0u, kSignBit), q0 = (i8x16)vec_xor(*q0u, kSignBit);
-+ i8x16 q1 = (i8x16)vec_xor(*q1u, kSignBit), q2 = (i8x16)vec_xor(*q2u, kSignBit);
-+ const i8x16 a = GetBaseDelta(p1, p0, q0, q1);
-+ { // hev pixels: simple filter
-+ const i8x16 f = vec_and(a, (i8x16)vec_andc(mask, not_hev));
-+ DoSimpleFilterS(&p0, &q0, f);
-+ }
-+ { // non-hev pixels: strong filter
-+ const i8x16 f = vec_and(a, vec_and((i8x16)not_hev, (i8x16)mask));
-+ const i16x8 k9 = vec_splats((short)0x0900), k63 = vec_splats((short)63);
-+ const i16x8 f9lo = MulHi16_S((i16x8)vec_mergeh(zero, (u8x16)f), k9);
-+ const i16x8 f9hi = MulHi16_S((i16x8)vec_mergel(zero, (u8x16)f), k9);
-+ const i16x8 a2lo = vec_add(f9lo, k63), a2hi = vec_add(f9hi, k63);
-+ const i16x8 a1lo = vec_add(a2lo, f9lo), a1hi = vec_add(a2hi, f9hi);
-+ const i16x8 a0lo = vec_add(a1lo, f9lo), a0hi = vec_add(a1hi, f9hi);
-+ Update2Pixels(&p2, &q2, a2lo, a2hi);
-+ Update2Pixels(&p1, &q1, a1lo, a1hi);
-+ Update2Pixels(&p0, &q0, a0lo, a0hi);
-+ }
-+ *p2u = (u8x16)p2; *p1u = (u8x16)p1; *p0u = (u8x16)p0;
-+ *q0u = (u8x16)q0; *q1u = (u8x16)q1; *q2u = (u8x16)q2;
-+}
-+
-+static void VFilter16_VSX(uint8_t* p, int s, int thresh, int ithresh,
-+ int hev_thresh) {
-+ u8x16 p3 = vec_xl(0, p - 4 * s), p2 = vec_xl(0, p - 3 * s);
-+ u8x16 p1 = vec_xl(0, p - 2 * s), p0 = vec_xl(0, p - s);
-+ u8x16 q0 = vec_xl(0, p), q1 = vec_xl(0, p + s);
-+ u8x16 q2 = vec_xl(0, p + 2 * s), q3 = vec_xl(0, p + 3 * s);
-+ const u8x16 m = ComplexMask(p3, p2, p1, p0, q0, q1, q2, q3, thresh, ithresh);
-+ DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, m, hev_thresh);
-+ vec_xst(p2, 0, p - 3 * s); vec_xst(p1, 0, p - 2 * s); vec_xst(p0, 0, p - s);
-+ vec_xst(q0, 0, p); vec_xst(q1, 0, p + s); vec_xst(q2, 0, p + 2 * s);
-+}
-+
-+static void VFilter16i_VSX(uint8_t* p, int s, int thresh, int ithresh,
-+ int hev_thresh) {
-+ int k;
-+ for (k = 3; k > 0; --k) {
-+ p += 4 * s;
-+ u8x16 p3 = vec_xl(0, p - 4 * s), p2 = vec_xl(0, p - 3 * s);
-+ u8x16 p1 = vec_xl(0, p - 2 * s), p0 = vec_xl(0, p - s);
-+ u8x16 q0 = vec_xl(0, p), q1 = vec_xl(0, p + s);
-+ u8x16 q2 = vec_xl(0, p + 2 * s), q3 = vec_xl(0, p + 3 * s);
-+ const u8x16 m = ComplexMask(p3, p2, p1, p0, q0, q1, q2, q3, thresh, ithresh);
-+ DoFilter4(&p1, &p0, &q0, &q1, m, hev_thresh);
-+ vec_xst(p1, 0, p - 2 * s); vec_xst(p0, 0, p - s);
-+ vec_xst(q0, 0, p); vec_xst(q1, 0, p + s);
-+ }
-+}
-+
-+// Complex horizontal luma: two 16x4 transposes around the vertical edge feed
-+// the same DoFilter4/DoFilter6 used by the vertical variants.
-+static void HFilter16_VSX(uint8_t* p, int s, int thresh, int ithresh,
-+ int hev_thresh) {
-+ uint8_t* const b = p - 4;
-+ u8x16 p3, p2, p1, p0, q0, q1, q2, q3;
-+ Load16x4(b, b + 8 * s, s, &p3, &p2, &p1, &p0);
-+ Load16x4(p, p + 8 * s, s, &q0, &q1, &q2, &q3);
-+ const u8x16 m = ComplexMask(p3, p2, p1, p0, q0, q1, q2, q3, thresh, ithresh);
-+ DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, m, hev_thresh);
-+ Store16x4(p3, p2, p1, p0, b, b + 8 * s, s);
-+ Store16x4(q0, q1, q2, q3, p, p + 8 * s, s);
-+}
-+
-+static void HFilter16i_VSX(uint8_t* p, int s, int thresh, int ithresh,
-+ int hev_thresh) {
-+ int k;
-+ for (k = 3; k > 0; --k) {
-+ p += 4;
-+ u8x16 p3, p2, p1, p0, q0, q1, q2, q3;
-+ Load16x4(p - 4, p - 4 + 8 * s, s, &p3, &p2, &p1, &p0);
-+ Load16x4(p, p + 8 * s, s, &q0, &q1, &q2, &q3);
-+ const u8x16 m =
-+ ComplexMask(p3, p2, p1, p0, q0, q1, q2, q3, thresh, ithresh);
-+ DoFilter4(&p1, &p0, &q0, &q1, m, hev_thresh);
-+ Store16x4(p1, p0, q0, q1, p - 2, p - 2 + 8 * s, s);
-+ }
-+}
-+
-+//------------------------------------------------------------------------------
-+// Complex chroma filtering: operate on the u and v planes (8 wide) together.
-+
-+// Pack 8 u-bytes into the low half and 8 v-bytes into the high half.
-+static WEBP_INLINE u8x16 LoadUV(const uint8_t* WEBP_RESTRICT u,
-+ const uint8_t* WEBP_RESTRICT v) {
-+ unsigned char b[16];
-+ memcpy(b, u, 8);
-+ memcpy(b + 8, v, 8);
-+ return vec_xl(0, b);
-+}
-+
-+static WEBP_INLINE void StoreUV(u8x16 x, uint8_t* WEBP_RESTRICT u,
-+ uint8_t* WEBP_RESTRICT v) {
-+ unsigned char b[16];
-+ memcpy(b, &x, 16);
-+ memcpy(u, b, 8);
-+ memcpy(v, b + 8, 8);
-+}
-+
-+static void VFilter8_VSX(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
-+ int s, int thresh, int ithresh, int hev_thresh) {
-+ u8x16 p3 = LoadUV(u - 4 * s, v - 4 * s), p2 = LoadUV(u - 3 * s, v - 3 * s);
-+ u8x16 p1 = LoadUV(u - 2 * s, v - 2 * s), p0 = LoadUV(u - s, v - s);
-+ u8x16 q0 = LoadUV(u, v), q1 = LoadUV(u + s, v + s);
-+ u8x16 q2 = LoadUV(u + 2 * s, v + 2 * s), q3 = LoadUV(u + 3 * s, v + 3 * s);
-+ const u8x16 m = ComplexMask(p3, p2, p1, p0, q0, q1, q2, q3, thresh, ithresh);
-+ DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, m, hev_thresh);
-+ StoreUV(p2, u - 3 * s, v - 3 * s); StoreUV(p1, u - 2 * s, v - 2 * s);
-+ StoreUV(p0, u - s, v - s); StoreUV(q0, u, v);
-+ StoreUV(q1, u + s, v + s); StoreUV(q2, u + 2 * s, v + 2 * s);
-+}
-+
-+static void VFilter8i_VSX(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
-+ int s, int thresh, int ithresh, int hev_thresh) {
-+ u += 4 * s; v += 4 * s;
-+ u8x16 p3 = LoadUV(u - 4 * s, v - 4 * s), p2 = LoadUV(u - 3 * s, v - 3 * s);
-+ u8x16 p1 = LoadUV(u - 2 * s, v - 2 * s), p0 = LoadUV(u - s, v - s);
-+ u8x16 q0 = LoadUV(u, v), q1 = LoadUV(u + s, v + s);
-+ u8x16 q2 = LoadUV(u + 2 * s, v + 2 * s), q3 = LoadUV(u + 3 * s, v + 3 * s);
-+ const u8x16 m = ComplexMask(p3, p2, p1, p0, q0, q1, q2, q3, thresh, ithresh);
-+ DoFilter4(&p1, &p0, &q0, &q1, m, hev_thresh);
-+ StoreUV(p1, u - 2 * s, v - 2 * s); StoreUV(p0, u - s, v - s);
-+ StoreUV(q0, u, v); StoreUV(q1, u + s, v + s);
-+}
-+
-+static void HFilter8_VSX(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
-+ int s, int thresh, int ithresh, int hev_thresh) {
-+ u8x16 p3, p2, p1, p0, q0, q1, q2, q3;
-+ Load16x4(u - 4, v - 4, s, &p3, &p2, &p1, &p0);
-+ Load16x4(u, v, s, &q0, &q1, &q2, &q3);
-+ const u8x16 m = ComplexMask(p3, p2, p1, p0, q0, q1, q2, q3, thresh, ithresh);
-+ DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, m, hev_thresh);
-+ Store16x4(p3, p2, p1, p0, u - 4, v - 4, s);
-+ Store16x4(q0, q1, q2, q3, u, v, s);
-+}
-+
-+static void HFilter8i_VSX(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
-+ int s, int thresh, int ithresh, int hev_thresh) {
-+ u += 4; v += 4;
-+ u8x16 p3, p2, p1, p0, q0, q1, q2, q3;
-+ Load16x4(u - 4, v - 4, s, &p3, &p2, &p1, &p0);
-+ Load16x4(u, v, s, &q0, &q1, &q2, &q3);
-+ const u8x16 m = ComplexMask(p3, p2, p1, p0, q0, q1, q2, q3, thresh, ithresh);
-+ DoFilter4(&p1, &p0, &q0, &q1, m, hev_thresh);
-+ Store16x4(p1, p0, q0, q1, u - 2, v - 2, s);
-+}
-+
-+//------------------------------------------------------------------------------
-+// Intra prediction (16x16 luma, 8x8 chroma). DC top-sums are scalar (the SIMD
-+// win is the block fill); TrueMotion/VE/HE are vectorized.
-+
-+static WEBP_INLINE void Put16(uint8_t v, uint8_t* dst) {
-+ const u8x16 x = vec_splats(v);
-+ int j;
-+ for (j = 0; j < 16; ++j) vec_xst(x, 0, dst + j * BPS);
-+}
-+static WEBP_INLINE void Put8x8uv(uint8_t v, uint8_t* dst) {
-+ const u8x16 x = vec_splats(v);
-+ unsigned char b[16];
-+ int j;
-+ memcpy(b, &x, 16);
-+ for (j = 0; j < 8; ++j) memcpy(dst + j * BPS, b, 8);
-+}
-+
-+static void VE16_VSX(uint8_t* dst) {
-+ const u8x16 top = vec_xl(0, dst - BPS);
-+ int j;
-+ for (j = 0; j < 16; ++j) vec_xst(top, 0, dst + j * BPS);
-+}
-+static void HE16_VSX(uint8_t* dst) {
-+ int j;
-+ for (j = 0; j < 16; ++j) vec_xst(vec_splats(dst[-1 + j * BPS]), 0, dst + j * BPS);
-+}
-+static void DC16_VSX(uint8_t* dst) {
-+ int s = 16, j;
-+ for (j = 0; j < 16; ++j) s += dst[-BPS + j] + dst[-1 + j * BPS];
-+ Put16(s >> 5, dst);
-+}
-+static void DC16NoTop_VSX(uint8_t* dst) {
-+ int s = 8, j;
-+ for (j = 0; j < 16; ++j) s += dst[-1 + j * BPS];
-+ Put16(s >> 4, dst);
-+}
-+static void DC16NoLeft_VSX(uint8_t* dst) {
-+ int s = 8, j;
-+ for (j = 0; j < 16; ++j) s += dst[-BPS + j];
-+ Put16(s >> 4, dst);
-+}
-+static void DC16NoTopLeft_VSX(uint8_t* dst) { Put16(0x80, dst); }
-+static void TM16_VSX(uint8_t* dst) {
-+ const u8x16 zero = vec_splats((unsigned char)0);
-+ const u8x16 t = vec_xl(0, dst - BPS);
-+ const i16x8 tl = (i16x8)vec_mergeh(t, zero), th = (i16x8)vec_mergel(t, zero);
-+ const int c = dst[-BPS - 1];
-+ int y;
-+ for (y = 0; y < 16; ++y) {
-+ const i16x8 b = vec_splats((short)(dst[-1 + y * BPS] - c));
-+ vec_xst((u8x16)vec_packsu(vec_add(b, tl), vec_add(b, th)), 0, dst + y * BPS);
-+ }
-+}
-+
-+static void VE8uv_VSX(uint8_t* dst) {
-+ unsigned char t[8];
-+ int j;
-+ memcpy(t, dst - BPS, 8);
-+ for (j = 0; j < 8; ++j) memcpy(dst + j * BPS, t, 8);
-+}
-+static void DC8uv_VSX(uint8_t* dst) {
-+ int s = 8, j;
-+ for (j = 0; j < 8; ++j) s += dst[-BPS + j] + dst[-1 + j * BPS];
-+ Put8x8uv(s >> 4, dst);
-+}
-+static void DC8uvNoTop_VSX(uint8_t* dst) {
-+ int s = 4, j;
-+ for (j = 0; j < 8; ++j) s += dst[-1 + j * BPS];
-+ Put8x8uv(s >> 3, dst);
-+}
-+static void DC8uvNoLeft_VSX(uint8_t* dst) {
-+ int s = 4, j;
-+ for (j = 0; j < 8; ++j) s += dst[-BPS + j];
-+ Put8x8uv(s >> 3, dst);
-+}
-+static void DC8uvNoTopLeft_VSX(uint8_t* dst) { Put8x8uv(0x80, dst); }
-+static void TM8uv_VSX(uint8_t* dst) { // 8x8 fill: each output byte is clip(top[x] + left[y] - topleft) to 0..255
-+ const u8x16 zero = vec_splats((unsigned char)0);
-+ const u8x16 t = vec_xl(0, dst - BPS); // NOTE(review): reads 16 bytes of the row above although only the half picked by vec_mergeh is used -- confirm the extra bytes are always readable
-+ const i16x8 tl = (i16x8)vec_mergeh(t, zero); // top samples widened to 16 bits
-+ const int c = dst[-BPS - 1]; // top-left sample
-+ int y;
-+ for (y = 0; y < 8; ++y) {
-+ const i16x8 b = vec_splats((short)(dst[-1 + y * BPS] - c)); // left[y] - topleft in every lane
-+ const u8x16 o = (u8x16)vec_packsu(vec_add(b, tl), vec_splats((short)0)); // saturating narrow back to bytes
-+ unsigned char bb[16];
-+ memcpy(bb, &o, 16);
-+ memcpy(dst + y * BPS, bb, 8); // only 8 bytes are written per row
-+ }
-+}
-+
-+//------------------------------------------------------------------------------
-+// 4x4 luma intra prediction. Whole-vector byte shifts window the edge samples:
-+// srli_si128(x,n) == vec_sld(zero, x, 16 - n)
-+// slli_si128(x,n) == vec_sld(x, zero, n)
-+
-+#define SRLI(x, n) vec_sld(zero, (x), 16 - (n))
-+#define SLLI(x, n) vec_sld((x), zero, (n))
-+#define INS16(v, val, i) ((u8x16)vec_insert((short)(val), (i16x8)(v), (i)))
-+#define AVG3C(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2))
-+
-+static WEBP_INLINE u8x16 Load64(const uint8_t* WEBP_RESTRICT p) {
-+ unsigned char b[16] = {0};
-+ memcpy(b, p, 8);
-+ return vec_xl(0, b);
-+}
-+static WEBP_INLINE uint32_t GetWord(u8x16 v) {
-+ unsigned char b[16];
-+ uint32_t r;
-+ memcpy(b, &v, 16);
-+ memcpy(&r, b, 4);
-+ return r;
-+}
-+static WEBP_INLINE u8x16 SetWord(uint32_t v) {
-+ unsigned char b[16] = {0};
-+ memcpy(b, &v, 4);
-+ return vec_xl(0, b);
-+}
-+static WEBP_INLINE void StoreWord(uint32_t v, uint8_t* dst) {
-+ memcpy(dst, &v, 4);
-+}
-+
-+static void VE4_VSX(uint8_t* dst) {
-+ const u8x16 zero = vec_splats((unsigned char)0), one = vec_splats((unsigned char)1);
-+ const u8x16 A = Load64(dst - BPS - 1), B = SRLI(A, 1), C = SRLI(A, 2);
-+ const u8x16 a = vec_avg(A, C), lsb = vec_and(vec_xor(A, C), one);
-+ const u8x16 avg = vec_avg(vec_subs(a, lsb), B);
-+ const uint32_t v = GetWord(avg);
-+ int i;
-+ for (i = 0; i < 4; ++i) StoreWord(v, dst + i * BPS);
-+}
-+static void LD4_VSX(uint8_t* dst) {
-+ const u8x16 zero = vec_splats((unsigned char)0), one = vec_splats((unsigned char)1);
-+ const u8x16 A = Load64(dst - BPS), B = SRLI(A, 1), C = SRLI(A, 2);
-+ const u8x16 CH = INS16(C, dst[-BPS + 7], 3);
-+ const u8x16 a1 = vec_avg(A, CH), lsb = vec_and(vec_xor(A, CH), one);
-+ const u8x16 r = vec_avg(vec_subs(a1, lsb), B);
-+ StoreWord(GetWord(r), dst + 0 * BPS);
-+ StoreWord(GetWord(SRLI(r, 1)), dst + 1 * BPS);
-+ StoreWord(GetWord(SRLI(r, 2)), dst + 2 * BPS);
-+ StoreWord(GetWord(SRLI(r, 3)), dst + 3 * BPS);
-+}
-+static void VR4_VSX(uint8_t* dst) {
-+ const u8x16 zero = vec_splats((unsigned char)0), one = vec_splats((unsigned char)1);
-+ const int I = dst[-1 + 0 * BPS], J = dst[-1 + 1 * BPS], K = dst[-1 + 2 * BPS];
-+ const int X = dst[-1 - BPS];
-+ const u8x16 XA = Load64(dst - BPS - 1), A0 = SRLI(XA, 1);
-+ const u8x16 abcd = vec_avg(XA, A0);
-+ const u8x16 IX = INS16(SLLI(XA, 1), (I | (X << 8)), 0);
-+ const u8x16 a1 = vec_avg(IX, A0), lsb = vec_and(vec_xor(IX, A0), one);
-+ const u8x16 efgh = vec_avg(vec_subs(a1, lsb), XA);
-+ StoreWord(GetWord(abcd), dst + 0 * BPS);
-+ StoreWord(GetWord(efgh), dst + 1 * BPS);
-+ StoreWord(GetWord(SLLI(abcd, 1)), dst + 2 * BPS);
-+ StoreWord(GetWord(SLLI(efgh, 1)), dst + 3 * BPS);
-+ dst[0 + 2 * BPS] = AVG3C(J, I, X);
-+ dst[0 + 3 * BPS] = AVG3C(K, J, I);
-+}
-+static void VL4_VSX(uint8_t* dst) {
-+ const u8x16 zero = vec_splats((unsigned char)0), one = vec_splats((unsigned char)1);
-+ const u8x16 A = Load64(dst - BPS), B = SRLI(A, 1), C = SRLI(A, 2);
-+ const u8x16 a1 = vec_avg(A, B), a2 = vec_avg(C, B), a3 = vec_avg(a1, a2);
-+ const u8x16 lsb1 = vec_and(vec_xor(a1, a2), one);
-+ const u8x16 abbc = vec_or(vec_xor(A, B), vec_xor(C, B));
-+ const u8x16 a4 = vec_subs(a3, vec_and(abbc, lsb1));
-+ const uint32_t extra = GetWord(SRLI(a4, 4));
-+ StoreWord(GetWord(a1), dst + 0 * BPS);
-+ StoreWord(GetWord(a4), dst + 1 * BPS);
-+ StoreWord(GetWord(SRLI(a1, 1)), dst + 2 * BPS);
-+ StoreWord(GetWord(SRLI(a4, 1)), dst + 3 * BPS);
-+ dst[3 + 2 * BPS] = (extra >> 0) & 0xff;
-+ dst[3 + 3 * BPS] = (extra >> 8) & 0xff;
-+}
-+static void RD4_VSX(uint8_t* dst) { // 4x4 predictor built from the left column (I..L), the top-left sample and the row above
-+ const u8x16 zero = vec_splats((unsigned char)0), one = vec_splats((unsigned char)1);
-+ const uint32_t I = dst[-1 + 0 * BPS], J = dst[-1 + 1 * BPS];
-+ const uint32_t K = dst[-1 + 2 * BPS], L = dst[-1 + 3 * BPS];
-+ const u8x16 XA = Load64(dst - BPS - 1); // top-left sample followed by 7 samples of the row above
-+ const u8x16 all = vec_or(SetWord((uint32_t)(L | (K << 8) | (J << 16) | (I << 24))), // SetWord memcpy's the word, so the byte order L,K,J,I holds on a little-endian host only
-+ SLLI(XA, 4));
-+ const u8x16 k1 = SRLI(all, 1), j2 = SRLI(all, 2);
-+ const u8x16 a1 = vec_avg(j2, all), lsb = vec_and(vec_xor(j2, all), one); // lsb undoes the round-up of vec_avg
-+ const u8x16 r = vec_avg(vec_subs(a1, lsb), k1); // per byte: (all + 2 * k1 + j2 + 2) >> 2, computed without widening
-+ StoreWord(GetWord(r), dst + 3 * BPS); // bottom row first; each row above starts one byte further into r
-+ StoreWord(GetWord(SRLI(r, 1)), dst + 2 * BPS);
-+ StoreWord(GetWord(SRLI(r, 2)), dst + 1 * BPS);
-+ StoreWord(GetWord(SRLI(r, 3)), dst + 0 * BPS);
-+}
-+static void TM4_VSX(uint8_t* dst) {
-+ const u8x16 zero = vec_splats((unsigned char)0);
-+ const u8x16 t = Load64(dst - BPS);
-+ const i16x8 tb = (i16x8)vec_mergeh(t, zero);
-+ const int c = dst[-BPS - 1];
-+ int y;
-+ for (y = 0; y < 4; ++y) {
-+ const i16x8 b = vec_splats((short)(dst[-1 + y * BPS] - c));
-+ const u8x16 o = (u8x16)vec_packsu(vec_add(b, tb), vec_splats((short)0));
-+ StoreWord(GetWord(o), dst + y * BPS);
-+ }
-+}
-+#undef SRLI
-+#undef SLLI
-+#undef INS16
-+#undef AVG3C
-+
-+extern void VP8DspInitVSX(void);
-+
-+WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitVSX(void) { // Points the decoder hooks assigned below at their _VSX counterparts.
-+ VP8Transform = Transform_VSX;
-+ VP8SimpleVFilter16 = SimpleVFilter16_VSX;
-+ VP8SimpleVFilter16i = SimpleVFilter16i_VSX;
-+ VP8SimpleHFilter16 = SimpleHFilter16_VSX;
-+ VP8SimpleHFilter16i = SimpleHFilter16i_VSX;
-+ VP8VFilter16 = VFilter16_VSX;
-+ VP8VFilter16i = VFilter16i_VSX;
-+ VP8HFilter16 = HFilter16_VSX;
-+ VP8HFilter16i = HFilter16i_VSX;
-+ VP8VFilter8 = VFilter8_VSX;
-+ VP8VFilter8i = VFilter8i_VSX;
-+ VP8HFilter8 = HFilter8_VSX;
-+ VP8HFilter8i = HFilter8i_VSX;
-+
-+ VP8PredLuma16[0] = DC16_VSX;
-+ VP8PredLuma16[1] = TM16_VSX;
-+ VP8PredLuma16[2] = VE16_VSX;
-+ VP8PredLuma16[3] = HE16_VSX;
-+ VP8PredLuma16[4] = DC16NoTop_VSX;
-+ VP8PredLuma16[5] = DC16NoLeft_VSX;
-+ VP8PredLuma16[6] = DC16NoTopLeft_VSX;
-+ VP8PredChroma8[0] = DC8uv_VSX;
-+ VP8PredChroma8[1] = TM8uv_VSX;
-+ VP8PredChroma8[2] = VE8uv_VSX; // slot 3 is not assigned here and keeps whatever was installed before this call
-+ VP8PredChroma8[4] = DC8uvNoTop_VSX;
-+ VP8PredChroma8[5] = DC8uvNoLeft_VSX;
-+ VP8PredChroma8[6] = DC8uvNoTopLeft_VSX;
-+ VP8PredLuma4[1] = TM4_VSX; // slot 0 is not assigned here
-+ VP8PredLuma4[2] = VE4_VSX; // slot 3 is not assigned here
-+ VP8PredLuma4[4] = RD4_VSX;
-+ VP8PredLuma4[5] = VR4_VSX;
-+ VP8PredLuma4[6] = LD4_VSX;
-+ VP8PredLuma4[7] = VL4_VSX; // no slot above 7 is assigned here
-+}
-+
-+#else // !WEBP_USE_VSX
-+
-+WEBP_DSP_INIT_STUB(VP8DspInitVSX)
-+
-+#endif // WEBP_USE_VSX
-diff --git a/media/libwebp/src/dsp/filters.c b/media/libwebp/src/dsp/filters.c
-index 38da5252df3a..9962e1287402 100644
---- a/media/libwebp/src/dsp/filters.c
-+++ b/media/libwebp/src/dsp/filters.c
-@@ -217,6 +217,7 @@ extern void VP8FiltersInitMIPSdspR2(void);
- extern void VP8FiltersInitMSA(void);
- extern void VP8FiltersInitNEON(void);
- extern void VP8FiltersInitSSE2(void);
-+extern void VP8FiltersInitVSX(void);
-
- WEBP_DSP_INIT_FUNC(VP8FiltersInit) {
- WebPUnfilters[WEBP_FILTER_NONE] = NoneUnfilter_C;
-@@ -248,6 +249,11 @@ WEBP_DSP_INIT_FUNC(VP8FiltersInit) {
- if (VP8GetCPUInfo(kMSA)) {
- VP8FiltersInitMSA();
- }
-+#endif
-+#if defined(WEBP_HAVE_VSX)
-+ if (VP8GetCPUInfo(kVSX)) {
-+ VP8FiltersInitVSX();
-+ }
- #endif
- }
-
-diff --git a/media/libwebp/src/dsp/filters_vsx.c b/media/libwebp/src/dsp/filters_vsx.c
-new file mode 100644
-index 000000000000..ae8e57ac685c
---- /dev/null
-+++ b/media/libwebp/src/dsp/filters_vsx.c
-@@ -0,0 +1,162 @@
-+// Copyright 2015 Google Inc. All Rights Reserved.
-+//
-+// Use of this source code is governed by a BSD-style license
-+// that can be found in the COPYING file in the root of the source
-+// tree. An additional intellectual property rights grant can be found
-+// in the file PATENTS. All contributing project authors may
-+// be found in the AUTHORS file in the root of the source tree.
-+// -----------------------------------------------------------------------------
-+//
-+// VSX (PowerPC) version of filtering functions.
-+
-+#include "src/dsp/dsp.h"
-+
-+#if defined(WEBP_USE_VSX)
-+
-+#include <altivec.h>
-+#include <assert.h>
-+#include <string.h>
-+
-+#include "src/dsp/cpu.h"
-+#include "src/webp/types.h"
-+
-+typedef __vector unsigned char u8x16;
-+typedef __vector unsigned short u16x8;
-+typedef __vector signed short i16x8;
-+typedef __vector unsigned long long u64x2;
-+
-+// Byte-wise shifts of the whole 128-bit register, matching the little-endian
-+// semantics of _mm_slli_si128 / _mm_srli_si128. 'n' must be a literal.
-+#define SLLI(x, n) vec_sld((x), zero, (n))
-+#define SRLI(x, n) vec_sld(zero, (x), 16 - (n))
-+
-+// Loads 8 bytes from 'p'; vec_splats replicates them into both 64-bit halves.
-+static WEBP_INLINE u8x16 Load8(const uint8_t* p) {
-+ uint64_t v;
-+ memcpy(&v, p, 8); // memcpy: 'p' need not be 8-byte aligned
-+ return (u8x16)vec_splats(v);
-+}
-+
-+//------------------------------------------------------------------------------
-+// Horizontal unfilter: out[i] = in[i] + out[i - 1] (a prefix sum).
-+
-+static void HorizontalUnfilter_VSX(const uint8_t* prev, const uint8_t* in,
-+ uint8_t* out, int width) {
-+ const u8x16 zero = vec_splats((unsigned char)0);
-+ const u64x2 sh56 = vec_splats((unsigned long long)56);
-+ u8x16 last; // carries the previous output sample in byte 0
-+ int i;
-+ out[0] = (uint8_t)(in[0] + (prev == NULL ? 0 : prev[0])); // first sample is predicted from the row above when there is one
-+ if (width <= 1) return;
-+ last = vec_insert(out[0], zero, 0);
-+ for (i = 1; i + 8 <= width; i += 8) { // 8 samples per iteration
-+ const u8x16 A0 = Load8(in + i);
-+ const u8x16 A1 = vec_add(A0, last); // fold the carried sample into the first byte
-+ const u8x16 A2 = SLLI(A1, 1);
-+ const u8x16 A3 = vec_add(A1, A2); // sums of 2 neighbours
-+ const u8x16 A4 = SLLI(A3, 2);
-+ const u8x16 A5 = vec_add(A3, A4); // sums of 4 neighbours
-+ const u8x16 A6 = SLLI(A5, 4);
-+ const u8x16 A7 = vec_add(A5, A6); // sums of 8 neighbours: the full prefix sum
-+ memcpy(out + i, &A7, 8); // only the first 8 bytes of the vector are results
-+ last = (u8x16)vec_sr((u64x2)A7, sh56); // per 64-bit lane, move the top byte (out[i + 7] in the stored lane, little-endian) down to the lowest byte
-+ }
-+ for (; i < width; ++i) out[i] = (uint8_t)(in[i] + out[i - 1]); // scalar tail
-+}
-+
-+//------------------------------------------------------------------------------
-+// Vertical unfilter: out[i] = in[i] + prev[i].
-+
-+static void VerticalUnfilter_VSX(const uint8_t* prev, const uint8_t* in,
-+ uint8_t* out, int width) {
-+ if (prev == NULL) { // no row above: fall back to the horizontal unfilter
-+ HorizontalUnfilter_VSX(NULL, in, out, width);
-+ } else {
-+ int i;
-+ const int max_pos = width & ~31; // largest multiple of 32 not above width
-+ for (i = 0; i < max_pos; i += 32) { // two 16-byte vectors per iteration
-+ const u8x16 A0 = vec_xl(0, (unsigned char*)&in[i + 0]);
-+ const u8x16 A1 = vec_xl(0, (unsigned char*)&in[i + 16]);
-+ const u8x16 B0 = vec_xl(0, (unsigned char*)&prev[i + 0]);
-+ const u8x16 B1 = vec_xl(0, (unsigned char*)&prev[i + 16]);
-+ vec_xst(vec_add(A0, B0), 0, (unsigned char*)&out[i + 0]); // byte-wise add, wraps modulo 256
-+ vec_xst(vec_add(A1, B1), 0, (unsigned char*)&out[i + 16]);
-+ }
-+ for (; i < width; ++i) out[i] = (uint8_t)(in[i] + prev[i]); // scalar tail
-+ }
-+}
-+
-+//------------------------------------------------------------------------------
-+// Gradient unfilter: row[i] = in[i] + clip(row[i-1] + top[i] - top[i-1]).
-+
-+static WEBP_INLINE int GradientPredictor_VSX(uint8_t a, uint8_t b, uint8_t c) {
-+ const int g = a + b - c;
-+ return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255;
-+}
-+
-+static void GradientPredictInverse_VSX(const uint8_t* in, const uint8_t* top,
-+ uint8_t* row, int length) {
-+ if (length > 0) {
-+ int i;
-+ const int max_pos = length & ~7;
-+ const u8x16 zero = vec_splats((unsigned char)0);
-+ u8x16 A = vec_insert((unsigned char)row[-1], zero, 0); // left sample
-+ for (i = 0; i < max_pos; i += 8) {
-+ const u8x16 t0 = Load8(top + i);
-+ const u8x16 t1 = Load8(top + i - 1);
-+ const u16x8 B = (u16x8)vec_mergeh(t0, zero);
-+ const u16x8 C = (u16x8)vec_mergeh(t1, zero);
-+ const u8x16 D = Load8(in + i); // base input
-+ const u16x8 E = vec_sub(B, C); // unclipped gradient basis b - c
-+ u8x16 out = zero; // accumulator for output
-+ u8x16 mask_hi = vec_insert((unsigned char)0xff, zero, 0);
-+ int k = 8;
-+ while (1) {
-+ const u16x8 tmp3 = vec_add((u16x8)A, E); // delta = a + b - c
-+ const u8x16 tmp4 = vec_packsu((i16x8)tmp3, (i16x8)zero); // sat. delta
-+ const u8x16 tmp5 = vec_add(tmp4, D); // add to in[]
-+ A = vec_and(tmp5, mask_hi); // keep new sample
-+ out = vec_or(out, A); // accumulate output
-+ if (--k == 0) break;
-+ A = SLLI(A, 1); // rotate left sample
-+ mask_hi = SLLI(mask_hi, 1); // rotate mask
-+ A = (u8x16)vec_mergeh(A, zero); // convert 8b -> 16b
-+ }
-+ A = SRLI(A, 7); // prepare left sample for next iteration
-+ memcpy(row + i, &out, 8);
-+ }
-+ for (; i < length; ++i) {
-+ const int delta = GradientPredictor_VSX(row[i - 1], top[i], top[i - 1]);
-+ row[i] = (uint8_t)(in[i] + delta);
-+ }
-+ }
-+}
-+
-+static void GradientUnfilter_VSX(const uint8_t* prev, const uint8_t* in,
-+ uint8_t* out, int width) {
-+ if (prev == NULL) {
-+ HorizontalUnfilter_VSX(NULL, in, out, width);
-+ } else {
-+ out[0] = (uint8_t)(in[0] + prev[0]); // predict from above
-+ GradientPredictInverse_VSX(in + 1, prev + 1, out + 1, width - 1);
-+ }
-+}
-+
-+#undef SLLI
-+#undef SRLI
-+
-+//------------------------------------------------------------------------------
-+
-+extern void VP8FiltersInitVSX(void);
-+
-+WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitVSX(void) {
-+ WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_VSX;
-+ WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_VSX;
-+ WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_VSX;
-+}
-+
-+#else // !WEBP_USE_VSX
-+
-+WEBP_DSP_INIT_STUB(VP8FiltersInitVSX)
-+
-+#endif // WEBP_USE_VSX
-diff --git a/media/libwebp/src/dsp/lossless.c b/media/libwebp/src/dsp/lossless.c
-index 1a3d800c3fbc..48b5d4a3aedc 100644
---- a/media/libwebp/src/dsp/lossless.c
-+++ b/media/libwebp/src/dsp/lossless.c
-@@ -606,6 +606,7 @@ extern void VP8LDspInitAVX2(void);
- extern void VP8LDspInitNEON(void);
- extern void VP8LDspInitMIPSdspR2(void);
- extern void VP8LDspInitMSA(void);
-+extern void VP8LDspInitVSX(void);
-
- #define COPY_PREDICTOR_ARRAY(IN, OUT) do { \
- (OUT)[0] = IN##0_C; \
-@@ -673,6 +674,11 @@ WEBP_DSP_INIT_FUNC(VP8LDspInit) {
- if (VP8GetCPUInfo(kMSA)) {
- VP8LDspInitMSA();
- }
-+#endif
-+#if defined(WEBP_HAVE_VSX)
-+ if (VP8GetCPUInfo(kVSX)) {
-+ VP8LDspInitVSX();
-+ }
- #endif
- }
-
-diff --git a/media/libwebp/src/dsp/lossless_vsx.c b/media/libwebp/src/dsp/lossless_vsx.c
-new file mode 100644
-index 000000000000..89da30c9589c
---- /dev/null
-+++ b/media/libwebp/src/dsp/lossless_vsx.c
-@@ -0,0 +1,449 @@
-+// Copyright 2014 Google Inc. All Rights Reserved.
-+//
-+// Use of this source code is governed by a BSD-style license
-+// that can be found in the COPYING file in the root of the source
-+// tree. An additional intellectual property rights grant can be found
-+// in the file PATENTS. All contributing project authors may
-+// be found in the AUTHORS file in the root of the source tree.
-+// -----------------------------------------------------------------------------
-+//
-+// VSX (PowerPC) version of lossless functions.
-+
-+#include "src/dsp/dsp.h"
-+
-+#if defined(WEBP_USE_VSX)
-+
-+#include <altivec.h>
-+#include <string.h>
-+
-+#include "src/dsp/cpu.h"
-+#include "src/dsp/lossless.h"
-+#include "src/dsp/lossless_common.h"
-+#include "src/webp/format_constants.h"
-+#include "src/webp/types.h"
-+
-+typedef __vector unsigned char u8x16;
-+typedef __vector unsigned short u16x8;
-+typedef __vector signed short i16x8;
-+typedef __vector unsigned int u32x4;
-+typedef __vector signed int i32x4;
-+
-+// Signed multiply-high of 16-bit lanes: (a * b) >> 16, matching
-+// _mm_mulhi_epi16.
-+static WEBP_INLINE i16x8 MulHiS16(i16x8 a, i16x8 b) {
-+ const u32x4 sh = vec_splats((unsigned int)16);
-+ const i32x4 e = vec_sra(vec_mule(a, b), sh); // even lanes: full 32-bit products, arithmetically shifted down by 16
-+ const i32x4 o = vec_sra(vec_mulo(a, b), sh); // odd lanes, same treatment
-+ return (i16x8)vec_pack(vec_mergeh(e, o), vec_mergel(e, o)); // interleave e/o back into lane order, then narrow each to 16 bits
-+}
-+
-+//------------------------------------------------------------------------------
-+// Color transforms.
-+
-+static void AddGreenToBlueAndRed_VSX(const uint32_t* src, int num_pixels,
-+ uint32_t* dst) {
-+ const u8x16 zero = vec_splats((unsigned char)0);
-+ // Replicate the green byte (offset 1 of each pixel) into the blue/red slots.
-+ const u8x16 kSpreadGreen = {1, 16, 1, 16, 5, 16, 5, 16,
-+ 9, 16, 9, 16, 13, 16, 13, 16};
-+ int i;
-+ for (i = 0; i + 4 <= num_pixels; i += 4) {
-+ const u8x16 in = (u8x16)vec_xl(0, (uint32_t*)&src[i]);
-+ const u8x16 g = vec_perm(in, zero, kSpreadGreen); // 0 g 0 g per pixel
-+ vec_xst((u32x4)vec_add(in, g), 0, &dst[i]);
-+ }
-+ if (i != num_pixels) {
-+ VP8LAddGreenToBlueAndRed_C(src + i, num_pixels - i, dst + i);
-+ }
-+}
-+
-+static void TransformColorInverse_VSX(const VP8LMultipliers* const m,
-+ const uint32_t* src, int num_pixels,
-+ uint32_t* dst) {
-+// sign-extended multiplying constants, pre-shifted by 5 (see lossless_sse2.c).
-+#define CST(X) (((int16_t)((m->X) << 8)) >> 5)
-+ const i16x8 mults_rb =
-+ (i16x8)vec_splats((int)(((uint32_t)(uint16_t)CST(green_to_red) << 16) |
-+ ((uint16_t)CST(green_to_blue))));
-+ const i16x8 mults_b2 =
-+ (i16x8)vec_splats((int)((uint32_t)(uint16_t)CST(red_to_blue) << 16));
-+#undef CST
-+ const u8x16 zero = vec_splats((unsigned char)0);
-+ const u32x4 mask_ag = vec_splats((uint32_t)0xff00ff00); // alpha/green
-+ const u16x8 sh8_16 = vec_splats((unsigned short)8);
-+ const u32x4 sh8_32 = vec_splats((unsigned int)8);
-+ // Broadcast the green byte (offset 1) into the high byte of both 16-bit
-+ // halves of each pixel: yields g << 8 in each lane.
-+ const u8x16 kGreenHi = {16, 1, 16, 1, 16, 5, 16, 5,
-+ 16, 9, 16, 9, 16, 13, 16, 13};
-+ int i;
-+ for (i = 0; i + 4 <= num_pixels; i += 4) {
-+ const u8x16 in = (u8x16)vec_xl(0, (uint32_t*)&src[i]);
-+ const u8x16 A = (u8x16)vec_and((u32x4)in, mask_ag); // a 0 g 0
-+ const i16x8 C = (i16x8)vec_perm(A, zero, kGreenHi); // g0g0 (g << 8)
-+ const u8x16 D = (u8x16)MulHiS16(C, mults_rb); // x dr x db1
-+ const u8x16 E = vec_add(in, D); // x r' x b'
-+ const u16x8 F = vec_sl((u16x8)E, sh8_16); // r' 0 b' 0
-+ const u8x16 G = (u8x16)MulHiS16((i16x8)F, mults_b2); // x db2 0 0
-+ const u8x16 H = (u8x16)vec_sr((u32x4)G, sh8_32); // 0 x db2 0
-+ const u16x8 I = (u16x8)vec_add(H, (u8x16)F); // r' x b'' 0
-+ const u8x16 J = (u8x16)vec_sr(I, sh8_16); // 0 r' 0 b''
-+ vec_xst(vec_or((u32x4)J, (u32x4)A), 0, &dst[i]);
-+ }
-+ if (i != num_pixels) {
-+ VP8LTransformColorInverse_C(m, src + i, num_pixels - i, dst + i);
-+ }
-+}
-+
-+//------------------------------------------------------------------------------
-+// Color-space conversion functions.
-+
-+static void ConvertBGRAToRGBA_VSX(const uint32_t* WEBP_RESTRICT src,
-+ int num_pixels, uint8_t* WEBP_RESTRICT dst) {
-+ // Swap the blue (offset 0) and red (offset 2) bytes of each pixel.
-+ const u8x16 kSwapBR = {2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15};
-+ int i;
-+ for (i = 0; i + 4 <= num_pixels; i += 4) {
-+ const u8x16 in = (u8x16)vec_xl(0, (uint32_t*)&src[i]);
-+ vec_xst(vec_perm(in, in, kSwapBR), 0, &dst[4 * i]);
-+ }
-+ if (i != num_pixels) {
-+ VP8LConvertBGRAToRGBA_C(src + i, num_pixels - i, dst + 4 * i);
-+ }
-+}
-+
-+static void ConvertBGRAToRGB_VSX(const uint32_t* WEBP_RESTRICT src,
-+ int num_pixels, uint8_t* WEBP_RESTRICT dst) {
-+ // BGRA -> RGB: gather R,G,B (offsets 2,1,0) of each pixel, drop alpha.
-+ const u8x16 kToRGB = {2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, 0, 0, 0, 0};
-+ int i;
-+ for (i = 0; i + 4 <= num_pixels; i += 4) {
-+ const u8x16 in = (u8x16)vec_xl(0, (uint32_t*)&src[i]);
-+ const u8x16 out = vec_perm(in, in, kToRGB);
-+ memcpy(&dst[3 * i], &out, 12);
-+ }
-+ if (i != num_pixels) {
-+ VP8LConvertBGRAToRGB_C(src + i, num_pixels - i, dst + 3 * i);
-+ }
-+}
-+
-+static void ConvertBGRAToBGR_VSX(const uint32_t* WEBP_RESTRICT src,
-+ int num_pixels, uint8_t* WEBP_RESTRICT dst) {
-+ // BGRA -> BGR: gather B,G,R (offsets 0,1,2) of each pixel, drop alpha.
-+ const u8x16 kToBGR = {0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0, 0, 0, 0};
-+ int i;
-+ for (i = 0; i + 4 <= num_pixels; i += 4) {
-+ const u8x16 in = (u8x16)vec_xl(0, (uint32_t*)&src[i]);
-+ const u8x16 out = vec_perm(in, in, kToBGR);
-+ memcpy(&dst[3 * i], &out, 12);
-+ }
-+ if (i != num_pixels) {
-+ VP8LConvertBGRAToBGR_C(src + i, num_pixels - i, dst + 3 * i);
-+ }
-+}
-+
-+//------------------------------------------------------------------------------
-+// Predictor transform.
-+
-+// Byte-wise shifts of the whole register (little-endian _mm_s{l,r}li_si128).
-+#define SLLI(x, n) vec_sld((x), kZero, (n))
-+#define SRLI(x, n) vec_sld(kZero, (x), 16 - (n))
-+static const u8x16 kZero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-+
-+// Per-byte floor average (a + b) >> 1, matching the C Average2().
-+static WEBP_INLINE u8x16 Average2_u8(u8x16 a, u8x16 b) {
-+ const u8x16 one = vec_splats((unsigned char)1);
-+ const u8x16 avg1 = vec_avg(a, b); // (a + b + 1) >> 1
-+ return vec_sub(avg1, vec_and(vec_xor(a, b), one));
-+}
-+
-+static WEBP_INLINE u32x4 Lane0(uint32_t v) {
-+ const u32x4 r = {v, 0, 0, 0};
-+ return r;
-+}
-+
-+// Single-pixel helpers operating on the low 32-bit lane only.
-+static WEBP_INLINE u16x8 Unpack16(uint32_t a) {
-+ return (u16x8)vec_mergeh((u8x16)Lane0(a), kZero);
-+}
-+
-+static WEBP_INLINE uint32_t Average2_VSX(uint32_t a0, uint32_t a1) {
-+ return vec_extract((u32x4)Average2_u8((u8x16)Lane0(a0), (u8x16)Lane0(a1)), 0);
-+}
-+
-+static WEBP_INLINE u16x8 Average2_16(uint32_t a0, uint32_t a1) {
-+ const u16x8 one = vec_splats((unsigned short)1);
-+ return vec_sr(vec_add(Unpack16(a0), Unpack16(a1)), one);
-+}
-+
-+static WEBP_INLINE uint32_t Average3_VSX(uint32_t a0, uint32_t a1,
-+ uint32_t a2) {
-+ const u16x8 one = vec_splats((unsigned short)1);
-+ const u16x8 avg1 = Average2_16(a0, a2);
-+ const u16x8 avg2 = vec_sr(vec_add(avg1, Unpack16(a1)), one);
-+ return vec_extract((u32x4)vec_packsu((i16x8)avg2, (i16x8)avg2), 0);
-+}
-+
-+static WEBP_INLINE uint32_t Average4_VSX(uint32_t a0, uint32_t a1, uint32_t a2,
-+ uint32_t a3) {
-+ const u16x8 one = vec_splats((unsigned short)1);
-+ const u16x8 avg1 = Average2_16(a0, a1);
-+ const u16x8 avg2 = Average2_16(a2, a3);
-+ const u16x8 avg3 = vec_sr(vec_add(avg1, avg2), one);
-+ return vec_extract((u32x4)vec_packsu((i16x8)avg3, (i16x8)avg3), 0);
-+}
-+
-+static WEBP_INLINE uint32_t ClampedAddSubtractFull_VSX(uint32_t c0, uint32_t c1,
-+ uint32_t c2) {
-+ const i16x8 v =
-+ vec_sub((i16x8)vec_add(Unpack16(c0), Unpack16(c1)), (i16x8)Unpack16(c2));
-+ return vec_extract((u32x4)vec_packsu(v, v), 0);
-+}
-+
-+static WEBP_INLINE uint32_t ClampedAddSubtractHalf_VSX(uint32_t c0, uint32_t c1,
-+ uint32_t c2) {
-+ const u16x8 one = vec_splats((unsigned short)1);
-+ const u16x8 C0 = Unpack16(c0);
-+ const u16x8 C1 = Unpack16(c1);
-+ const u16x8 B0 = Unpack16(c2);
-+ const u16x8 A0 = vec_sr(vec_add(C1, C0), one); // ave
-+ const i16x8 A1 = vec_sub((i16x8)A0, (i16x8)B0);
-+ const i16x8 BgtA = (i16x8)vec_cmpgt(B0, A0); // 0 or -1
-+ const i16x8 A2 = vec_sub(A1, BgtA);
-+ const i16x8 A3 = vec_sra(A2, one);
-+ const i16x8 A4 = vec_add((i16x8)A0, A3);
-+ return vec_extract((u32x4)vec_packsu(A4, A4), 0);
-+}
-+
-+static WEBP_INLINE uint32_t Select_VSX(uint32_t a, uint32_t b, uint32_t c) { // picks a or b by comparing their byte-wise distances to c
-+ const u8x16 A = (u8x16)Lane0(a);
-+ const u8x16 B = (u8x16)Lane0(b);
-+ const u8x16 C = (u8x16)Lane0(c);
-+ const u32x4 sa = vec_sum4s(vec_or(vec_subs(A, C), vec_subs(C, A)), // |a - c| per byte (one saturating difference is always 0), summed over the 4 bytes
-+ vec_splats((unsigned int)0));
-+ const u32x4 sb = vec_sum4s(vec_or(vec_subs(B, C), vec_subs(C, B)), // same for |b - c|
-+ vec_splats((unsigned int)0));
-+ return vec_extract((u32x4)vec_cmpgt(sb, sa), 0) ? b : a; // b only when sb is strictly greater; ties return a
-+}
-+
-+static uint32_t Predictor5_VSX(const uint32_t* const left,
-+ const uint32_t* const top) {
-+ return Average3_VSX(*left, top[0], top[1]);
-+}
-+static uint32_t Predictor6_VSX(const uint32_t* const left,
-+ const uint32_t* const top) {
-+ return Average2_VSX(*left, top[-1]);
-+}
-+static uint32_t Predictor7_VSX(const uint32_t* const left,
-+ const uint32_t* const top) {
-+ return Average2_VSX(*left, top[0]);
-+}
-+static uint32_t Predictor13_VSX(const uint32_t* const left,
-+ const uint32_t* const top) {
-+ return ClampedAddSubtractHalf_VSX(*left, top[0], top[-1]);
-+}
-+
-+static void PredictorAdd0_VSX(const uint32_t* in, const uint32_t* upper,
-+ int num_pixels, uint32_t* WEBP_RESTRICT out) {
-+ const u8x16 black = (u8x16)vec_splats((uint32_t)ARGB_BLACK);
-+ int i;
-+ for (i = 0; i + 4 <= num_pixels; i += 4) {
-+ const u8x16 src = (u8x16)vec_xl(0, (uint32_t*)&in[i]);
-+ vec_xst((u32x4)vec_add(src, black), 0, &out[i]);
-+ }
-+ if (i != num_pixels) {
-+ VP8LPredictorsAdd_C[0](in + i, NULL, num_pixels - i, out + i);
-+ }
-+ (void)upper;
-+}
-+
-+static void PredictorAdd1_VSX(const uint32_t* in, const uint32_t* upper,
-+ int num_pixels, uint32_t* WEBP_RESTRICT out) {
-+ u32x4 prev = vec_splats(out[-1]);
-+ int i;
-+ for (i = 0; i + 4 <= num_pixels; i += 4) {
-+ const u8x16 src = (u8x16)vec_xl(0, (uint32_t*)&in[i]);
-+ const u8x16 sum0 = vec_add(src, SLLI(src, 4)); // a | a+b | b+c | c+d
-+ const u8x16 sum1 = vec_add(sum0, SLLI(sum0, 8)); // running sum
-+ const u8x16 res = vec_add(sum1, (u8x16)prev);
-+ vec_xst((u32x4)res, 0, &out[i]);
-+ prev = vec_splat((u32x4)res, 3); // replicate last pixel
-+ }
-+ if (i != num_pixels) {
-+ VP8LPredictorsAdd_C[1](in + i, upper + i, num_pixels - i, out + i);
-+ }
-+}
-+
-+#define GENERATE_PREDICTOR_1_VSX(X, IN) \
-+ static void PredictorAdd##X##_VSX(const uint32_t* in, const uint32_t* upper, \
-+ int num_pixels, \
-+ uint32_t* WEBP_RESTRICT out) { \
-+ int i; \
-+ for (i = 0; i + 4 <= num_pixels; i += 4) { \
-+ const u8x16 src = (u8x16)vec_xl(0, (uint32_t*)&in[i]); \
-+ const u8x16 other = (u8x16)vec_xl(0, (uint32_t*)&(IN)); \
-+ vec_xst((u32x4)vec_add(src, other), 0, &out[i]); \
-+ } \
-+ if (i != num_pixels) { \
-+ VP8LPredictorsAdd_C[(X)](in + i, upper + i, num_pixels - i, out + i); \
-+ } \
-+ }
-+GENERATE_PREDICTOR_1_VSX(2, upper[i]) // Top.
-+GENERATE_PREDICTOR_1_VSX(3, upper[i + 1]) // Top-right.
-+GENERATE_PREDICTOR_1_VSX(4, upper[i - 1]) // Top-left.
-+#undef GENERATE_PREDICTOR_1_VSX
-+
-+// Predictors 5, 6, 7, 13 use integer averages and cannot be accumulated in
-+// parallel, so use the generic one-pixel-at-a-time batch.
-+GENERATE_PREDICTOR_ADD(Predictor5_VSX, PredictorAdd5_VSX)
-+GENERATE_PREDICTOR_ADD(Predictor6_VSX, PredictorAdd6_VSX)
-+GENERATE_PREDICTOR_ADD(Predictor7_VSX, PredictorAdd7_VSX)
-+GENERATE_PREDICTOR_ADD(Predictor13_VSX, PredictorAdd13_VSX)
-+
-+#define GENERATE_PREDICTOR_2_VSX(X, IN) \
-+ static void PredictorAdd##X##_VSX(const uint32_t* in, const uint32_t* upper, \
-+ int num_pixels, \
-+ uint32_t* WEBP_RESTRICT out) { \
-+ int i; \
-+ for (i = 0; i + 4 <= num_pixels; i += 4) { \
-+ const u8x16 Tother = (u8x16)vec_xl(0, (uint32_t*)&(IN)); \
-+ const u8x16 T = (u8x16)vec_xl(0, (uint32_t*)&upper[i]); \
-+ const u8x16 src = (u8x16)vec_xl(0, (uint32_t*)&in[i]); \
-+ vec_xst((u32x4)vec_add(Average2_u8(T, Tother), src), 0, &out[i]); \
-+ } \
-+ if (i != num_pixels) { \
-+ VP8LPredictorsAdd_C[(X)](in + i, upper + i, num_pixels - i, out + i); \
-+ } \
-+ }
-+GENERATE_PREDICTOR_2_VSX(8, upper[i - 1]) // Average TL, T.
-+GENERATE_PREDICTOR_2_VSX(9, upper[i + 1]) // Average T, TR.
-+#undef GENERATE_PREDICTOR_2_VSX
-+
-+// Predictor10: average of (average(L, TL), average(T, TR)).
-+static void PredictorAdd10_VSX(const uint32_t* in, const uint32_t* upper,
-+ int num_pixels, uint32_t* WEBP_RESTRICT out) {
-+ u8x16 L = (u8x16)Lane0(out[-1]);
-+ int i, k;
-+ for (i = 0; i + 4 <= num_pixels; i += 4) {
-+ u8x16 src = (u8x16)vec_xl(0, (uint32_t*)&in[i]);
-+ u8x16 TL = (u8x16)vec_xl(0, (uint32_t*)&upper[i - 1]);
-+ const u8x16 T = (u8x16)vec_xl(0, (uint32_t*)&upper[i]);
-+ const u8x16 TR = (u8x16)vec_xl(0, (uint32_t*)&upper[i + 1]);
-+ u8x16 avgTTR = Average2_u8(T, TR);
-+ for (k = 0; k < 4; ++k) {
-+ const u8x16 avg = Average2_u8(avgTTR, Average2_u8(L, TL));
-+ L = vec_add(avg, src);
-+ out[i + k] = vec_extract((u32x4)L, 0);
-+ avgTTR = SRLI(avgTTR, 4);
-+ TL = SRLI(TL, 4);
-+ src = SRLI(src, 4);
-+ }
-+ }
-+ if (i != num_pixels) {
-+ VP8LPredictorsAdd_C[10](in + i, upper + i, num_pixels - i, out + i);
-+ }
-+}
-+
-+// Predictor11: select between T and L based on |T-TL| vs |L-TL|.
-+static void PredictorAdd11_VSX(const uint32_t* in, const uint32_t* upper,
-+ int num_pixels, uint32_t* WEBP_RESTRICT out) {
-+ const u32x4 z32 = vec_splats((unsigned int)0);
-+ u8x16 L = (u8x16)Lane0(out[-1]);
-+ int i, k;
-+ for (i = 0; i + 4 <= num_pixels; i += 4) {
-+ u8x16 T = (u8x16)vec_xl(0, (uint32_t*)&upper[i]);
-+ u8x16 TL = (u8x16)vec_xl(0, (uint32_t*)&upper[i - 1]);
-+ u8x16 src = (u8x16)vec_xl(0, (uint32_t*)&in[i]);
-+ u8x16 pa = (u8x16)vec_sum4s(vec_or(vec_subs(T, TL), vec_subs(TL, T)), z32);
-+ for (k = 0; k < 4; ++k) {
-+ const u32x4 pb = vec_sum4s(vec_or(vec_subs(L, TL), vec_subs(TL, L)), z32);
-+ const u32x4 mask = (u32x4)vec_cmpgt(pb, (u32x4)pa); // pb > pa ? L : T
-+ const u8x16 pred = vec_sel(T, L, (u8x16)mask);
-+ L = vec_add(src, pred);
-+ out[i + k] = vec_extract((u32x4)L, 0);
-+ T = SRLI(T, 4);
-+ TL = SRLI(TL, 4);
-+ src = SRLI(src, 4);
-+ pa = SRLI(pa, 4);
-+ }
-+ }
-+ if (i != num_pixels) {
-+ VP8LPredictorsAdd_C[11](in + i, upper + i, num_pixels - i, out + i);
-+ }
-+}
-+
-+// Predictor12: ClampedAddSubtractFull. 'L' is kept unpacked to 16 bits in the
-+// low 4 lanes; 'diff' (= T - TL) holds two pixels, the active one in lanes 0-3.
-+#define DO_PRED12(DIFF) \
-+ do { \
-+ const i16x8 all = vec_add((i16x8)L, (DIFF)); \
-+ const u8x16 res = vec_add(src, vec_packsu(all, all)); \
-+ out[i + out_idx++] = vec_extract((u32x4)res, 0); \
-+ L = (u16x8)vec_mergeh(res, kZero); \
-+ } while (0)
-+
-+static void PredictorAdd12_VSX(const uint32_t* in, const uint32_t* upper,
-+ int num_pixels, uint32_t* WEBP_RESTRICT out) {
-+ u16x8 L = Unpack16(out[-1]);
-+ int i;
-+ for (i = 0; i + 4 <= num_pixels; i += 4) {
-+ int out_idx = 0;
-+ u8x16 src = (u8x16)vec_xl(0, (uint32_t*)&in[i]);
-+ const u8x16 T = (u8x16)vec_xl(0, (uint32_t*)&upper[i]);
-+ const u8x16 TL = (u8x16)vec_xl(0, (uint32_t*)&upper[i - 1]);
-+ // 16-bit gradient basis T - TL for the four pixels (low and high halves).
-+ i16x8 diff_lo =
-+ vec_sub((i16x8)vec_mergeh(T, kZero), (i16x8)vec_mergeh(TL, kZero));
-+ i16x8 diff_hi =
-+ vec_sub((i16x8)vec_mergel(T, kZero), (i16x8)vec_mergel(TL, kZero));
-+ DO_PRED12(diff_lo);
-+ diff_lo = (i16x8)SRLI((u8x16)diff_lo, 8);
-+ src = SRLI(src, 4);
-+ DO_PRED12(diff_lo);
-+ src = SRLI(src, 4);
-+ DO_PRED12(diff_hi);
-+ diff_hi = (i16x8)SRLI((u8x16)diff_hi, 8);
-+ src = SRLI(src, 4);
-+ DO_PRED12(diff_hi);
-+ }
-+ if (i != num_pixels) {
-+ VP8LPredictorsAdd_C[12](in + i, upper + i, num_pixels - i, out + i);
-+ }
-+}
-+#undef DO_PRED12
-+
-+#undef SLLI
-+#undef SRLI
-+
-+//------------------------------------------------------------------------------
-+
-+extern void VP8LDspInitVSX(void);
-+
-+WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitVSX(void) {
-+ VP8LPredictorsAdd[0] = PredictorAdd0_VSX;
-+ VP8LPredictorsAdd[1] = PredictorAdd1_VSX;
-+ VP8LPredictorsAdd[2] = PredictorAdd2_VSX;
-+ VP8LPredictorsAdd[3] = PredictorAdd3_VSX;
-+ VP8LPredictorsAdd[4] = PredictorAdd4_VSX;
-+ VP8LPredictorsAdd[5] = PredictorAdd5_VSX;
-+ VP8LPredictorsAdd[6] = PredictorAdd6_VSX;
-+ VP8LPredictorsAdd[7] = PredictorAdd7_VSX;
-+ VP8LPredictorsAdd[8] = PredictorAdd8_VSX;
-+ VP8LPredictorsAdd[9] = PredictorAdd9_VSX;
-+ VP8LPredictorsAdd[10] = PredictorAdd10_VSX;
-+ VP8LPredictorsAdd[11] = PredictorAdd11_VSX;
-+ VP8LPredictorsAdd[12] = PredictorAdd12_VSX;
-+ VP8LPredictorsAdd[13] = PredictorAdd13_VSX;
-+
-+ VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed_VSX;
-+ VP8LTransformColorInverse = TransformColorInverse_VSX;
-+ VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA_VSX;
-+ VP8LConvertBGRAToRGB = ConvertBGRAToRGB_VSX;
-+ VP8LConvertBGRAToBGR = ConvertBGRAToBGR_VSX;
-+}
-+
-+#else // !WEBP_USE_VSX
-+
-+WEBP_DSP_INIT_STUB(VP8LDspInitVSX)
-+
-+#endif // WEBP_USE_VSX
-diff --git a/media/libwebp/src/dsp/moz.build b/media/libwebp/src/dsp/moz.build
-index 8d6f8427c900..f3e9d1273110 100644
---- a/media/libwebp/src/dsp/moz.build
-+++ b/media/libwebp/src/dsp/moz.build
-@@ -118,6 +118,20 @@ elif CONFIG['TARGET_CPU'].startswith('mips'):
- 'yuv_mips32.c',
- 'yuv_mips_dsp_r2.c',
- ]
-+elif CONFIG['TARGET_CPU'] == 'ppc64':
-+ SOURCES += [
-+ 'alpha_processing_vsx.c',
-+ 'dec_vsx.c',
-+ 'filters_vsx.c',
-+ 'lossless_vsx.c',
-+ 'rescaler_vsx.c',
-+ 'upsampling_vsx.c',
-+ 'yuv_vsx.c',
-+ ]
-+ DEFINES['WEBP_HAVE_VSX'] = 1;
-+ for f in SOURCES:
-+ if f.endswith('vsx.c'):
-+ SOURCES[f].flags += ['-mvsx']
-
- if CONFIG['CC_TYPE'] in ('clang', 'clang-cl'):
- CFLAGS += ['-Wno-unreachable-code']
-diff --git a/media/libwebp/src/dsp/rescaler.c b/media/libwebp/src/dsp/rescaler.c
-index eafccd442f25..2c0c8c47a7a3 100644
---- a/media/libwebp/src/dsp/rescaler.c
-+++ b/media/libwebp/src/dsp/rescaler.c
-@@ -207,6 +207,7 @@ extern void WebPRescalerDspInitMIPS32(void);
- extern void WebPRescalerDspInitMIPSdspR2(void);
- extern void WebPRescalerDspInitMSA(void);
- extern void WebPRescalerDspInitNEON(void);
-+extern void WebPRescalerDspInitVSX(void);
-
- WEBP_DSP_INIT_FUNC(WebPRescalerDspInit) {
- #if !defined(WEBP_REDUCE_SIZE)
-@@ -238,6 +239,11 @@ WEBP_DSP_INIT_FUNC(WebPRescalerDspInit) {
- if (VP8GetCPUInfo(kMSA)) {
- WebPRescalerDspInitMSA();
- }
-+#endif
-+#if defined(WEBP_HAVE_VSX)
-+ if (VP8GetCPUInfo(kVSX)) {
-+ WebPRescalerDspInitVSX();
-+ }
- #endif
- }
-
-diff --git a/media/libwebp/src/dsp/rescaler_vsx.c b/media/libwebp/src/dsp/rescaler_vsx.c
-new file mode 100644
-index 000000000000..002f232d647a
---- /dev/null
-+++ b/media/libwebp/src/dsp/rescaler_vsx.c
-@@ -0,0 +1,201 @@
-+// Copyright 2015 Google Inc. All Rights Reserved.
-+//
-+// Use of this source code is governed by a BSD-style license
-+// that can be found in the COPYING file in the root of the source
-+// tree. An additional intellectual property rights grant can be found
-+// in the file PATENTS. All contributing project authors may
-+// be found in the AUTHORS file in the root of the source tree.
-+// -----------------------------------------------------------------------------
-+//
-+// VSX (PowerPC) version of rescaling functions.
-+
-+#include "src/dsp/dsp.h"
-+
-+#if defined(WEBP_USE_VSX) && !defined(WEBP_REDUCE_SIZE)
-+
-+#include <altivec.h>
-+#include <assert.h>
-+#include <string.h>
-+
-+#include "src/dsp/cpu.h"
-+#include "src/utils/rescaler_utils.h"
-+#include "src/webp/types.h"
-+
-+typedef __vector unsigned char u8x16;
-+typedef __vector signed short i16x8;
-+typedef __vector unsigned int u32x4;
-+typedef __vector signed int i32x4;
-+typedef __vector unsigned long long u64x2;
-+
-+#define ROUNDER (WEBP_RESCALER_ONE >> 1)
-+#define MULT_FIX_C(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
-+#define MULT_FIX_FLOOR_C(x, y) (((uint64_t)(x) * (y)) >> WEBP_RESCALER_RFIX)
-+
-+#if (WEBP_RESCALER_RFIX != 32)
-+#error "MULT_FIX/WEBP_RESCALER_RFIX need some more work"
-+#endif
-+
-+// Returns (x * scale + ROUNDER) >> 32 for each of the four 32-bit lanes.
-+static WEBP_INLINE u32x4 MultFix_VSX(u32x4 x, uint32_t scale) {
-+ const u64x2 rounder = vec_splats((unsigned long long)ROUNDER);
-+ const u64x2 shift = vec_splats((unsigned long long)WEBP_RESCALER_RFIX);
-+ const u32x4 s = vec_splats(scale);
-+ // vec_mule/vec_mulo produce the 32x32->64 products of the even (0, 2) and
-+ // odd (1, 3) lanes respectively.
-+ u64x2 e = vec_add(vec_mule(x, s), rounder);
-+ u64x2 o = vec_add(vec_mulo(x, s), rounder);
-+ e = vec_sr(e, shift);
-+ o = vec_sr(o, shift);
-+ return vec_mergee((u32x4)e, (u32x4)o);
-+}
-+
-+// Returns (x * scale) >> 32 for each lane (no rounding).
-+static WEBP_INLINE u32x4 MultFixFloor_VSX(u32x4 x, uint32_t scale) {
-+ const u64x2 shift = vec_splats((unsigned long long)WEBP_RESCALER_RFIX);
-+ const u32x4 s = vec_splats(scale);
-+ u64x2 e = vec_sr(vec_mule(x, s), shift);
-+ u64x2 o = vec_sr(vec_mulo(x, s), shift);
-+ return vec_mergee((u32x4)e, (u32x4)o);
-+}
-+
-+// Returns (A * frow + B * irow + ROUNDER) >> 32 for each lane.
-+static WEBP_INLINE u32x4 Interpolate_VSX(const rescaler_t* WEBP_RESTRICT frow,
-+ const rescaler_t* WEBP_RESTRICT irow,
-+ uint32_t A, uint32_t B) {
-+ const u64x2 rounder = vec_splats((unsigned long long)ROUNDER);
-+ const u64x2 shift = vec_splats((unsigned long long)WEBP_RESCALER_RFIX);
-+ const u32x4 f = vec_xl(0, (uint32_t*)frow);
-+ const u32x4 ir = vec_xl(0, (uint32_t*)irow);
-+ const u32x4 va = vec_splats(A);
-+ const u32x4 vb = vec_splats(B);
-+ u64x2 e = vec_add(vec_mule(f, va), vec_mule(ir, vb));
-+ u64x2 o = vec_add(vec_mulo(f, va), vec_mulo(ir, vb));
-+ e = vec_sr(vec_add(e, rounder), shift);
-+ o = vec_sr(vec_add(o, rounder), shift);
-+ return vec_mergee((u32x4)e, (u32x4)o);
-+}
-+
-+// Saturated pack of two 32-bit lane vectors (8 values) into 8 bytes at dst.
-+static WEBP_INLINE void Store8_VSX(u32x4 lo, u32x4 hi, uint8_t* dst) {
-+ const i16x8 s16 = vec_packs((i32x4)lo, (i32x4)hi);
-+ const u8x16 s8 = vec_packsu(s16, s16);
-+ memcpy(dst, &s8, 8);
-+}
-+
-+static void RescalerExportRowExpand_VSX(WebPRescaler* const wrk) {
-+ int x_out;
-+ uint8_t* const dst = wrk->dst;
-+ rescaler_t* const irow = wrk->irow;
-+ const int x_out_max = wrk->dst_width * wrk->num_channels;
-+ const int max_span = x_out_max & ~7;
-+ const rescaler_t* const frow = wrk->frow;
-+ const uint32_t fy_scale = wrk->fy_scale;
-+ assert(!WebPRescalerOutputDone(wrk));
-+ assert(wrk->y_accum <= 0);
-+ assert(wrk->y_expand);
-+ assert(wrk->y_sub != 0);
-+ if (wrk->y_accum == 0) {
-+ for (x_out = 0; x_out < max_span; x_out += 8) {
-+ const u32x4 A0 = vec_xl(0, (uint32_t*)(frow + x_out + 0));
-+ const u32x4 A1 = vec_xl(0, (uint32_t*)(frow + x_out + 4));
-+ const u32x4 B0 = MultFix_VSX(A0, fy_scale);
-+ const u32x4 B1 = MultFix_VSX(A1, fy_scale);
-+ Store8_VSX(B0, B1, dst + x_out);
-+ }
-+ for (; x_out < x_out_max; ++x_out) {
-+ const uint32_t J = frow[x_out];
-+ const int v = (int)MULT_FIX_C(J, fy_scale);
-+ dst[x_out] = (v > 255) ? 255u : (uint8_t)v;
-+ }
-+ } else {
-+ const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
-+ const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
-+ for (x_out = 0; x_out < max_span; x_out += 8) {
-+ const u32x4 C0 =
-+ Interpolate_VSX(frow + x_out + 0, irow + x_out + 0, A, B);
-+ const u32x4 C1 =
-+ Interpolate_VSX(frow + x_out + 4, irow + x_out + 4, A, B);
-+ const u32x4 D0 = MultFix_VSX(C0, fy_scale);
-+ const u32x4 D1 = MultFix_VSX(C1, fy_scale);
-+ Store8_VSX(D0, D1, dst + x_out);
-+ }
-+ for (; x_out < x_out_max; ++x_out) {
-+ const uint64_t I = (uint64_t)A * frow[x_out] + (uint64_t)B * irow[x_out];
-+ const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
-+ const int v = (int)MULT_FIX_C(J, fy_scale);
-+ dst[x_out] = (v > 255) ? 255u : (uint8_t)v;
-+ }
-+ }
-+}
-+
-+static void RescalerExportRowShrink_VSX(WebPRescaler* const wrk) {
-+ int x_out;
-+ uint8_t* const dst = wrk->dst;
-+ rescaler_t* const irow = wrk->irow;
-+ const int x_out_max = wrk->dst_width * wrk->num_channels;
-+ const int max_span = x_out_max & ~7;
-+ const rescaler_t* const frow = wrk->frow;
-+ const uint32_t yscale = wrk->fy_scale * (-wrk->y_accum);
-+ const uint32_t fxy_scale = wrk->fxy_scale;
-+ assert(!WebPRescalerOutputDone(wrk));
-+ assert(wrk->y_accum <= 0);
-+ assert(!wrk->y_expand);
-+ if (yscale) {
-+ for (x_out = 0; x_out < max_span; x_out += 8) {
-+ const u32x4 in0 = vec_xl(0, (uint32_t*)(frow + x_out + 0));
-+ const u32x4 in1 = vec_xl(0, (uint32_t*)(frow + x_out + 4));
-+ const u32x4 in2 = vec_xl(0, (uint32_t*)(irow + x_out + 0));
-+ const u32x4 in3 = vec_xl(0, (uint32_t*)(irow + x_out + 4));
-+ const u32x4 A0 = MultFixFloor_VSX(in0, yscale);
-+ const u32x4 A1 = MultFixFloor_VSX(in1, yscale);
-+ const u32x4 B0 = vec_sub(in2, A0);
-+ const u32x4 B1 = vec_sub(in3, A1);
-+ const u32x4 C0 = MultFix_VSX(B0, fxy_scale);
-+ const u32x4 C1 = MultFix_VSX(B1, fxy_scale);
-+ Store8_VSX(C0, C1, dst + x_out);
-+ vec_xst(A0, 0, (uint32_t*)(irow + x_out + 0));
-+ vec_xst(A1, 0, (uint32_t*)(irow + x_out + 4));
-+ }
-+ for (; x_out < x_out_max; ++x_out) {
-+ const uint32_t frac = (uint32_t)MULT_FIX_FLOOR_C(frow[x_out], yscale);
-+ const int v = (int)MULT_FIX_C(irow[x_out] - frac, fxy_scale);
-+ dst[x_out] = (v > 255) ? 255u : (uint8_t)v;
-+ irow[x_out] = frac; // new fractional start
-+ }
-+ } else {
-+ const u32x4 zero = vec_splats((uint32_t)0);
-+ for (x_out = 0; x_out < max_span; x_out += 8) {
-+ const u32x4 in0 = vec_xl(0, (uint32_t*)(irow + x_out + 0));
-+ const u32x4 in1 = vec_xl(0, (uint32_t*)(irow + x_out + 4));
-+ const u32x4 A0 = MultFix_VSX(in0, fxy_scale);
-+ const u32x4 A1 = MultFix_VSX(in1, fxy_scale);
-+ Store8_VSX(A0, A1, dst + x_out);
-+ vec_xst(zero, 0, (uint32_t*)(irow + x_out + 0));
-+ vec_xst(zero, 0, (uint32_t*)(irow + x_out + 4));
-+ }
-+ for (; x_out < x_out_max; ++x_out) {
-+ const int v = (int)MULT_FIX_C(irow[x_out], fxy_scale);
-+ dst[x_out] = (v > 255) ? 255u : (uint8_t)v;
-+ irow[x_out] = 0;
-+ }
-+ }
-+}
-+
-+#undef MULT_FIX_FLOOR_C
-+#undef MULT_FIX_C
-+#undef ROUNDER
-+
-+//------------------------------------------------------------------------------
-+
-+extern void WebPRescalerDspInitVSX(void);
-+
-+WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitVSX(void) {
-+ WebPRescalerExportRowExpand = RescalerExportRowExpand_VSX;
-+ WebPRescalerExportRowShrink = RescalerExportRowShrink_VSX;
-+}
-+
-+#else // !WEBP_USE_VSX
-+
-+WEBP_DSP_INIT_STUB(WebPRescalerDspInitVSX)
-+
-+#endif // WEBP_USE_VSX
-diff --git a/media/libwebp/src/dsp/upsampling.c b/media/libwebp/src/dsp/upsampling.c
-index c57f66c3553f..faecdf277393 100644
---- a/media/libwebp/src/dsp/upsampling.c
-+++ b/media/libwebp/src/dsp/upsampling.c
-@@ -235,6 +235,7 @@ extern VP8CPUInfo VP8GetCPUInfo;
- extern void WebPInitYUV444ConvertersMIPSdspR2(void);
- extern void WebPInitYUV444ConvertersSSE2(void);
- extern void WebPInitYUV444ConvertersSSE41(void);
-+extern void WebPInitYUV444ConvertersVSX(void);
-
- WEBP_DSP_INIT_FUNC(WebPInitYUV444Converters) {
- WebPYUV444Converters[MODE_RGBA] = WebPYuv444ToRgba_C;
-@@ -264,6 +265,11 @@ WEBP_DSP_INIT_FUNC(WebPInitYUV444Converters) {
- if (VP8GetCPUInfo(kMIPSdspR2)) {
- WebPInitYUV444ConvertersMIPSdspR2();
- }
-+#endif
-+#if defined(WEBP_HAVE_VSX)
-+ if (VP8GetCPUInfo(kVSX)) {
-+ WebPInitYUV444ConvertersVSX();
-+ }
- #endif
- }
- }
-@@ -276,6 +282,7 @@ extern void WebPInitUpsamplersSSE41(void);
- extern void WebPInitUpsamplersNEON(void);
- extern void WebPInitUpsamplersMIPSdspR2(void);
- extern void WebPInitUpsamplersMSA(void);
-+extern void WebPInitUpsamplersVSX(void);
-
- WEBP_DSP_INIT_FUNC(WebPInitUpsamplers) {
- #ifdef FANCY_UPSAMPLING
-@@ -314,6 +321,11 @@ WEBP_DSP_INIT_FUNC(WebPInitUpsamplers) {
- if (VP8GetCPUInfo(kMSA)) {
- WebPInitUpsamplersMSA();
- }
-+#endif
-+#if defined(WEBP_HAVE_VSX)
-+ if (VP8GetCPUInfo(kVSX)) {
-+ WebPInitUpsamplersVSX();
-+ }
- #endif
- }
-
-diff --git a/media/libwebp/src/dsp/upsampling_vsx.c b/media/libwebp/src/dsp/upsampling_vsx.c
-new file mode 100644
-index 000000000000..a7191972fc6e
---- /dev/null
-+++ b/media/libwebp/src/dsp/upsampling_vsx.c
-@@ -0,0 +1,151 @@
-+// Copyright 2011 Google Inc. All Rights Reserved.
-+//
-+// Use of this source code is governed by a BSD-style license
-+// that can be found in the COPYING file in the root of the source
-+// tree. An additional intellectual property rights grant can be found
-+// in the file PATENTS. All contributing project authors may
-+// be found in the AUTHORS file in the root of the source tree.
-+// -----------------------------------------------------------------------------
-+//
-+// VSX (PowerPC) version of YUV to RGB upsampling functions.
-+
-+#include "src/dsp/dsp.h"
-+
-+#if defined(WEBP_USE_VSX)
-+
-+#include <altivec.h>
-+#include <assert.h>
-+#include <string.h>
-+
-+#include "src/dsp/cpu.h"
-+#include "src/dsp/yuv.h"
-+#include "src/webp/decode.h"
-+#include "src/webp/types.h"
-+
-+typedef __vector unsigned char u8x16;
-+typedef __vector unsigned short u16x8;
-+
-+// Upsample 16 chroma pairs from rows r1/r2 (17 readable bytes each) into 32
-+// "top" bytes at out[0..31] and 32 "bottom" bytes at out[64..95], matching the
-+// fancy-upsampler diagonal weights (a + 3b + 3c + d) / 8 etc.
-+#define GET_M(ij, in) \
-+ vec_sub(vec_avg(k, (in)), \
-+ vec_and(vec_or(vec_and((ij), st), vec_xor(k, (in))), one))
-+
-+static void Upsample32Pixels(const uint8_t* WEBP_RESTRICT r1,
-+ const uint8_t* WEBP_RESTRICT r2,
-+ uint8_t* WEBP_RESTRICT out) {
-+ const u8x16 one = vec_splats((unsigned char)1);
-+ const u8x16 a = vec_xl(0, (const unsigned char*)r1);
-+ const u8x16 b = vec_xl(1, (const unsigned char*)r1);
-+ const u8x16 c = vec_xl(0, (const unsigned char*)r2);
-+ const u8x16 d = vec_xl(1, (const unsigned char*)r2);
-+ const u8x16 s = vec_avg(a, d);
-+ const u8x16 t = vec_avg(b, c);
-+ const u8x16 st = vec_xor(s, t);
-+ const u8x16 t3 =
-+ vec_and(vec_or(vec_or(vec_xor(a, d), vec_xor(b, c)), st), one);
-+ const u8x16 k = vec_sub(vec_avg(s, t), t3);
-+ const u8x16 diag1 = GET_M(vec_xor(b, c), t);
-+ const u8x16 diag2 = GET_M(vec_xor(a, d), s);
-+ const u8x16 ta = vec_avg(a, diag1), tb = vec_avg(b, diag2);
-+ const u8x16 tc = vec_avg(c, diag2), td = vec_avg(d, diag1);
-+ vec_xst(vec_mergeh(ta, tb), 0, out);
-+ vec_xst(vec_mergel(ta, tb), 0, out + 16);
-+ vec_xst(vec_mergeh(tc, td), 0, out + 64);
-+ vec_xst(vec_mergel(tc, td), 0, out + 80);
-+}
-+
-+#define UPSAMPLE_FUNC(FUNC_NAME, FUNC, FUNC32) \
-+static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \
-+ const uint8_t* WEBP_RESTRICT bottom_y, \
-+ const uint8_t* WEBP_RESTRICT top_u, \
-+ const uint8_t* WEBP_RESTRICT top_v, \
-+ const uint8_t* WEBP_RESTRICT cur_u, \
-+ const uint8_t* WEBP_RESTRICT cur_v, \
-+ uint8_t* WEBP_RESTRICT top_dst, \
-+ uint8_t* WEBP_RESTRICT bottom_dst, int len) { \
-+ int uv_pos, pos; \
-+ uint8_t uv_buf[14 * 32 + 15] = {0}; \
-+ uint8_t* const r_u = (uint8_t*)(((uintptr_t)(uv_buf + 15)) & ~(uintptr_t)15);\
-+ uint8_t* const r_v = r_u + 32; \
-+ assert(top_y != NULL); \
-+ { \
-+ const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1; \
-+ const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1; \
-+ FUNC(top_y[0], (top_u[0] + u_diag) >> 1, (top_v[0] + v_diag) >> 1, \
-+ top_dst); \
-+ if (bottom_y != NULL) { \
-+ FUNC(bottom_y[0], (cur_u[0] + u_diag) >> 1, (cur_v[0] + v_diag) >> 1, \
-+ bottom_dst); \
-+ } \
-+ } \
-+ for (pos = 1, uv_pos = 0; pos + 32 + 1 <= len; pos += 32, uv_pos += 16) { \
-+ Upsample32Pixels(top_u + uv_pos, cur_u + uv_pos, r_u); \
-+ Upsample32Pixels(top_v + uv_pos, cur_v + uv_pos, r_v); \
-+ FUNC32(top_y + pos, r_u, r_v, top_dst + pos * 4); \
-+ if (bottom_y != NULL) { \
-+ FUNC32(bottom_y + pos, r_u + 64, r_v + 64, bottom_dst + pos * 4); \
-+ } \
-+ } \
-+ if (len > 1) { \
-+ const int left_over = ((len + 1) >> 1) - (pos >> 1); \
-+ uint8_t* const tmp_top_dst = r_u + 4 * 32; \
-+ uint8_t* const tmp_bottom_dst = tmp_top_dst + 4 * 32; \
-+ uint8_t* const tmp_top = tmp_bottom_dst + 4 * 32; \
-+ uint8_t* const tmp_bottom = (bottom_y == NULL) ? NULL : tmp_top + 32; \
-+ uint8_t r1[17], r2[17]; \
-+ assert(left_over > 0); \
-+ memcpy(r1, top_u + uv_pos, left_over); \
-+ memcpy(r2, cur_u + uv_pos, left_over); \
-+ memset(r1 + left_over, r1[left_over - 1], 17 - left_over); \
-+ memset(r2 + left_over, r2[left_over - 1], 17 - left_over); \
-+ Upsample32Pixels(r1, r2, r_u); \
-+ memcpy(r1, top_v + uv_pos, left_over); \
-+ memcpy(r2, cur_v + uv_pos, left_over); \
-+ memset(r1 + left_over, r1[left_over - 1], 17 - left_over); \
-+ memset(r2 + left_over, r2[left_over - 1], 17 - left_over); \
-+ Upsample32Pixels(r1, r2, r_v); \
-+ memcpy(tmp_top, top_y + pos, len - pos); \
-+ if (bottom_y != NULL) memcpy(tmp_bottom, bottom_y + pos, len - pos); \
-+ FUNC32(tmp_top, r_u, r_v, tmp_top_dst); \
-+ if (bottom_y != NULL) FUNC32(tmp_bottom, r_u + 64, r_v + 64, \
-+ tmp_bottom_dst); \
-+ memcpy(top_dst + pos * 4, tmp_top_dst, (len - pos) * 4); \
-+ if (bottom_y != NULL) { \
-+ memcpy(bottom_dst + pos * 4, tmp_bottom_dst, (len - pos) * 4); \
-+ } \
-+ } \
-+}
-+
-+UPSAMPLE_FUNC(UpsampleRgbaLinePair_VSX, VP8YuvToRgba, VP8YuvToRgba32_VSX)
-+UPSAMPLE_FUNC(UpsampleBgraLinePair_VSX, VP8YuvToBgra, VP8YuvToBgra32_VSX)
-+UPSAMPLE_FUNC(UpsampleArgbLinePair_VSX, VP8YuvToArgb, VP8YuvToArgb32_VSX)
-+
-+extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
-+
-+extern void WebPInitUpsamplersVSX(void);
-+
-+WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersVSX(void) {
-+ WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair_VSX;
-+ WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair_VSX;
-+ WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair_VSX;
-+ WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair_VSX;
-+#if !defined(WEBP_REDUCE_CSP)
-+ WebPUpsamplers[MODE_ARGB] = UpsampleArgbLinePair_VSX;
-+ WebPUpsamplers[MODE_Argb] = UpsampleArgbLinePair_VSX;
-+#endif
-+}
-+
-+extern void WebPInitYUV444ConvertersVSX(void);
-+
-+// YUV444 point converters stay on the C path for now.
-+WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444ConvertersVSX(void) {}
-+
-+#else // !WEBP_USE_VSX
-+
-+WEBP_DSP_INIT_STUB(WebPInitYUV444ConvertersVSX)
-+
-+WEBP_DSP_INIT_STUB(WebPInitUpsamplersVSX)
-+
-+#endif // WEBP_USE_VSX
-diff --git a/media/libwebp/src/dsp/yuv.c b/media/libwebp/src/dsp/yuv.c
-index 62f1ecc1567d..9a95c5de1e23 100644
---- a/media/libwebp/src/dsp/yuv.c
-+++ b/media/libwebp/src/dsp/yuv.c
-@@ -81,6 +81,7 @@ extern void WebPInitSamplersSSE2(void);
- extern void WebPInitSamplersSSE41(void);
- extern void WebPInitSamplersMIPS32(void);
- extern void WebPInitSamplersMIPSdspR2(void);
-+extern void WebPInitSamplersVSX(void);
-
- WEBP_DSP_INIT_FUNC(WebPInitSamplers) {
- WebPSamplers[MODE_RGB] = YuvToRgbRow;
-@@ -117,6 +118,11 @@ WEBP_DSP_INIT_FUNC(WebPInitSamplers) {
- WebPInitSamplersMIPSdspR2();
- }
- #endif // WEBP_USE_MIPS_DSP_R2
-+#if defined(WEBP_HAVE_VSX)
-+ if (VP8GetCPUInfo(kVSX)) {
-+ WebPInitSamplersVSX();
-+ }
-+#endif
- }
- }
-
-diff --git a/media/libwebp/src/dsp/yuv.h b/media/libwebp/src/dsp/yuv.h
-index 6f218cf7e07f..979891d3232d 100644
---- a/media/libwebp/src/dsp/yuv.h
-+++ b/media/libwebp/src/dsp/yuv.h
-@@ -182,6 +182,27 @@ void VP8YuvToRgb56532_SSE2(const uint8_t* WEBP_RESTRICT y,
-
- #endif // WEBP_USE_SSE2
-
-+//-----------------------------------------------------------------------------
-+// VSX extra functions (mostly for upsampling_vsx.c)
-+
-+#if defined(WEBP_USE_VSX)
-+
-+// Process 32 pixels and store the 32b-per-pixel result in *dst.
-+void VP8YuvToRgba32_VSX(const uint8_t* WEBP_RESTRICT y,
-+ const uint8_t* WEBP_RESTRICT u,
-+ const uint8_t* WEBP_RESTRICT v,
-+ uint8_t* WEBP_RESTRICT dst);
-+void VP8YuvToBgra32_VSX(const uint8_t* WEBP_RESTRICT y,
-+ const uint8_t* WEBP_RESTRICT u,
-+ const uint8_t* WEBP_RESTRICT v,
-+ uint8_t* WEBP_RESTRICT dst);
-+void VP8YuvToArgb32_VSX(const uint8_t* WEBP_RESTRICT y,
-+ const uint8_t* WEBP_RESTRICT u,
-+ const uint8_t* WEBP_RESTRICT v,
-+ uint8_t* WEBP_RESTRICT dst);
-+
-+#endif // WEBP_USE_VSX
-+
- //-----------------------------------------------------------------------------
- // SSE41 extra functions (mostly for upsampling_sse41.c)
-
-diff --git a/media/libwebp/src/dsp/yuv_vsx.c b/media/libwebp/src/dsp/yuv_vsx.c
-new file mode 100644
-index 000000000000..1fdc5c80ba16
---- /dev/null
-+++ b/media/libwebp/src/dsp/yuv_vsx.c
-@@ -0,0 +1,206 @@
-+// Copyright 2014 Google Inc. All Rights Reserved.
-+//
-+// Use of this source code is governed by a BSD-style license
-+// that can be found in the COPYING file in the root of the source
-+// tree. An additional intellectual property rights grant can be found
-+// in the file PATENTS. All contributing project authors may
-+// be found in the AUTHORS file in the root of the source tree.
-+// -----------------------------------------------------------------------------
-+//
-+// VSX (PowerPC) version of YUV->RGB conversion functions.
-+
-+#include "src/dsp/dsp.h"
-+
-+#if defined(WEBP_USE_VSX)
-+
-+#include <altivec.h>
-+#include <string.h>
-+
-+#include "src/dsp/yuv.h"
-+
-+typedef __vector unsigned char u8x16;
-+typedef __vector unsigned short u16x8;
-+typedef __vector signed short i16x8;
-+typedef __vector unsigned int u32x4;
-+
-+// POWER8 has no "multiply-high unsigned halfword", so emulate _mm_mulhi_epu16
-+// via even/odd 16x16->32 products, >>16, then interleave back.
-+static WEBP_INLINE u16x8 MulHi16(u16x8 a, u16x8 b) {
-+ const u32x4 sh = vec_splats((unsigned int)16);
-+ const u32x4 e = vec_sr(vec_mule(a, b), sh);
-+ const u32x4 o = vec_sr(vec_mulo(a, b), sh);
-+ return vec_pack(vec_mergeh(e, o), vec_mergel(e, o));
-+}
-+
-+// 14b fixed-point ITU-R BT.601 YUV->RGB, matching the SSE2/scalar path.
-+// Inputs are samples pre-shifted into the high byte (<< 8).
-+static WEBP_INLINE void ConvertYUV444ToRGB(u16x8 Y0, u16x8 U0, u16x8 V0,
-+ i16x8* const R, i16x8* const G,
-+ u16x8* const B) {
-+ const u16x8 k19077 = vec_splats((unsigned short)19077);
-+ const u16x8 k26149 = vec_splats((unsigned short)26149);
-+ const u16x8 k14234 = vec_splats((unsigned short)14234);
-+ const u16x8 k33050 = vec_splats((unsigned short)33050);
-+ const u16x8 k17685 = vec_splats((unsigned short)17685);
-+ const u16x8 k6419 = vec_splats((unsigned short)6419);
-+ const u16x8 k13320 = vec_splats((unsigned short)13320);
-+ const u16x8 k8708 = vec_splats((unsigned short)8708);
-+ const u16x8 six = vec_splats((unsigned short)6);
-+
-+ const u16x8 Y1 = MulHi16(Y0, k19077);
-+ const u16x8 R2 = vec_add(vec_sub(Y1, k14234), MulHi16(V0, k26149));
-+ const u16x8 G4 = vec_sub(vec_add(Y1, k8708),
-+ vec_add(MulHi16(U0, k6419), MulHi16(V0, k13320)));
-+ // 33050 needs unsigned saturating arithmetic; B can exceed 32767.
-+ const u16x8 B2 = vec_subs(vec_adds(MulHi16(U0, k33050), Y1), k17685);
-+
-+ *R = vec_sra((i16x8)R2, six);
-+ *G = vec_sra((i16x8)G4, six);
-+ *B = vec_sr(B2, six);
-+}
-+
-+// Load 8 bytes into the high byte of 8 u16 lanes (i.e. sample << 8).
-+// Use an 8-byte copy (not a 16-byte vector load) to avoid reading past the
-+// end of the source row, matching the SSE2 _mm_loadl_epi64 behavior.
-+static WEBP_INLINE u16x8 LoadHi16(const uint8_t* WEBP_RESTRICT src) {
-+ const u8x16 zero = vec_splats((unsigned char)0);
-+ unsigned char tmp[16] = {0};
-+ memcpy(tmp, src, 8);
-+ return (u16x8)vec_mergeh(zero, vec_xl(0, tmp));
-+}
-+
-+// Load 4 U/V bytes, shift into the high byte, and replicate each sample.
-+static WEBP_INLINE u16x8 LoadUVHi8(const uint8_t* WEBP_RESTRICT src) {
-+ const u8x16 zero = vec_splats((unsigned char)0);
-+ unsigned char tmp[16] = {0};
-+ memcpy(tmp, src, 4);
-+ const u16x8 t = (u16x8)vec_mergeh(zero, vec_xl(0, tmp));
-+ return vec_mergeh(t, t);
-+}
-+
-+static WEBP_INLINE void YUV420ToRGB(const uint8_t* WEBP_RESTRICT y,
-+ const uint8_t* WEBP_RESTRICT u,
-+ const uint8_t* WEBP_RESTRICT v,
-+ i16x8* const R, i16x8* const G,
-+ u16x8* const B) {
-+ ConvertYUV444ToRGB(LoadHi16(y), LoadUVHi8(u), LoadUVHi8(v), R, G, B);
-+}
-+
-+// Pack four 8-lane channels into 32 interleaved bytes (c0 c1 c2 c3 per pixel).
-+static WEBP_INLINE void PackAndStore4(i16x8 c0, i16x8 c1, i16x8 c2, i16x8 c3,
-+ uint8_t* WEBP_RESTRICT dst) {
-+ const u8x16 c02 = vec_packsu(c0, c2);
-+ const u8x16 c13 = vec_packsu(c1, c3);
-+ const u8x16 lo8 = vec_mergeh(c02, c13);
-+ const u8x16 hi8 = vec_mergel(c02, c13);
-+ vec_xst((u8x16)vec_mergeh((u16x8)lo8, (u16x8)hi8), 0, dst);
-+ vec_xst((u8x16)vec_mergel((u16x8)lo8, (u16x8)hi8), 0, dst + 16);
-+}
-+
-+static const i16x8 kAlpha = {255, 255, 255, 255, 255, 255, 255, 255};
-+
-+static void YuvToRgbaRow_VSX(const uint8_t* WEBP_RESTRICT y,
-+ const uint8_t* WEBP_RESTRICT u,
-+ const uint8_t* WEBP_RESTRICT v,
-+ uint8_t* WEBP_RESTRICT dst, int len) {
-+ int n;
-+ for (n = 0; n + 8 <= len; n += 8, dst += 32) {
-+ i16x8 R, G; u16x8 B;
-+ YUV420ToRGB(y, u, v, &R, &G, &B);
-+ PackAndStore4(R, G, (i16x8)B, kAlpha, dst);
-+ y += 8; u += 4; v += 4;
-+ }
-+ for (; n < len; ++n) {
-+ VP8YuvToRgba(y[0], u[0], v[0], dst);
-+ dst += 4; y += 1; u += (n & 1); v += (n & 1);
-+ }
-+}
-+
-+static void YuvToBgraRow_VSX(const uint8_t* WEBP_RESTRICT y,
-+ const uint8_t* WEBP_RESTRICT u,
-+ const uint8_t* WEBP_RESTRICT v,
-+ uint8_t* WEBP_RESTRICT dst, int len) {
-+ int n;
-+ for (n = 0; n + 8 <= len; n += 8, dst += 32) {
-+ i16x8 R, G; u16x8 B;
-+ YUV420ToRGB(y, u, v, &R, &G, &B);
-+ PackAndStore4((i16x8)B, G, R, kAlpha, dst);
-+ y += 8; u += 4; v += 4;
-+ }
-+ for (; n < len; ++n) {
-+ VP8YuvToBgra(y[0], u[0], v[0], dst);
-+ dst += 4; y += 1; u += (n & 1); v += (n & 1);
-+ }
-+}
-+
-+static void YuvToArgbRow_VSX(const uint8_t* WEBP_RESTRICT y,
-+ const uint8_t* WEBP_RESTRICT u,
-+ const uint8_t* WEBP_RESTRICT v,
-+ uint8_t* WEBP_RESTRICT dst, int len) {
-+ int n;
-+ for (n = 0; n + 8 <= len; n += 8, dst += 32) {
-+ i16x8 R, G; u16x8 B;
-+ YUV420ToRGB(y, u, v, &R, &G, &B);
-+ PackAndStore4(kAlpha, R, G, (i16x8)B, dst);
-+ y += 8; u += 4; v += 4;
-+ }
-+ for (; n < len; ++n) {
-+ VP8YuvToArgb(y[0], u[0], v[0], dst);
-+ dst += 4; y += 1; u += (n & 1); v += (n & 1);
-+ }
-+}
-+
-+// Convert 32 YUV444 pixels and store the 32b-per-pixel result. Used by the
-+// fancy upsampler in upsampling_vsx.c.
-+void VP8YuvToRgba32_VSX(const uint8_t* WEBP_RESTRICT y,
-+ const uint8_t* WEBP_RESTRICT u,
-+ const uint8_t* WEBP_RESTRICT v,
-+ uint8_t* WEBP_RESTRICT dst) {
-+ int n;
-+ for (n = 0; n < 32; n += 8, dst += 32) {
-+ i16x8 R, G; u16x8 B;
-+ ConvertYUV444ToRGB(LoadHi16(y + n), LoadHi16(u + n), LoadHi16(v + n),
-+ &R, &G, &B);
-+ PackAndStore4(R, G, (i16x8)B, kAlpha, dst);
-+ }
-+}
-+
-+void VP8YuvToBgra32_VSX(const uint8_t* WEBP_RESTRICT y,
-+ const uint8_t* WEBP_RESTRICT u,
-+ const uint8_t* WEBP_RESTRICT v,
-+ uint8_t* WEBP_RESTRICT dst) {
-+ int n;
-+ for (n = 0; n < 32; n += 8, dst += 32) {
-+ i16x8 R, G; u16x8 B;
-+ ConvertYUV444ToRGB(LoadHi16(y + n), LoadHi16(u + n), LoadHi16(v + n),
-+ &R, &G, &B);
-+ PackAndStore4((i16x8)B, G, R, kAlpha, dst);
-+ }
-+}
-+
-+void VP8YuvToArgb32_VSX(const uint8_t* WEBP_RESTRICT y,
-+ const uint8_t* WEBP_RESTRICT u,
-+ const uint8_t* WEBP_RESTRICT v,
-+ uint8_t* WEBP_RESTRICT dst) {
-+ int n;
-+ for (n = 0; n < 32; n += 8, dst += 32) {
-+ i16x8 R, G; u16x8 B;
-+ ConvertYUV444ToRGB(LoadHi16(y + n), LoadHi16(u + n), LoadHi16(v + n),
-+ &R, &G, &B);
-+ PackAndStore4(kAlpha, R, G, (i16x8)B, dst);
-+ }
-+}
-+
-+extern void WebPInitSamplersVSX(void);
-+
-+WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplersVSX(void) {
-+ WebPSamplers[MODE_RGBA] = YuvToRgbaRow_VSX;
-+ WebPSamplers[MODE_BGRA] = YuvToBgraRow_VSX;
-+ WebPSamplers[MODE_ARGB] = YuvToArgbRow_VSX;
-+}
-+
-+#else // !WEBP_USE_VSX
-+
-+WEBP_DSP_INIT_STUB(WebPInitSamplersVSX)
-+
-+#endif // WEBP_USE_VSX
-diff --git a/media/libwebp/src/moz/cpu.cpp b/media/libwebp/src/moz/cpu.cpp
-index c6633170c923..82986d2f631e 100644
---- a/media/libwebp/src/moz/cpu.cpp
-+++ b/media/libwebp/src/moz/cpu.cpp
-@@ -35,6 +35,10 @@ static int MozCPUInfo(CPUFeature feature)
- case kMIPSdspR2:
- case kMSA:
- return 1;
-+#endif
-+#if defined(WEBP_USE_VSX)
-+ case kVSX:
-+ return 1;
- #endif
- default:
- return 0;
---
-2.52.0
-
diff --git a/0003-Add-PPC64LE-JIT-backend.patch b/0003-Add-PPC64LE-JIT-backend.patch
deleted file mode 100644
index ee08b33..0000000
--- a/0003-Add-PPC64LE-JIT-backend.patch
+++ /dev/null
@@ -1,38205 +0,0 @@
-From c79926e41764c6aa6ae596812b23bc35b470028c Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Trung=20L=C3=AA?= <8@tle.id.au>
-Date: Fri, 12 Jun 2026 16:02:28 +1000
-Subject: [PATCH 3/3] Add PPC64LE JIT backend
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Based on the work done by Cameron Kaiser and Justin Hibbits
-https://github.com/chmeeedalf/gecko-dev
-
-Co-authored-by: Cameron Kaiser <classilla@floodgap.com>
-Co-authored-by: Justin Hibbits <chmeeedalf@gmail.com>
-Assisted-by: Lance Albertson <lance@osuosl.org>
-Assisted-by: Thushan Fernando <thushan@thushanfernando.com>
-Assisted-by: Timothy Pearson <tpearson@solidsilicon.com>
-Assisted-by: Dan Horák <dan@danny.cz>
-Assisted-by: Hiếu Lê <modology@gmail.com>
-Assisted-by: Claude Fable 5 <noreply@anthropic.com>
----
- config/check_macroassembler_style.py | 2 +
- js/moz.configure | 34 +-
- js/src/builtin/TestingFunctions.cpp | 18 +
- js/src/irregexp/RegExpAPI.cpp | 5 +-
- .../irregexp/RegExpNativeMacroAssembler.cpp | 28 +
- .../tests/baseline/ppc64-branch8-16-narrow.js | 103 +
- js/src/jit-test/tests/gc/gcparam.js | 3 +-
- .../tests/ion/mod-constant-pow2-minus-one.js | 78 +
- .../tests/ion/mod-pow2-negative-dividend.js | 71 +
- .../tests/math-min-max-corner-cases.js | 50 +
- js/src/jit-test/tests/wasm/atomicity.js | 8 +-
- .../jit-test/tests/wasm/excessive-inlining.js | 19 +-
- .../jit-test/tests/wasm/memory-oob-message.js | 10 +-
- .../tests/wasm/ppc64-argon2-tiering.js | 124 +
- .../tests/wasm/ppc64-compare-select-bench.js | 70 +
- .../jit-test/tests/wasm/ppc64-extmul-alias.js | 107 +
- .../tests/wasm/ppc64-simd-vr-clobber.js | 179 +
- js/src/jit-test/tests/wasm/profiling.js | 7 +
- .../wasm/regress-ppc64-extract-lane-ctz.js | 49 +
- .../wasm/regress-ppc64-select-condition.js | 30 +
- .../wasm/regress-ppc64-trap-exit-simd-save.js | 64 +
- .../bug-ppc64-simd-reduce-and-branch.js | 7 +
- .../bug-ppc64-simd-reduce-and-branch.wasm | Bin 0 -> 1148 bytes
- js/src/jit-test/tests/wasm/simd/bug1946618.js | 7 +-
- .../jit-test/tests/wasm/simd/ion-analysis.js | 7 +-
- js/src/jit/Assembler.h | 2 +
- js/src/jit/BaselineIC.cpp | 2 +
- js/src/jit/CacheIRCompiler.cpp | 16 +
- js/src/jit/CodeGenerator.cpp | 6 +
- js/src/jit/CodeGenerator.h | 2 +
- js/src/jit/EffectiveAddressAnalysis.cpp | 2 +-
- js/src/jit/ExecutableAllocator.cpp | 10 +-
- js/src/jit/FlushICache.cpp | 3 +-
- js/src/jit/FlushICache.h | 11 +-
- js/src/jit/GenerateABIFunctionType.py | 100 +
- js/src/jit/JitContext.cpp | 4 +
- js/src/jit/JitFrames.cpp | 10 +
- js/src/jit/JitFrames.h | 12 +-
- js/src/jit/LIR.cpp | 4 +-
- js/src/jit/LIR.h | 10 +-
- js/src/jit/LIROps.yaml | 82 +-
- js/src/jit/Label.h | 2 +-
- js/src/jit/Lowering.cpp | 2 +-
- js/src/jit/Lowering.h | 2 +
- js/src/jit/MacroAssembler-inl.h | 2 +
- js/src/jit/MacroAssembler.cpp | 25 +-
- js/src/jit/MacroAssembler.h | 647 +-
- js/src/jit/MoveEmitter.h | 2 +
- js/src/jit/MoveResolver.cpp | 16 +
- js/src/jit/RegisterAllocator.h | 7 +-
- js/src/jit/Registers.h | 2 +
- js/src/jit/Safepoints.cpp | 11 +
- js/src/jit/SharedICHelpers-inl.h | 2 +
- js/src/jit/SharedICHelpers.h | 2 +
- js/src/jit/SharedICRegisters.h | 2 +
- js/src/jit/Simulator.h | 2 +
- js/src/jit/moz.build | 12 +
- js/src/jit/ppc64/Architecture-ppc64.cpp | 221 +
- js/src/jit/ppc64/Architecture-ppc64.h | 581 ++
- js/src/jit/ppc64/Assembler-ppc64.cpp | 3028 +++++++
- js/src/jit/ppc64/Assembler-ppc64.h | 2114 +++++
- js/src/jit/ppc64/CodeGenerator-ppc64.cpp | 3647 ++++++++
- js/src/jit/ppc64/CodeGenerator-ppc64.h | 101 +
- js/src/jit/ppc64/LIR-ppc64.h | 135 +
- js/src/jit/ppc64/Lowering-ppc64.cpp | 1324 +++
- js/src/jit/ppc64/Lowering-ppc64.h | 105 +
- js/src/jit/ppc64/MacroAssembler-ppc64-inl.h | 6142 ++++++++++++++
- js/src/jit/ppc64/MacroAssembler-ppc64.cpp | 3467 ++++++++
- js/src/jit/ppc64/MacroAssembler-ppc64.h | 2031 +++++
- js/src/jit/ppc64/MoveEmitter-ppc64.cpp | 357 +
- js/src/jit/ppc64/MoveEmitter-ppc64.h | 64 +
- js/src/jit/ppc64/SharedICHelpers-ppc64-inl.h | 83 +
- js/src/jit/ppc64/SharedICHelpers-ppc64.h | 97 +
- js/src/jit/ppc64/SharedICRegisters-ppc64.h | 46 +
- js/src/jit/ppc64/Simulator-ppc64.cpp | 7296 +++++++++++++++++
- js/src/jit/ppc64/Simulator-ppc64.h | 556 ++
- js/src/jit/ppc64/Trampoline-ppc64.cpp | 648 ++
- js/src/jit/shared/Assembler-shared.h | 5 +-
- .../AtomicOperations-feeling-lucky-gcc.h | 3 +-
- js/src/jit/shared/CodeGenerator-shared.cpp | 6 +-
- js/src/jit/shared/Lowering-shared-inl.h | 2 +-
- js/src/js-config.mozbuild | 1 +
- js/src/jsapi-tests/testJitABIcalls.cpp | 3 +
- js/src/jsapi-tests/testWasmReturnCalls.cpp | 10 +-
- js/src/jsapi-tests/testsJit.cpp | 20 +
- js/src/shell/js.cpp | 25 +
- js/src/shell/jsshell.h | 3 +-
- js/src/tests/shell/os.js | 8 +-
- js/src/util/Poison.h | 2 +
- js/src/wasm/WasmAnyRef.h | 7 +-
- js/src/wasm/WasmBCDefs.h | 7 +
- js/src/wasm/WasmBCMemory.cpp | 47 +-
- js/src/wasm/WasmBCRegDefs.h | 12 +-
- js/src/wasm/WasmBaselineCompile.cpp | 148 +-
- js/src/wasm/WasmCodegenConstants.h | 3 +-
- js/src/wasm/WasmCodegenTypes.cpp | 11 +-
- js/src/wasm/WasmCompile.cpp | 6 +-
- js/src/wasm/WasmFrameIter.cpp | 118 +
- js/src/wasm/WasmGC.cpp | 8 +
- js/src/wasm/WasmGenerator.cpp | 18 +-
- js/src/wasm/WasmIonCompile.cpp | 2 +-
- js/src/wasm/WasmMemory.cpp | 4 +-
- js/src/wasm/WasmSignalHandlers.cpp | 20 +-
- js/src/wasm/WasmStacks.cpp | 31 +-
- js/src/wasm/WasmStubs.cpp | 43 +-
- js/src/wasm/WasmSummarizeInsn.cpp | 163 +
- js/src/wasm/WasmValue.cpp | 2 +-
- mfbt/Assertions.h | 5 +
- 108 files changed, 34442 insertions(+), 438 deletions(-)
- create mode 100644 js/src/jit-test/tests/baseline/ppc64-branch8-16-narrow.js
- create mode 100644 js/src/jit-test/tests/ion/mod-constant-pow2-minus-one.js
- create mode 100644 js/src/jit-test/tests/ion/mod-pow2-negative-dividend.js
- create mode 100644 js/src/jit-test/tests/math-min-max-corner-cases.js
- create mode 100644 js/src/jit-test/tests/wasm/ppc64-argon2-tiering.js
- create mode 100644 js/src/jit-test/tests/wasm/ppc64-compare-select-bench.js
- create mode 100644 js/src/jit-test/tests/wasm/ppc64-extmul-alias.js
- create mode 100644 js/src/jit-test/tests/wasm/ppc64-simd-vr-clobber.js
- create mode 100644 js/src/jit-test/tests/wasm/regress-ppc64-extract-lane-ctz.js
- create mode 100644 js/src/jit-test/tests/wasm/regress-ppc64-select-condition.js
- create mode 100644 js/src/jit-test/tests/wasm/regress-ppc64-trap-exit-simd-save.js
- create mode 100644 js/src/jit-test/tests/wasm/regress/bug-ppc64-simd-reduce-and-branch.js
- create mode 100644 js/src/jit-test/tests/wasm/regress/bug-ppc64-simd-reduce-and-branch.wasm
- create mode 100644 js/src/jit/ppc64/Architecture-ppc64.cpp
- create mode 100644 js/src/jit/ppc64/Architecture-ppc64.h
- create mode 100644 js/src/jit/ppc64/Assembler-ppc64.cpp
- create mode 100644 js/src/jit/ppc64/Assembler-ppc64.h
- create mode 100644 js/src/jit/ppc64/CodeGenerator-ppc64.cpp
- create mode 100644 js/src/jit/ppc64/CodeGenerator-ppc64.h
- create mode 100644 js/src/jit/ppc64/LIR-ppc64.h
- create mode 100644 js/src/jit/ppc64/Lowering-ppc64.cpp
- create mode 100644 js/src/jit/ppc64/Lowering-ppc64.h
- create mode 100644 js/src/jit/ppc64/MacroAssembler-ppc64-inl.h
- create mode 100644 js/src/jit/ppc64/MacroAssembler-ppc64.cpp
- create mode 100644 js/src/jit/ppc64/MacroAssembler-ppc64.h
- create mode 100644 js/src/jit/ppc64/MoveEmitter-ppc64.cpp
- create mode 100644 js/src/jit/ppc64/MoveEmitter-ppc64.h
- create mode 100644 js/src/jit/ppc64/SharedICHelpers-ppc64-inl.h
- create mode 100644 js/src/jit/ppc64/SharedICHelpers-ppc64.h
- create mode 100644 js/src/jit/ppc64/SharedICRegisters-ppc64.h
- create mode 100644 js/src/jit/ppc64/Simulator-ppc64.cpp
- create mode 100644 js/src/jit/ppc64/Simulator-ppc64.h
- create mode 100644 js/src/jit/ppc64/Trampoline-ppc64.cpp
-
-diff --git a/config/check_macroassembler_style.py b/config/check_macroassembler_style.py
-index aa1a54104e26..ba73de388099 100644
---- a/config/check_macroassembler_style.py
-+++ b/config/check_macroassembler_style.py
-@@ -33,6 +33,7 @@ all_architecture_names = set([
- "arm64",
- "loong64",
- "riscv64",
-+ "ppc64",
- "wasm32",
- ])
- all_shared_architecture_names = set([
-@@ -41,6 +42,7 @@ all_shared_architecture_names = set([
- "arm64",
- "loong64",
- "riscv64",
-+ "ppc64",
- "wasm32",
- ])
-
-diff --git a/js/moz.configure b/js/moz.configure
-index 26cc85622654..5310dd08506f 100644
---- a/js/moz.configure
-+++ b/js/moz.configure
-@@ -264,6 +264,7 @@ def jit_default(target, enable_portable_baseline_interp):
- "aarch64",
- "mips64",
- "loongarch64",
-+ "ppc64",
- "riscv64",
- ):
- return True
-@@ -285,7 +286,7 @@ def report_deprecated(value):
- # =======================================================
- option(
- "--enable-simulator",
-- choices=("arm", "arm64", "mips64", "loong64", "riscv64"),
-+ choices=("arm", "arm64", "mips64", "loong64", "riscv64", "ppc64"),
- nargs=1,
- help="Enable a JIT code simulator for the specified architecture",
- )
-@@ -302,7 +303,7 @@ def simulator(jit_enabled, simulator_enabled, target):
- if target.cpu != "x86":
- die("The %s simulator only works on x86." % sim_cpu)
-
-- if sim_cpu in ("arm64", "mips64", "loong64", "riscv64"):
-+ if sim_cpu in ("arm64", "mips64", "loong64", "riscv64", "ppc64"):
- if target.cpu != "x86_64" and target.cpu != "aarch64":
- die("The %s simulator only works on x86-64 or arm64." % sim_cpu)
-
-@@ -315,12 +316,14 @@ set_config("JS_SIMULATOR_ARM64", simulator.arm64)
- set_config("JS_SIMULATOR_MIPS64", simulator.mips64)
- set_config("JS_SIMULATOR_LOONG64", simulator.loong64)
- set_config("JS_SIMULATOR_RISCV64", simulator.riscv64)
-+set_config("JS_SIMULATOR_PPC64", simulator.ppc64)
- set_define("JS_SIMULATOR", depends_if(simulator)(lambda x: True))
- set_define("JS_SIMULATOR_ARM", simulator.arm)
- set_define("JS_SIMULATOR_ARM64", simulator.arm64)
- set_define("JS_SIMULATOR_MIPS64", simulator.mips64)
- set_define("JS_SIMULATOR_LOONG64", simulator.loong64)
- set_define("JS_SIMULATOR_RISCV64", simulator.riscv64)
-+set_define("JS_SIMULATOR_PPC64", simulator.ppc64)
-
-
- @depends("--enable-jit", simulator, target)
-@@ -337,6 +340,8 @@ def jit_codegen(jit_enabled, simulator, target):
- return namespace(x64=True)
- elif target.cpu == "loongarch64":
- return namespace(loong64=True)
-+ elif target.cpu == "ppc64":
-+ return namespace(ppc64=True)
- elif target.cpu == "riscv64":
- return namespace(riscv64=True)
-
-@@ -348,6 +353,7 @@ set_config("JS_CODEGEN_ARM", jit_codegen.arm)
- set_config("JS_CODEGEN_ARM64", jit_codegen.arm64)
- set_config("JS_CODEGEN_MIPS64", jit_codegen.mips64)
- set_config("JS_CODEGEN_LOONG64", jit_codegen.loong64)
-+set_config("JS_CODEGEN_PPC64", jit_codegen.ppc64)
- set_config("JS_CODEGEN_RISCV64", jit_codegen.riscv64)
- set_config("JS_CODEGEN_X86", jit_codegen.x86)
- set_config("JS_CODEGEN_X64", jit_codegen.x64)
-@@ -358,6 +364,7 @@ set_define("JS_CODEGEN_ARM", jit_codegen.arm)
- set_define("JS_CODEGEN_ARM64", jit_codegen.arm64)
- set_define("JS_CODEGEN_MIPS64", jit_codegen.mips64)
- set_define("JS_CODEGEN_LOONG64", jit_codegen.loong64)
-+set_define("JS_CODEGEN_PPC64", jit_codegen.ppc64)
- set_define("JS_CODEGEN_RISCV64", jit_codegen.riscv64)
- set_define("JS_CODEGEN_X86", jit_codegen.x86)
- set_define("JS_CODEGEN_X64", jit_codegen.x64)
-@@ -728,7 +735,7 @@ def default_wasm_jspi(
- return
-
- if simulator:
-- return simulator[0] in ("arm64", "arm", "loong64", "mips64", "riscv64")
-+ return simulator[0] in ("arm64", "arm", "loong64", "mips64", "ppc64", "riscv64")
-
- if target.cpu in (
- "x86_64",
-@@ -737,6 +744,7 @@ def default_wasm_jspi(
- "arm",
- "loongarch64",
- "mips64",
-+ "ppc64",
- "riscv64",
- ):
- return True
-@@ -768,10 +776,11 @@ def wasm_jspi(value, jit_enabled, simulator, no_experimental, target):
- "arm",
- "loong64",
- "mips64",
-+ "ppc64",
- "riscv64",
- ):
- die(
-- "--enable-wasm-jspi is only supported for arm64/arm/loong64/mips64/riscv64 simulators"
-+ "--enable-wasm-jspi is only supported for arm64/arm/loong64/mips64/ppc64/riscv64 simulators"
- )
-
- if target.cpu in (
-@@ -781,12 +790,13 @@ def wasm_jspi(value, jit_enabled, simulator, no_experimental, target):
- "arm",
- "loongarch64",
- "mips64",
-+ "ppc64",
- "riscv64",
- ):
- return True
-
- die(
-- "--enable-wasm-jspi only possible when targeting the x86_64/x86/arm64/arm/loongarch64/mips64/riscv64 jits"
-+ "--enable-wasm-jspi only possible when targeting the x86_64/x86/arm64/arm/loongarch64/mips64/ppc64/riscv64 jits"
- )
-
-
-@@ -821,10 +831,10 @@ def default_wasm_simd(jit_enabled, simulator, target):
- if not jit_enabled:
- return
-
-- if simulator and (simulator[0] != "arm64"):
-+ if simulator and simulator[0] not in ("arm64", "ppc64"):
- return
-
-- if target.cpu in ("x86_64", "x86", "aarch64"):
-+ if target.cpu in ("x86_64", "x86", "aarch64", "ppc64"):
- return True
-
-
-@@ -849,13 +859,15 @@ def wasm_simd(value, jit_enabled, simulator, target, no_experimental):
- if not jit_enabled:
- die("--enable-wasm-simd requires --enable-jit")
-
-- if simulator and (simulator[0] != "arm64"):
-- die("--enable-wasm-simd is not supported for simulators, except arm64")
-+ if simulator and simulator[0] not in ("arm64", "ppc64"):
-+ die(
-+ "--enable-wasm-simd is not supported for simulators, except arm64 and ppc64"
-+ )
-
-- if target.cpu in ("x86_64", "x86", "aarch64"):
-+ if target.cpu in ("x86_64", "x86", "aarch64", "ppc64"):
- return True
-
-- die("--enable-wasm-simd only possible when targeting the x86_64/x86/arm64 jits")
-+ die("--enable-wasm-simd only possible when targeting the x86_64/x86/arm64/ppc64 jits")
-
-
- set_config("ENABLE_WASM_SIMD", wasm_simd)
-diff --git a/js/src/builtin/TestingFunctions.cpp b/js/src/builtin/TestingFunctions.cpp
-index be8b3d0e16b6..2291d58dc0a1 100644
---- a/js/src/builtin/TestingFunctions.cpp
-+++ b/js/src/builtin/TestingFunctions.cpp
-@@ -447,6 +447,15 @@ static bool GetBuildConfiguration(JSContext* cx, unsigned argc, Value* vp) {
- return false;
- }
-
-+#ifdef JS_CODEGEN_PPC64
-+ value = BooleanValue(true);
-+#else
-+ value = BooleanValue(false);
-+#endif
-+ if (!JS_SetProperty(cx, info, "ppc64", value)) {
-+ return false;
-+ }
-+
- #ifdef JS_CODEGEN_LOONG64
- value = BooleanValue(true);
- #else
-@@ -483,6 +492,15 @@ static bool GetBuildConfiguration(JSContext* cx, unsigned argc, Value* vp) {
- return false;
- }
-
-+#ifdef JS_SIMULATOR_PPC64
-+ value = BooleanValue(true);
-+#else
-+ value = BooleanValue(false);
-+#endif
-+ if (!JS_SetProperty(cx, info, "ppc64-simulator", value)) {
-+ return false;
-+ }
-+
- #ifdef MOZ_ASAN
- value = BooleanValue(true);
- #else
-diff --git a/js/src/irregexp/RegExpAPI.cpp b/js/src/irregexp/RegExpAPI.cpp
-index 310cd85c6a20..377509574f28 100644
---- a/js/src/irregexp/RegExpAPI.cpp
-+++ b/js/src/irregexp/RegExpAPI.cpp
-@@ -495,7 +495,10 @@ class RegExpDepthCheck final : public v8::internal::regexp::Visitor {
-
- // This size is picked to be comfortably larger than any
- // RegExp*::ToNode stack frame.
--#if !defined(DEBUG) && !defined(MOZ_CODE_COVERAGE)
-+#if defined(__powerpc64__)
-+ // PPC64 ELFv2 has larger minimum stack frames.
-+ static const size_t FRAME_PADDING = 256 * 4;
-+#elif !defined(DEBUG) && !defined(MOZ_CODE_COVERAGE)
- static const size_t FRAME_PADDING = 256;
- #else
- // Use a slightly larger padding for debug and code coverage builds.
-diff --git a/js/src/irregexp/RegExpNativeMacroAssembler.cpp b/js/src/irregexp/RegExpNativeMacroAssembler.cpp
-index ae351226797b..a396aeb3c731 100644
---- a/js/src/irregexp/RegExpNativeMacroAssembler.cpp
-+++ b/js/src/irregexp/RegExpNativeMacroAssembler.cpp
-@@ -990,8 +990,21 @@ void SMRegExpMacroAssembler::CheckBacktrackStackLimit() {
- AbsoluteAddress(isolate()->regexp_stack()->limit_address_address()),
- backtrack_stack_pointer_, &no_stack_overflow);
-
-+#ifdef JS_CODEGEN_PPC64
-+ // LR on PowerPC isn't a GPR, so we have to explicitly save it before
-+ // calling or the regexp's return address will be clobbered.
-+ masm_.xs_mflr(temp1_);
-+ masm_.as_stdu(temp1_, masm_.getStackPointer(), -8);
-+#endif
-+
- masm_.call(&stack_overflow_label_);
-
-+#ifdef JS_CODEGEN_PPC64
-+ masm_.as_ld(temp1_, masm_.getStackPointer(), 0);
-+ masm_.xs_mtlr(temp1_);
-+ masm_.as_addi(masm_.getStackPointer(), masm_.getStackPointer(), 8);
-+#endif
-+
- // Exit with an exception if the call failed
- masm_.branchTest32(Assembler::Zero, temp0_, temp0_,
- &exit_with_exception_label_);
-@@ -1080,6 +1093,13 @@ void SMRegExpMacroAssembler::createStackFrame() {
- masm_.initPseudoStackPtr();
- #endif
-
-+#ifdef JS_CODEGEN_PPC64
-+ // PPC64's link register is an SPR, not a GPR, so it cannot be included in
-+ // SavedNonVolatileRegisters. Save it explicitly before the frame pointer
-+ // so that abiret()'s blr can return to the caller after we restore it.
-+ masm_.pushReturnAddress();
-+#endif
-+
- masm_.Push(js::jit::FramePointer);
- masm_.moveStackPtrTo(js::jit::FramePointer);
-
-@@ -1308,6 +1328,9 @@ void SMRegExpMacroAssembler::exitHandler() {
- // Perform a plain Ret(), as abiret() will move SP <- PSP and that is wrong.
- masm_.Ret(vixl::lr);
- #else
-+# ifdef JS_CODEGEN_PPC64
-+ masm_.popReturnAddress();
-+# endif
- masm_.abiret();
- #endif
-
-@@ -1351,6 +1374,11 @@ void SMRegExpMacroAssembler::stackOverflowHandler() {
-
- // Adjust for the return address on the stack.
- size_t frameOffset = sizeof(void*);
-+#ifdef JS_CODEGEN_PPC64
-+ // CheckBacktrackStackLimit pushes LR before calling us, so there's a
-+ // second return address on the stack.
-+ frameOffset += sizeof(void*);
-+#endif
-
- volatileRegs.takeUnchecked(temp0_);
- volatileRegs.takeUnchecked(temp1_);
-diff --git a/js/src/jit-test/tests/baseline/ppc64-branch8-16-narrow.js b/js/src/jit-test/tests/baseline/ppc64-branch8-16-narrow.js
-new file mode 100644
-index 000000000000..fc1074a9ef8b
---- /dev/null
-+++ b/js/src/jit-test/tests/baseline/ppc64-branch8-16-narrow.js
-@@ -0,0 +1,103 @@
-+// Regression test for PPC64 branch8/branch16 width-narrowing under Equal /
-+// NotEqual / unsigned comparisons. Two prior bugs:
-+//
-+// 1. Sign-extending the load while move32(Imm32) zero-extended the imm
-+// caused spurious mismatch when the loaded byte/halfword had its high
-+// bit set (e.g. "ÀÁÂ".startsWith("ÀÁÂ") returned false because byte 0xC0
-+// sign-extended to 0xFF...C0 but the imm 0xC0 zero-extended to 0x00C0,
-+// so cmpw on the low 32 bits saw a negative vs positive value).
-+//
-+// 2. Always zero-extending the load broke `byte == Imm32(-1)` because -1
-+// sign-extends in the imm path: the loaded 0x000000FF didn't match the
-+// materialized 0xFFFFFFFF.
-+//
-+// Fix: cast the immediate to uint8/uint16 (equality + unsigned) or int8/int16
-+// (signed relational) so both sides have matching bit patterns regardless of
-+// how move32(Imm32) chose to materialize it. This matches ARM64/LoongArch64/RISC-V.
-+//
-+// We exercise both byte and halfword branch paths via TypedArray loads and
-+// String.prototype.startsWith with a constant search string (the original
-+// failing site lowered to branch16(NotEqual, addr, Imm32(0xC1C0))).
-+
-+// --- Direct byte/halfword equality through TypedArray ---
-+{
-+ let u8 = new Uint8Array([0, 1, 0x7F, 0x80, 0xC0, 0xC1, 0xFE, 0xFF]);
-+ let i8 = new Int8Array(u8.buffer);
-+ let u16 = new Uint16Array([0x0000, 0x7FFF, 0x8000, 0xC1C0, 0xFFFE, 0xFFFF]);
-+ let i16 = new Int16Array(u16.buffer);
-+
-+ // Force baseline + Ion to specialize the comparisons.
-+ function eqU8(arr, idx, val) {
-+ return arr[idx] === val;
-+ }
-+ function eqI8(arr, idx, val) {
-+ return arr[idx] === val;
-+ }
-+ function eqU16(arr, idx, val) {
-+ return arr[idx] === val;
-+ }
-+ function eqI16(arr, idx, val) {
-+ return arr[idx] === val;
-+ }
-+
-+ for (let i = 0; i < 200; i++) {
-+ // High-bit-set bytes: bit pattern equality must hold both signed and
-+ // unsigned interpretations of the immediate.
-+ assertEq(eqU8(u8, 4, 0xC0), true); // unsigned compare 0xC0 == 0xC0
-+ assertEq(eqU8(u8, 4, 0xC1), false);
-+ assertEq(eqU8(u8, 7, 0xFF), true);
-+ assertEq(eqU8(u8, 7, -1 & 0xFF), true); // 0xFF written as -1&0xFF
-+
-+ // Signed Int8 view: 0xFF is -1, 0xC0 is -64.
-+ assertEq(eqI8(i8, 4, -64), true);
-+ assertEq(eqI8(i8, 7, -1), true);
-+ assertEq(eqI8(i8, 4, -63), false);
-+
-+ // Halfword variants: the original startsWith failure compared against
-+ // 0xC1C0 (Latin-1 bytes 0xC0 0xC1 as one halfword), which has bit 15 set.
-+ assertEq(eqU16(u16, 3, 0xC1C0), true);
-+ assertEq(eqU16(u16, 3, 0xC1C1), false);
-+ assertEq(eqU16(u16, 5, 0xFFFF), true);
-+ assertEq(eqU16(u16, 5, -1 & 0xFFFF), true);
-+
-+ assertEq(eqI16(i16, 3, -15936), true); // 0xC1C0 as i16 = -15936
-+ assertEq(eqI16(i16, 5, -1), true);
-+ assertEq(eqI16(i16, 5, -2), false);
-+ }
-+}
-+
-+// --- String.prototype.startsWith with a Latin-1 constant search ---
-+// This was the original failing site — Ion lowers a constant search string
-+// of length 1..32 into a sequence of byte-wise comparisons.
-+{
-+ let s = "ÀÁÂ"; // Latin-1 length 3, bytes 0xC0 0xC1 0xC2 (all high-bit set)
-+ function check() {
-+ return s.startsWith("ÀÁÂ");
-+ }
-+ for (let i = 0; i < 200; i++) {
-+ assertEq(check(), true);
-+ }
-+
-+ // Mismatch on a single high-bit byte must report not-equal.
-+ let s2 = "ÀÁÃ"; // last byte 0xC3 instead of 0xC2
-+ function check2() {
-+ return s2.startsWith("ÀÁÂ");
-+ }
-+ for (let i = 0; i < 200; i++) {
-+ assertEq(check2(), false);
-+ }
-+}
-+
-+// --- Signed relational comparisons still work (we kept the sign-extend path) ---
-+{
-+ let i8 = new Int8Array([0x7F, -1, -128, 1, 0]);
-+ function ltZero(idx) {
-+ return i8[idx] < 0;
-+ }
-+ for (let i = 0; i < 200; i++) {
-+ assertEq(ltZero(0), false); // 0x7F = +127
-+ assertEq(ltZero(1), true); // -1
-+ assertEq(ltZero(2), true); // -128
-+ assertEq(ltZero(3), false); // 1
-+ }
-+}
-diff --git a/js/src/jit-test/tests/gc/gcparam.js b/js/src/jit-test/tests/gc/gcparam.js
-index 51d58662193f..48e5a97c135f 100644
---- a/js/src/jit-test/tests/gc/gcparam.js
-+++ b/js/src/jit-test/tests/gc/gcparam.js
-@@ -30,7 +30,8 @@ testGetParam("chunkBytes");
- testGetParam("helperThreadCount");
-
- testChangeParam("maxBytes");
--testChangeParam("minNurseryBytes", 16 * 1024);
-+var pageSize = gcparam("systemPageSizeKB") * 1024;
-+testChangeParam("minNurseryBytes", pageSize);
- testChangeParam("maxNurseryBytes", 1024 * 1024);
- testChangeParam("incrementalGCEnabled");
- testChangeParam("perZoneGCEnabled");
-diff --git a/js/src/jit-test/tests/ion/mod-constant-pow2-minus-one.js b/js/src/jit-test/tests/ion/mod-constant-pow2-minus-one.js
-new file mode 100644
-index 000000000000..9028f5587c65
---- /dev/null
-+++ b/js/src/jit-test/tests/ion/mod-constant-pow2-minus-one.js
-@@ -0,0 +1,78 @@
-+// Regression test for a PPC64 Ion miscompile of integer modulo by a
-+// constant of the form 2^n - 1 (e.g. 65535).
-+//
-+// lowerModI routes `x % (2^n - 1)` to LModMaskI, whose codegen
-+// (ma_mod_mask) materialized the mask 2^n - 1 with xs_li(). xs_li takes a
-+// signed int16_t, so a mask of 0xFFFF was truncated to -1, corrupting the
-+// digit-summing reduction. The bug only affected masks that do not fit in a
-+// signed 16-bit immediate, i.e. divisors >= 65535 (n >= 16); smaller
-+// 2^n - 1 divisors such as 255 were unaffected.
-+//
-+// The reference uses a non-constant divisor, which lowers to the
-+// hardware-divide modulo path (LModI) and is therefore independent of the
-+// LModMaskI codegen under test.
-+
-+function refmod(x, d) {
-+ // d is not a constant here -> divide-based modulo, not LModMaskI.
-+ return (x % d) | 0;
-+}
-+
-+// One function per constant divisor so the divisor is a literal and the
-+// LModMaskI path is selected.
-+function mod255(x) { return (x % 255) | 0; }
-+function mod32767(x) { return (x % 32767) | 0; }
-+function mod65535(x) { return (x % 65535) | 0; }
-+function mod131071(x) { return (x % 131071) | 0; }
-+function mod1048575(x) { return (x % 1048575) | 0; }
-+
-+const cases = [
-+ [mod255, 255],
-+ [mod32767, 32767],
-+ [mod65535, 65535],
-+ [mod131071, 131071],
-+ [mod1048575, 1048575],
-+];
-+
-+// Inputs spanning small values, values with bits above the mask width
-+// (so the multi-digit reduction is exercised), and negatives.
-+const inputs = [];
-+for (let i = 0; i < 64; i++) {
-+ inputs.push(Math.imul(i, 2654435761) | 0);
-+ inputs.push((i * 65535 + i) | 0);
-+ inputs.push((i * 131071 - 7) | 0);
-+ inputs.push(-Math.imul(i, 40503) | 0);
-+}
-+inputs.push(0, 1, -1, 65534, 65535, 65536, 0x7fffffff, -0x80000000);
-+
-+// Warm up through the tiers, then assert each constant-divisor result
-+// matches the divide-based reference.
-+for (let iter = 0; iter < 2000; iter++) {
-+ for (const [fn, d] of cases) {
-+ for (const x of inputs) {
-+ assertEq(fn(x), refmod(x, d));
-+ }
-+ }
-+}
-+
-+// Register-pressure variant: mirrors the shape that exposed the bug (many
-+// live locals forcing the mask materialization to interact with spills).
-+function pressure(buf, i) {
-+ let v0 = i, v1 = i + 1, v2 = i + 2, v3 = i + 3, v4 = i + 4, v5 = i + 5;
-+ let v6 = i + 6, v7 = i + 7, v8 = i + 8, v9 = i + 9, v10 = i + 10, v11 = i + 11;
-+ let v12 = i + 12, v13 = i + 13, v14 = i + 14, v15 = i + 15;
-+ const r = (buf[i & 63] % 65535) | 0;
-+ // Keep every local live to the return without altering r.
-+ const live = (v0 ^ v1 ^ v2 ^ v3 ^ v4 ^ v5 ^ v6 ^ v7 ^
-+ v8 ^ v9 ^ v10 ^ v11 ^ v12 ^ v13 ^ v14 ^ v15) & 0;
-+ return r + live;
-+}
-+
-+const buf = new Int32Array(64);
-+for (let i = 0; i < buf.length; i++) {
-+ buf[i] = Math.imul(i, 2654435761) | 0;
-+}
-+for (let iter = 0; iter < 5000; iter++) {
-+ for (let i = 0; i < 64; i++) {
-+ assertEq(pressure(buf, i), refmod(buf[i & 63], 65535));
-+ }
-+}
-diff --git a/js/src/jit-test/tests/ion/mod-pow2-negative-dividend.js b/js/src/jit-test/tests/ion/mod-pow2-negative-dividend.js
-new file mode 100644
-index 000000000000..9905cc4a8f36
---- /dev/null
-+++ b/js/src/jit-test/tests/ion/mod-pow2-negative-dividend.js
-@@ -0,0 +1,71 @@
-+// Regression test for a PPC64 Ion miscompile of integer modulo by a constant
-+// power of two (e.g. 65536) with a negative dividend.
-+//
-+// lowerModI routes `x % 2^n` to LModPowTwoI, whose codegen tested the sign of
-+// the dividend with branchPtr (a 64-bit compare). When the int32 dividend was
-+// held zero-extended in its register, the 64-bit test misclassified a negative
-+// value as non-negative and took the unmasked positive path, returning
-+// `x & (2^n - 1)` instead of the correct (negative) `x % 2^n`. Fixed by using a
-+// 32-bit sign test (branch32).
-+//
-+// The reference uses a non-constant divisor, which lowers to the divide-based
-+// modulo path (LModI), independent of LModPowTwoI.
-+
-+function refmod(x, d) {
-+ return (x % d) | 0;
-+}
-+
-+function mod256(x) { return (x % 256) | 0; }
-+function mod1024(x) { return (x % 1024) | 0; }
-+function mod4096(x) { return (x % 4096) | 0; }
-+function mod65536(x) { return (x % 65536) | 0; }
-+function mod1048576(x) { return (x % 1048576) | 0; }
-+function mod1073741824(x) { return (x % 1073741824) | 0; }
-+
-+const cases = [
-+ [mod256, 256],
-+ [mod1024, 1024],
-+ [mod4096, 4096],
-+ [mod65536, 65536],
-+ [mod1048576, 1048576],
-+ [mod1073741824, 1073741824],
-+];
-+
-+// Heavy on negative dividends (the broken path), plus boundary values.
-+const inputs = [];
-+for (let i = 1; i <= 64; i++) {
-+ inputs.push(-Math.imul(i, 2654435761) | 0);
-+ inputs.push(-(i * 168));
-+ inputs.push(-(i * 70001));
-+ inputs.push(Math.imul(i, 40503) | 0);
-+}
-+inputs.push(0, -1, 1, -168, -65535, -65536, -65537, 168,
-+ 0x7fffffff, -0x80000000, -0x7fffffff);
-+
-+for (let iter = 0; iter < 3000; iter++) {
-+ for (const [fn, d] of cases) {
-+ for (const x of inputs) {
-+ assertEq(fn(x), refmod(x, d));
-+ }
-+ }
-+}
-+
-+// Register-pressure variant: a negative dividend produced at runtime
-+// (float->int) with many live locals, mirroring the shape that exposed the bug.
-+function pressure(seed) {
-+ let v0 = seed, v1 = seed + 1, v2 = seed + 2, v3 = seed + 3, v4 = seed + 4;
-+ let v5 = seed + 5, v6 = seed + 6, v7 = seed + 7, v8 = seed + 8, v9 = seed + 9;
-+ let v10 = seed + 10, v11 = seed + 11, v12 = seed + 12, v13 = seed + 13;
-+ let d0 = seed * 0.5, d1 = seed * 1.5, d2 = -seed * 2.5;
-+ const neg = (Math.fround(-(Math.abs(seed) + 0.7)) | 0);
-+ const r = (neg % 65536) | 0;
-+ const live = (v0 ^ v1 ^ v2 ^ v3 ^ v4 ^ v5 ^ v6 ^ v7 ^ v8 ^ v9 ^
-+ v10 ^ v11 ^ v12 ^ v13 ^ (d0 | 0) ^ (d1 | 0) ^ (d2 | 0)) & 0;
-+ return r + live;
-+}
-+for (let iter = 0; iter < 5000; iter++) {
-+ for (let s = 1; s <= 200; s++) {
-+ const expect = ((Math.fround(-(s + 0.7)) | 0) % 65536) | 0;
-+ assertEq(pressure(s), expect);
-+ }
-+}
-diff --git a/js/src/jit-test/tests/math-min-max-corner-cases.js b/js/src/jit-test/tests/math-min-max-corner-cases.js
-new file mode 100644
-index 000000000000..7ac2c59caeff
---- /dev/null
-+++ b/js/src/jit-test/tests/math-min-max-corner-cases.js
-@@ -0,0 +1,50 @@
-+// Math.min / Math.max corner cases. Exercises the POWER9 xsminjdp /
-+// xsmaxjdp J-form fast path on PPC64 (and the fcmpu/branch fallback when
-+// POWER8 is forced); other backends already cover this via shared fp tests,
-+// but the truth table is small and worth pinning explicitly.
-+//
-+// JS semantics (ECMA-262):
-+// - Math.max(-0, +0) === +0; Math.min(-0, +0) === -0
-+// - Math.max(-0, -0) === -0; Math.min(+0, +0) === +0
-+// - Any NaN operand → NaN
-+// - ±Inf and ordinary numerics by value
-+
-+function objectIsPositiveZero(v) {
-+ return v === 0 && Object.is(v, 0);
-+}
-+function objectIsNegativeZero(v) {
-+ return v === 0 && Object.is(v, -0);
-+}
-+
-+// Direct calls — these get inlined by Ion as MMinMax intrinsics, which
-+// emit the relevant min/max helper.
-+function check() {
-+ // Max corner cases.
-+ assertEq(objectIsPositiveZero(Math.max(-0, +0)), true);
-+ assertEq(objectIsPositiveZero(Math.max(+0, -0)), true);
-+ assertEq(objectIsNegativeZero(Math.max(-0, -0)), true);
-+ assertEq(objectIsPositiveZero(Math.max(+0, +0)), true);
-+ assertEq(Number.isNaN(Math.max(NaN, 5)), true);
-+ assertEq(Number.isNaN(Math.max(5, NaN)), true);
-+ assertEq(Number.isNaN(Math.max(NaN, NaN)), true);
-+ assertEq(Math.max(-Infinity, 5), 5);
-+ assertEq(Math.max(Infinity, 5), Infinity);
-+ assertEq(Math.max(1, 2), 2);
-+ assertEq(Math.max(-1, -2), -1);
-+ assertEq(Math.max(1.5, 2.5), 2.5);
-+
-+ // Min corner cases.
-+ assertEq(objectIsNegativeZero(Math.min(-0, +0)), true);
-+ assertEq(objectIsNegativeZero(Math.min(+0, -0)), true);
-+ assertEq(objectIsNegativeZero(Math.min(-0, -0)), true);
-+ assertEq(objectIsPositiveZero(Math.min(+0, +0)), true);
-+ assertEq(Number.isNaN(Math.min(NaN, 5)), true);
-+ assertEq(Number.isNaN(Math.min(5, NaN)), true);
-+ assertEq(Math.min(-Infinity, 5), -Infinity);
-+ assertEq(Math.min(Infinity, 5), 5);
-+ assertEq(Math.min(1, 2), 1);
-+}
-+
-+// Run cold (Baseline) and hot (Ion).
-+check();
-+for (let i = 0; i < 50000; i++) check();
-diff --git a/js/src/jit-test/tests/wasm/atomicity.js b/js/src/jit-test/tests/wasm/atomicity.js
-index 34327ec95741..ac1516083325 100644
---- a/js/src/jit-test/tests/wasm/atomicity.js
-+++ b/js/src/jit-test/tests/wasm/atomicity.js
-@@ -8,7 +8,11 @@
- const DEBUG = 0;
-
- // The longer we run, the better, really, but we don't want to time out.
--const ITERATIONS = 100000;
-+// Real PPC64 hardware retries lwarx/stwcx. reservation loops under
-+// contention, which makes the default count exceed jit-test's 150 s
-+// budget on POWER8 and (less so) POWER9/POWER10. Quarter the count
-+// there to keep coverage while fitting the default budget.
-+const ITERATIONS = getBuildConfiguration("ppc64") ? 25000 : 100000;
-
- // If you change NUMWORKERS you must also change the tables for INIT, VAL, and
- // RESULT for all the operations, below, by adding or removing bits.
-@@ -39,7 +43,7 @@ if (getCoreCount() < NUMAGENTS) {
-
- if (getBuildConfiguration("arm-simulator") || getBuildConfiguration("arm64-simulator") ||
- getBuildConfiguration("mips64-simulator") || getBuildConfiguration("riscv64-simulator") ||
-- getBuildConfiguration("loong64-simulator"))
-+ getBuildConfiguration("loong64-simulator") || getBuildConfiguration("ppc64-simulator"))
- {
- if (DEBUG > 0)
- print("Atomicity test disabled on simulator");
-diff --git a/js/src/jit-test/tests/wasm/excessive-inlining.js b/js/src/jit-test/tests/wasm/excessive-inlining.js
-index 91ec710e4e46..a7d3b3211515 100644
---- a/js/src/jit-test/tests/wasm/excessive-inlining.js
-+++ b/js/src/jit-test/tests/wasm/excessive-inlining.js
-@@ -74,23 +74,26 @@ assertEq(tier2codeBytesUsed > 2000, true);
-
- // But not an excessive amount. This is the assertion that checks that
- // the inlining-budget cutoff mechanism is working.
--assertEq(tier2codeBytesUsed < 15000, true);
-+// PPC64 generates larger code due to fixed-width 4-byte instructions,
-+// multi-instruction branch stanzas, and longer constant-loading sequences.
-+let tier2limit = getBuildConfiguration("ppc64") ? 25000 : 15000;
-+assertEq(tier2codeBytesUsed < tier2limit, true);
-
- // The thresholds above are based on the following measurements.
- //
- // tier1codeBytesUsed (baseline size)
- //
--// x64 x32 arm64 arm32
-+// x64 x32 arm64 arm32 ppc64
- //
--// 1378 1010 1408 1008 --enable-debug build
--// 1218 866 1248 856 --disable-debug build
-+// 1378 1010 1408 1008 2736 --enable-debug build
-+// 1218 866 1248 856 --disable-debug build
- //
- // tier2codeBytesUsed (optimized size), with inline-size budgeting enabled
- //
--// x64 x32 arm64 arm32
-+// x64 x32 arm64 arm32 ppc64
- //
--// 5186 6994 7136 5472 --enable-debug build
--// 3698 3730 5472 3888 --disable-debug build
-+// 5186 6994 7136 5472 17408 --enable-debug build
-+// 3698 3730 5472 3888 --disable-debug build
- //
- // tier2codeBytesUsed (optimized size), with inline-size budgeting disabled
- //
-@@ -108,7 +111,7 @@ assertEq(tier2codeBytesUsed < 15000, true);
- // (2) the optimized size will be at least 2000 bytes
- //
- // (3) if the inline-budget mechanism is working as intended, the optimized
--// size will be less than 15000 bytes
-+// size will be less than 15000 bytes (25000 on PPC64)
- //
- //
- // Note (for future testing): inline-size budgeting was disabled by changing
-diff --git a/js/src/jit-test/tests/wasm/memory-oob-message.js b/js/src/jit-test/tests/wasm/memory-oob-message.js
-index 75248c6e6a56..c08e49bcc6e4 100644
---- a/js/src/jit-test/tests/wasm/memory-oob-message.js
-+++ b/js/src/jit-test/tests/wasm/memory-oob-message.js
-@@ -8,8 +8,16 @@ const hasOffsetMessage = wasmHugeMemoryEnabled();
-
- function oobPattern(memIdx, byteOffset) {
- if (hasOffsetMessage) {
-+ // The reported address is whatever the kernel returned in
-+ // siginfo.si_addr for the faulting instruction. Most backends emit
-+ // the wasm access directly so si_addr equals byteOffset. PPC64 emits
-+ // a 1-byte probing load at byteOffset + (size - 1) before each
-+ // multi-byte access (to enforce wasm-spec atomicity on POWER ISA),
-+ // so si_addr there can be up to 15 bytes past byteOffset.
-+ const offsets = [];
-+ for (let i = 0; i < 16; ++i) offsets.push(`${byteOffset + i}`);
- return new RegExp(
-- `out of bounds: memory ${memIdx} access at memory address ${byteOffset}`
-+ `out of bounds: memory ${memIdx} access at memory address (?:${offsets.join('|')})`
- );
- }
- return /index out of bounds/;
-diff --git a/js/src/jit-test/tests/wasm/ppc64-argon2-tiering.js b/js/src/jit-test/tests/wasm/ppc64-argon2-tiering.js
-new file mode 100644
-index 000000000000..04dad9240539
---- /dev/null
-+++ b/js/src/jit-test/tests/wasm/ppc64-argon2-tiering.js
-@@ -0,0 +1,124 @@
-+// Test for wasm tiering correctness with argon2-style SIMD computation.
-+// The argon2 fBlaMka function uses i64x2.extmul_low_i32x4_u, i64x2.shl,
-+// i64x2.add, v128.xor, v128.or, i64x2.shr_u, and i8x16.shuffle.
-+// A tiering bug can cause hash and verify to produce different results
-+// when tier-up happens between them.
-+//
-+// This test calls the same export repeatedly on a single instance, so that
-+// tier-up can happen between calls, and checks every result is identical.
-+
-+var mod = new WebAssembly.Module(wasmTextToBinary(`
-+ (module
-+ (memory (export "mem") 10)
-+ ;; Argon2 fBlaMka: a + b + 2 * trunc32(a) * trunc32(b)
-+ ;; then rotations by 32, 24, 16, 63
-+ (func $G_round (param i32)
-+ (local v128 v128 v128 v128 v128 v128 v128 v128 v128)
-+ (local.set 1 (v128.load (i32.add (local.get 0) (i32.const 0))))
-+ (local.set 2 (v128.load (i32.add (local.get 0) (i32.const 16))))
-+ (local.set 3 (v128.load (i32.add (local.get 0) (i32.const 32))))
-+ (local.set 4 (v128.load (i32.add (local.get 0) (i32.const 48))))
-+ (local.set 5 (v128.load (i32.add (local.get 0) (i32.const 64))))
-+ (local.set 6 (v128.load (i32.add (local.get 0) (i32.const 80))))
-+ (local.set 7 (v128.load (i32.add (local.get 0) (i32.const 96))))
-+ (local.set 8 (v128.load (i32.add (local.get 0) (i32.const 112))))
-+
-+ ;; fBlaMka(v0, v2) + rotr32
-+ (local.set 1 (i64x2.add (i64x2.add (local.get 1) (local.get 3))
-+ (i64x2.shl (i64x2.extmul_low_i32x4_u
-+ (i8x16.shuffle 0 1 2 3 8 9 10 11 0 1 2 3 8 9 10 11 (local.get 1) (local.get 1))
-+ (i8x16.shuffle 0 1 2 3 8 9 10 11 0 1 2 3 8 9 10 11 (local.get 3) (local.get 3)))
-+ (i32.const 1))))
-+ (local.set 9 (v128.xor (local.get 7) (local.get 1)))
-+ (local.set 7 (v128.or (i64x2.shl (local.get 9) (i32.const 32)) (i64x2.shr_u (local.get 9) (i32.const 32))))
-+
-+ ;; fBlaMka(v4, v6) + rotr24
-+ (local.set 5 (i64x2.add (i64x2.add (local.get 5) (local.get 7))
-+ (i64x2.shl (i64x2.extmul_low_i32x4_u
-+ (i8x16.shuffle 0 1 2 3 8 9 10 11 0 1 2 3 8 9 10 11 (local.get 5) (local.get 5))
-+ (i8x16.shuffle 0 1 2 3 8 9 10 11 0 1 2 3 8 9 10 11 (local.get 7) (local.get 7)))
-+ (i32.const 1))))
-+ (local.set 9 (v128.xor (local.get 3) (local.get 5)))
-+ (local.set 3 (v128.or (i64x2.shl (local.get 9) (i32.const 40)) (i64x2.shr_u (local.get 9) (i32.const 24))))
-+
-+ ;; fBlaMka(v0, v2) + rotr16
-+ (local.set 1 (i64x2.add (i64x2.add (local.get 1) (local.get 3))
-+ (i64x2.shl (i64x2.extmul_low_i32x4_u
-+ (i8x16.shuffle 0 1 2 3 8 9 10 11 0 1 2 3 8 9 10 11 (local.get 1) (local.get 1))
-+ (i8x16.shuffle 0 1 2 3 8 9 10 11 0 1 2 3 8 9 10 11 (local.get 3) (local.get 3)))
-+ (i32.const 1))))
-+ (local.set 9 (v128.xor (local.get 7) (local.get 1)))
-+ (local.set 7 (v128.or (i64x2.shl (local.get 9) (i32.const 48)) (i64x2.shr_u (local.get 9) (i32.const 16))))
-+
-+ ;; fBlaMka(v4, v6) + rotr63
-+ (local.set 5 (i64x2.add (i64x2.add (local.get 5) (local.get 7))
-+ (i64x2.shl (i64x2.extmul_low_i32x4_u
-+ (i8x16.shuffle 0 1 2 3 8 9 10 11 0 1 2 3 8 9 10 11 (local.get 5) (local.get 5))
-+ (i8x16.shuffle 0 1 2 3 8 9 10 11 0 1 2 3 8 9 10 11 (local.get 7) (local.get 7)))
-+ (i32.const 1))))
-+ (local.set 9 (v128.xor (local.get 3) (local.get 5)))
-+ (local.set 3 (v128.or (i64x2.shl (local.get 9) (i32.const 1)) (i64x2.shr_u (local.get 9) (i32.const 63))))
-+
-+ (v128.store (i32.add (local.get 0) (i32.const 0)) (local.get 1))
-+ (v128.store (i32.add (local.get 0) (i32.const 16)) (local.get 2))
-+ (v128.store (i32.add (local.get 0) (i32.const 32)) (local.get 3))
-+ (v128.store (i32.add (local.get 0) (i32.const 48)) (local.get 4))
-+ (v128.store (i32.add (local.get 0) (i32.const 64)) (local.get 5))
-+ (v128.store (i32.add (local.get 0) (i32.const 80)) (local.get 6))
-+ (v128.store (i32.add (local.get 0) (i32.const 96)) (local.get 7))
-+ (v128.store (i32.add (local.get 0) (i32.const 112)) (local.get 8)))
-+
-+ (func (export "hash") (param i32) (result i64)
-+ (local i32)
-+ ;; Init with Blake2b IV
-+ (v128.store (i32.const 0) (v128.const i64x2 0x6a09e667f3bcc908 0xbb67ae8584caa73b))
-+ (v128.store (i32.const 16) (v128.const i64x2 0x3c6ef372fe94f82b 0xa54ff53a5f1d36f1))
-+ (v128.store (i32.const 32) (v128.const i64x2 0x510e527fade682d1 0x9b05688c2b3e6c1f))
-+ (v128.store (i32.const 48) (v128.const i64x2 0x1f83d9abfb41bd6b 0x5be0cd19137e2179))
-+ (v128.store (i32.const 64) (v128.const i64x2 0x0123456789abcdef 0xfedcba9876543210))
-+ (v128.store (i32.const 80) (v128.const i64x2 0xdeadbeefcafebabe 0x1122334455667788))
-+ (v128.store (i32.const 96) (v128.const i64x2 0xaabbccdd11223344 0x5566778899aabbcc))
-+ (v128.store (i32.const 112) (v128.const i64x2 0xddeeff0011223344 0x5566778899aabbcc))
-+ (local.set 1 (i32.const 0))
-+ (block (loop
-+ (call $G_round (i32.const 0))
-+ (local.set 1 (i32.add (local.get 1) (i32.const 1)))
-+ (br_if 1 (i32.ge_u (local.get 1) (local.get 0)))
-+ (br 0)))
-+ (i64.xor (i64.load (i32.const 0))
-+ (i64.xor (i64.load (i32.const 8))
-+ (i64.xor (i64.load (i32.const 16))
-+ (i64.xor (i64.load (i32.const 24))
-+ (i64.xor (i64.load (i32.const 32))
-+ (i64.xor (i64.load (i32.const 40))
-+ (i64.xor (i64.load (i32.const 48))
-+ (i64.xor (i64.load (i32.const 56))
-+ (i64.xor (i64.load (i32.const 64))
-+ (i64.xor (i64.load (i32.const 72))
-+ (i64.xor (i64.load (i32.const 80))
-+ (i64.xor (i64.load (i32.const 88))
-+ (i64.xor (i64.load (i32.const 96))
-+ (i64.xor (i64.load (i32.const 104))
-+ (i64.xor (i64.load (i32.const 112))
-+ (i64.load (i32.const 120))))))))))))))))))
-+ )
-+`));
-+
-+var inst = new WebAssembly.Instance(mod);
-+
-+// Get a reference result from the first call.
-+var reference = inst.exports.hash(100);
-+
-+// Run many times to trigger tier-up, then verify result stays the same.
-+var pass = true;
-+for (var i = 0; i < 1000; i++) {
-+ var r = inst.exports.hash(100);
-+ if (r !== reference) {
-+ pass = false;
-+ throw new Error("Tiering mismatch at iteration " + i +
-+ ": got 0x" + BigInt.asUintN(64, r).toString(16) +
-+ ", expected 0x" + BigInt.asUintN(64, reference).toString(16));
-+ }
-+}
-+
-+assertEq(pass, true);
-diff --git a/js/src/jit-test/tests/wasm/ppc64-compare-select-bench.js b/js/src/jit-test/tests/wasm/ppc64-compare-select-bench.js
-new file mode 100644
-index 000000000000..c11ce713f514
---- /dev/null
-+++ b/js/src/jit-test/tests/wasm/ppc64-compare-select-bench.js
-@@ -0,0 +1,70 @@
-+// |jit-test| skip-if: true
-+//
-+// Benchmark only, not a correctness test. Invoke manually as shown below.
-+//
-+// Microbenchmark for wasm compare+select fusion on PPC64.
-+//
-+// Run with:
-+// $JS --wasm-compiler=optimizing \
-+// js/src/jit-test/tests/wasm/ppc64-compare-select-bench.js
-+//
-+// Prints timings for five variants (i32, u32, i64, f32, f64) that exercise a
-+// tight loop of N select-on-compare operations. Used to decide whether
-+// specializing lowerWasmCompareAndSelect beyond Int32 is worth the code.
-+//
-+// The kernel is a 10-stage select chain so the per-op overhead dominates
-+// the loop frame. Each iteration touches 10 compare+select ops plus
-+// ~trivial address math.
-+
-+const N_ITERS = 1_000_000;
-+
-+function buildModule(kind) {
-+ const types = {i32: ['i32', 'i32', 'i32.lt_s'],
-+ u32: ['i32', 'i32', 'i32.lt_u'],
-+ i64: ['i64', 'i64', 'i64.lt_s'],
-+ f32: ['f32', 'i32', 'f32.lt'],
-+ f64: ['f64', 'i32', 'f64.lt']}[kind];
-+ const [ty, iterTy, cmpOp] = types;
-+ // Load a, b; compute chain of (b < a ? b : a) 10 times per iter.
-+ const stage = `
-+ (local.set $a
-+ (select (result ${ty})
-+ (local.get $b) (local.get $a)
-+ (${cmpOp} (local.get $b) (local.get $a))))`;
-+ const body = Array(10).fill(stage).join('\n');
-+ const text = `
-+ (module
-+ (func (export "run") (param $n i32) (result ${ty})
-+ (local $i i32) (local $a ${ty}) (local $b ${ty})
-+ (local.set $a (${ty}.const ${kind === 'f32' || kind === 'f64' ? '3.14' : '12345'}))
-+ (local.set $b (${ty}.const ${kind === 'f32' || kind === 'f64' ? '2.71' : '67890'}))
-+ (loop $L
-+ ${body}
-+ (local.set $i (i32.add (local.get $i) (i32.const 1)))
-+ (br_if $L (i32.lt_s (local.get $i) (local.get $n))))
-+ (local.get $a)))`;
-+ return new WebAssembly.Module(wasmTextToBinary(text));
-+}
-+
-+function bench(kind) {
-+ const inst = new WebAssembly.Instance(buildModule(kind));
-+ // Warmup — ensure Ion compiles.
-+ for (let i = 0; i < 3; i++) inst.exports.run(N_ITERS);
-+ const t0 = dateNow();
-+ const res = inst.exports.run(N_ITERS);
-+ const t1 = dateNow();
-+ return {ms: t1 - t0, result: res};
-+}
-+
-+const kinds = ['i32', 'u32', 'i64', 'f32', 'f64'];
-+const runs = 5;
-+print(`\nwasm compare+select microbench (${N_ITERS.toLocaleString()} iters, 10 ops/iter):`);
-+print(` Each timing is the best of ${runs} runs.\n`);
-+for (const kind of kinds) {
-+ const samples = [];
-+ for (let i = 0; i < runs; i++) samples.push(bench(kind).ms);
-+ samples.sort((a, b) => a - b);
-+ const best = samples[0];
-+ const median = samples[(runs / 2) | 0];
-+ print(` ${kind.padEnd(4)} best=${best.toFixed(1)}ms median=${median.toFixed(1)}ms (samples: ${samples.map(s => s.toFixed(0)).join(',')})`);
-+}
-diff --git a/js/src/jit-test/tests/wasm/ppc64-extmul-alias.js b/js/src/jit-test/tests/wasm/ppc64-extmul-alias.js
-new file mode 100644
-index 000000000000..2aa9507751b6
---- /dev/null
-+++ b/js/src/jit-test/tests/wasm/ppc64-extmul-alias.js
-@@ -0,0 +1,107 @@
-+// Regression test for PPC64 i64x2.extmul_{low,high}_i32x4_{s,u} when the
-+// Ion register allocator picks dest == rhs.
-+//
-+// On PPC64 LE, the old implementation extracted lanes via mtvsrd/mfvsrd and
-+// wrote the low-lane product to dest before reading rhs for the high lane.
-+// `mtvsrd XT, RA` leaves DW1 of XT undefined (POWER9 zeros it), so when
-+// dest aliased rhs the high-lane extract from rhs read garbage, producing
-+// zero in the high i64 lane. On POWER8 the ExtractLaneToGPR fallback
-+// additionally clobbered ScratchSimd128Reg between the two extracts.
-+//
-+// The loop below, discovered via wasm-reduce from argon2.wasm, reliably
-+// reproduced the miscompile: the result's high i64 lane went to 0 on
-+// POWER9 Ion / garbage on POWER8 Ion, while baseline kept the correct
-+// value (lane1 = 48*48 = 2304 in the final iteration).
-+
-+var mod = new WebAssembly.Module(wasmTextToBinary(`
-+ (module
-+ (memory (export "mem") 1)
-+ (func (export "run_u") (param $out i32)
-+ (local $i i32) (local $v4 v128) (local $v5 v128) (local $v9 v128)
-+ (loop
-+ (local.set $v9
-+ (i64x2.add
-+ (v128.const i32x4 1 0 0 0)
-+ (i64x2.extmul_low_i32x4_u (local.get $v5) (local.get $v9))))
-+ (local.set $v4 (local.get $v9))
-+ (local.set $v5 (local.get $v4))
-+ (v128.store (i32.const 0) (local.get $v5))
-+ (local.set $i (i32.add (local.get $i) (i32.const 1)))
-+ (br_if 0 (i32.ne (local.get $i) (i32.const 8))))
-+ (v128.store (local.get $out) (local.get $v9)))
-+
-+ (func (export "run_s") (param $out i32)
-+ (local $i i32) (local $v v128)
-+ (local.set $v (v128.const i32x4 2 3 5 7))
-+ (loop
-+ ;; Force dest==rhs aliasing: v = extmul_low_i32x4_s(const, v).
-+ (local.set $v
-+ (i64x2.extmul_low_i32x4_s
-+ (v128.const i32x4 2 3 5 7)
-+ (local.get $v)))
-+ (local.set $i (i32.add (local.get $i) (i32.const 1)))
-+ (br_if 0 (i32.ne (local.get $i) (i32.const 2))))
-+ (v128.store (local.get $out) (local.get $v)))
-+
-+ (func (export "run_high_u") (param $out i32)
-+ (local $i i32) (local $v v128)
-+ (local.set $v (v128.const i32x4 0 0 2 3))
-+ (loop
-+ (local.set $v
-+ (i64x2.extmul_high_i32x4_u
-+ (v128.const i32x4 0 0 2 3)
-+ (local.get $v)))
-+ (local.set $i (i32.add (local.get $i) (i32.const 1)))
-+ (br_if 0 (i32.ne (local.get $i) (i32.const 2))))
-+ (v128.store (local.get $out) (local.get $v)))
-+
-+ (func (export "run_high_s") (param $out i32)
-+ (local $i i32) (local $v v128)
-+ (local.set $v (v128.const i32x4 0 0 2 3))
-+ (loop
-+ (local.set $v
-+ (i64x2.extmul_high_i32x4_s
-+ (v128.const i32x4 0 0 2 3)
-+ (local.get $v)))
-+ (local.set $i (i32.add (local.get $i) (i32.const 1)))
-+ (br_if 0 (i32.ne (local.get $i) (i32.const 2))))
-+ (v128.store (local.get $out) (local.get $v))))
-+`));
-+
-+function runAndCheck(inst) {
-+ inst.exports.run_u(0);
-+ // After 8 iterations, the value in memory should have lane1 == 2304 = 0x900.
-+ // Bytes 8-15 (i64 lane 1, little-endian) = 0x0000000000000900.
-+ var buf = new Uint8Array(inst.exports.mem.buffer, 0, 16);
-+ var hex = Array.from(buf).map(b => b.toString(16).padStart(2,'0')).join('');
-+ // Expect bytes 8-9 = "00 09" and bytes 10-15 = "00 00 00 00 00 00".
-+ assertEq(hex.slice(16, 32), "0009000000000000");
-+
-+ inst.exports.run_s(16);
-+ // After 2 iterations of v = extmul_low_s(const(2,3,5,7), v) starting v=(2,3,5,7):
-+ // iter 1: i64x2 lane0 = 2*2 = 4, lane1 = 3*3 = 9.
-+ // v becomes i32x4 [4, 0, 9, 0] (each i64 lane occupies two i32 lanes).
-+ // iter 2: extmul_low_s reads i32 lanes 0, 1 of v = (4, 0).
-+ // i64 lane0 = 2*4 = 8; i64 lane1 = 3*0 = 0.
-+ var buf2 = new Uint8Array(inst.exports.mem.buffer, 16, 16);
-+ var hex2 = Array.from(buf2).map(b => b.toString(16).padStart(2,'0')).join('');
-+ assertEq(hex2, "08000000000000000000000000000000");
-+
-+ inst.exports.run_high_u(32);
-+ // v = (0, 0, 2, 3). extmul_high picks lanes 2 and 3.
-+ // iter 1: lane2_prod = 2*2 = 4; lane3_prod = 3*3 = 9. Result stored at bytes 0-7 (lane2_prod) and 8-15 (lane3_prod).
-+ // iter 2: v now has i64x2 lane0 = 4, lane1 = 9, i.e. i32x4 lanes [4, 0, 9, 0].
-+ // extmul_high_u(const(0,0,2,3), v) reads lanes 2, 3 of both:
-+ // const lane2 = 2, lane3 = 3; v lane2 = 9, lane3 = 0.
-+ // result: lane2_prod = 2*9 = 18 at bytes 0-7; lane3_prod = 3*0 = 0 at bytes 8-15.
-+ var buf3 = new Uint8Array(inst.exports.mem.buffer, 32, 16);
-+ var hex3 = Array.from(buf3).map(b => b.toString(16).padStart(2,'0')).join('');
-+ assertEq(hex3, "12000000000000000000000000000000");
-+
-+ inst.exports.run_high_s(48);
-+ var buf4 = new Uint8Array(inst.exports.mem.buffer, 48, 16);
-+ var hex4 = Array.from(buf4).map(b => b.toString(16).padStart(2,'0')).join('');
-+ assertEq(hex4, "12000000000000000000000000000000");
-+}
-+
-+runAndCheck(new WebAssembly.Instance(mod));
-diff --git a/js/src/jit-test/tests/wasm/ppc64-simd-vr-clobber.js b/js/src/jit-test/tests/wasm/ppc64-simd-vr-clobber.js
-new file mode 100644
-index 000000000000..d5f79a1840a6
---- /dev/null
-+++ b/js/src/jit-test/tests/wasm/ppc64-simd-vr-clobber.js
-@@ -0,0 +1,179 @@
-+// |jit-test| skip-if: !wasmSimdEnabled()
-+//
-+// Regression tests for PPC64 SIMD helpers that use VR1..VR5 as undeclared
-+// scratch and silently corrupt live wasm v128 values the register allocator
-+// has placed in those VRs.
-+//
-+// Background: PPC64 Simd128 lives in VR0..VR31. VR0 is non-allocatable
-+// (= ScratchSimd128Reg); VR1..VR31 are allocatable. The helpers below
-+// historically used VR1..VR5 as undeclared scratch:
-+//
-+// negInt8x16, negInt16x8 : clobber VR1 (all CPUs)
-+// negInt32x4, negInt64x2 (POWER8 fallback) : clobber VR1 (POWER8 only)
-+// extAddPairwiseInt8x16 (signed/unsigned) : clobber VR1, VR2, VR3
-+// extAddPairwiseInt16x8 (signed/unsigned) : clobber VR1, VR2, VR3
-+// unsignedWidenHighInt32x4 : clobber VR1
-+//
-+// Each test:
-+// - loads `nLive` "preserve" v128 values from memory at offsets 16..16+16*nLive
-+// - loads ONE additional "input" v128 = repeat(0x18) at offset 144
-+// - applies the suspect helper to the input
-+// - stores the nLive preserved values back to memory at offsets 0..16*nLive
-+// - stores the helper result at offset 16*nLive
-+//
-+// Without the fix, one of the preserved locals (whichever the allocator
-+// placed in the clobbered VR) reads back as the staged input value (0x18)
-+// instead of its original. With the fix (the helper using ScratchSimd128Scope
-+// or proper VR-namespace emit), all preserved locals retain their values.
-+
-+const PRESERVE_PATTERNS = [0xA1, 0xB2, 0xC3, 0xD4, 0xE5, 0xF6, 0x07, 0x29];
-+const INPUT_BYTE = 0x18;
-+
-+function init(mem) {
-+ // Slots at offset 16, 32, ..., 16+16*7 hold the preserve patterns.
-+ for (let slot = 0; slot < PRESERVE_PATTERNS.length; slot++) {
-+ for (let i = 0; i < 16; i++) {
-+ mem[16 + slot * 16 + i] = PRESERVE_PATTERNS[slot];
-+ }
-+ }
-+ // The helper input goes at offset 144 (= 16 + 16*8), directly after the
-+ // preserve area: the 8 pattern slots written above occupy bytes 16..143,
-+ // so staging the input here cannot overwrite any preserve slot.
-+ const INPUT_OFFSET = 144;
-+ for (let i = 0; i < 16; i++) mem[INPUT_OFFSET + i] = INPUT_BYTE;
-+}
-+
-+function repeat(byte) {
-+ const a = new Array(16);
-+ for (let i = 0; i < 16; i++) a[i] = byte;
-+ return a;
-+}
-+
-+// Verify nLive preserved slots match PRESERVE_PATTERNS at output offsets
-+// 0..16*nLive, and that the result slot at 16*nLive matches `expectedResult`.
-+function check(opName, mem, nLive, expectedResult) {
-+ for (let slot = 0; slot < nLive; slot++) {
-+ for (let i = 0; i < 16; i++) {
-+ const got = mem[slot * 16 + i];
-+ const want = PRESERVE_PATTERNS[slot];
-+ assertEq(got, want,
-+ `${opName}: live slot ${slot} byte ${i}: got 0x${got.toString(16)}, expected 0x${want.toString(16)} (allocator-clobbered VR?)`);
-+ }
-+ }
-+ for (let i = 0; i < 16; i++) {
-+ const got = mem[nLive * 16 + i];
-+ const want = expectedResult[i];
-+ assertEq(got, want,
-+ `${opName}: result byte ${i}: got 0x${got.toString(16)}, expected 0x${want.toString(16)}`);
-+ }
-+}
-+
-+// Build a wasm module that:
-+// - loads `nLive` preserve v128 locals from memory at offsets 16..16*nLive
-+// - loads ONE input v128 from offset 144
-+// - applies `op` to the input
-+// - stores all `nLive + 1` v128 values back to memory at offsets 0..16*nLive
-+function buildModule(op, nLive) {
-+ const localDecls = [];
-+ const initLoads = [];
-+ const finalStores = [];
-+ for (let i = 0; i < nLive; i++) {
-+ localDecls.push(`(local $v${i} v128)`);
-+ initLoads.push(`(local.set $v${i} (v128.load (i32.const ${16 + i * 16})))`);
-+ finalStores.push(`(v128.store (i32.const ${i * 16}) (local.get $v${i}))`);
-+ }
-+ // The helper input + result.
-+ localDecls.push(`(local $input v128)`);
-+ initLoads.push(`(local.set $input (v128.load (i32.const 144)))`);
-+ finalStores.push(`(v128.store (i32.const ${nLive * 16}) (local.get $input))`);
-+
-+ const text = `
-+ (module
-+ (memory (export "mem") 1)
-+ (func (export "run")
-+ ${localDecls.join('\n ')}
-+ ${initLoads.join('\n ')}
-+ (local.set $input (${op} (local.get $input)))
-+ ${finalStores.join('\n ')}
-+ )
-+ )`;
-+ return new WebAssembly.Module(wasmTextToBinary(text));
-+}
-+
-+function runOne(opName, op, nLive, expectedResult) {
-+ const mod = buildModule(op, nLive);
-+ const inst = new WebAssembly.Instance(mod);
-+ const mem = new Uint8Array(inst.exports.mem.buffer);
-+ // Run many times so Baseline + Ion both see it.
-+ for (let warm = 0; warm < 50; warm++) {
-+ init(mem);
-+ inst.exports.run();
-+ check(opName, mem, nLive, expectedResult);
-+ }
-+}
-+
-+// ---- Negate helpers ----
-+//
-+// Input lane = 0x18 = 24. neg(24) = -24.
-+// i8x16.neg : -24 mod 256 = 232 = 0xE8 per byte.
-+// i16x8.neg : lane = 0x1818 = 6168, neg = -6168 mod 65536 = 0xE7E8.
-+// Memory LE: per i16 lane bytes 0xE8 0xE7.
-+// i32x4.neg : lane = 0x18181818 = 404232216, neg = 0xE7E7E7E8.
-+// Memory LE: per i32 lane bytes 0xE8 0xE7 0xE7 0xE7.
-+// i64x2.neg : lane = 0x1818181818181818, neg = 0xE7E7E7E7E7E7E7E8.
-+// Memory LE: per i64 lane bytes 0xE8 0xE7 0xE7 0xE7 0xE7 0xE7 0xE7 0xE7.
-+
-+runOne("i8x16.neg", "i8x16.neg", 4, repeat(0xE8));
-+runOne("i16x8.neg", "i16x8.neg", 4,
-+ [0xE8,0xE7, 0xE8,0xE7, 0xE8,0xE7, 0xE8,0xE7,
-+ 0xE8,0xE7, 0xE8,0xE7, 0xE8,0xE7, 0xE8,0xE7]);
-+runOne("i32x4.neg", "i32x4.neg", 4,
-+ [0xE8,0xE7,0xE7,0xE7, 0xE8,0xE7,0xE7,0xE7,
-+ 0xE8,0xE7,0xE7,0xE7, 0xE8,0xE7,0xE7,0xE7]);
-+runOne("i64x2.neg", "i64x2.neg", 4,
-+ [0xE8,0xE7,0xE7,0xE7,0xE7,0xE7,0xE7,0xE7,
-+ 0xE8,0xE7,0xE7,0xE7,0xE7,0xE7,0xE7,0xE7]);
-+
-+// ---- extAddPairwise helpers ----
-+//
-+// extadd_pairwise reads adjacent pairs and widens-then-sums them.
-+// Input = repeat(0x18) = 24.
-+// i16x8.extadd_pairwise_i8x16_s : 24 + 24 = 48 = 0x0030 per i16 lane.
-+// Memory LE: 0x30 0x00 per lane × 8 lanes.
-+// i16x8.extadd_pairwise_i8x16_u : same since input is positive.
-+// i32x4.extadd_pairwise_i16x8_s : i16 lane = 0x1818 = 6168, sum = 12336 = 0x00003030.
-+// Memory LE: 0x30 0x30 0x00 0x00 per lane × 4 lanes.
-+// i32x4.extadd_pairwise_i16x8_u : same since input is positive.
-+
-+runOne("i16x8.extadd_pairwise_i8x16_s",
-+ "i16x8.extadd_pairwise_i8x16_s", 4,
-+ [0x30,0x00, 0x30,0x00, 0x30,0x00, 0x30,0x00,
-+ 0x30,0x00, 0x30,0x00, 0x30,0x00, 0x30,0x00]);
-+
-+runOne("i16x8.extadd_pairwise_i8x16_u",
-+ "i16x8.extadd_pairwise_i8x16_u", 4,
-+ [0x30,0x00, 0x30,0x00, 0x30,0x00, 0x30,0x00,
-+ 0x30,0x00, 0x30,0x00, 0x30,0x00, 0x30,0x00]);
-+
-+runOne("i32x4.extadd_pairwise_i16x8_s",
-+ "i32x4.extadd_pairwise_i16x8_s", 4,
-+ [0x30,0x30,0x00,0x00, 0x30,0x30,0x00,0x00,
-+ 0x30,0x30,0x00,0x00, 0x30,0x30,0x00,0x00]);
-+
-+runOne("i32x4.extadd_pairwise_i16x8_u",
-+ "i32x4.extadd_pairwise_i16x8_u", 4,
-+ [0x30,0x30,0x00,0x00, 0x30,0x30,0x00,0x00,
-+ 0x30,0x30,0x00,0x00, 0x30,0x30,0x00,0x00]);
-+
-+// ---- unsignedWidenHighInt32x4 ----
-+//
-+// i64x2.extend_high_i32x4_u: take the high two i32 lanes (lanes 2 and 3) of
-+// the input, zero-extend each to i64, lay them out as i64x2.
-+// Input lane = 0x18181818 (positive, =404232216).
-+// Result: two i64 lanes, each = 0x0000000018181818.
-+// Memory LE: per i64 lane bytes 0x18 0x18 0x18 0x18 0x00 0x00 0x00 0x00.
-+
-+runOne("i64x2.extend_high_i32x4_u",
-+ "i64x2.extend_high_i32x4_u", 4,
-+ [0x18,0x18,0x18,0x18,0x00,0x00,0x00,0x00,
-+ 0x18,0x18,0x18,0x18,0x00,0x00,0x00,0x00]);
-diff --git a/js/src/jit-test/tests/wasm/profiling.js b/js/src/jit-test/tests/wasm/profiling.js
-index f4872b07cde8..ccd9690a262f 100644
---- a/js/src/jit-test/tests/wasm/profiling.js
-+++ b/js/src/jit-test/tests/wasm/profiling.js
-@@ -117,6 +117,13 @@ for (let type of ['f32', 'f64']) {
- if (getBuildConfiguration("arm64")) {
- continue;
- }
-+ // PPC64 inlines ceil/floor/trunc as frip/frim/friz (see
-+ // Assembler-ppc64.h HasRoundInstruction), so no builtin thunk
-+ // frames exist to profile. `nearest` still goes through the
-+ // thunk because PPC64's frin is not IEEE round-to-even.
-+ if (getBuildConfiguration("ppc64") && func !== 'nearest') {
-+ continue;
-+ }
- test(`(module
- (func (export "") (param ${type}) (result ${type})
- local.get 0
-diff --git a/js/src/jit-test/tests/wasm/regress-ppc64-extract-lane-ctz.js b/js/src/jit-test/tests/wasm/regress-ppc64-extract-lane-ctz.js
-new file mode 100644
-index 000000000000..e2cf5def541e
---- /dev/null
-+++ b/js/src/jit-test/tests/wasm/regress-ppc64-extract-lane-ctz.js
-@@ -0,0 +1,49 @@
-+// |jit-test| --wasm-compiler=optimizing; skip-if: !wasmSimdEnabled()
-+//
-+// Regression test for a PPC64 i32x4.extract_lane canonicalization bug.
-+//
-+// ExtractLaneToGPR leaves the adjacent lane in the high 32 bits of the GPR for
-+// the unshifted lanes (0 and 2), so extractLaneInt32x4 must sign-extend its i32
-+// result (as the i8x16/i16x8 extracts do). Without that, a consumer that reads
-+// the full 64-bit register sees garbage in the high half. The POWER8 i32.ctz
-+// emulation is such a consumer: its 64-bit neg/and. zero-check disagrees with
-+// its 32-bit cntlzw, so ctz of a zero lane sitting next to a nonzero neighbour
-+// returned -1 instead of 32.
-+//
-+// The vector comes from memory (runtime, not constant-foldable) and is passed
-+// through a SIMD op so the extract is a genuine vector-register extract. Run
-+// under MOZ_PPC64_FORCE_POWER8=1 to exercise the emulated ctz path; in every
-+// other mode this is simply a correctness check.
-+
-+const ins = wasmEvalText(`(module
-+ (memory (export "mem") 1)
-+ (func $v (result v128)
-+ ;; identity AND keeps the value in a vector register and forces a real
-+ ;; extractLaneInt32x4 rather than an extract-of-load fold.
-+ (v128.and (v128.load (i32.const 0)) (v128.const i32x4 -1 -1 -1 -1)))
-+ (func (export "ctz0") (result i32) (i32.ctz (i32x4.extract_lane 0 (call $v))))
-+ (func (export "ctz1") (result i32) (i32.ctz (i32x4.extract_lane 1 (call $v))))
-+ (func (export "ctz2") (result i32) (i32.ctz (i32x4.extract_lane 2 (call $v))))
-+ (func (export "ctz3") (result i32) (i32.ctz (i32x4.extract_lane 3 (call $v))))
-+ (func (export "sext0") (result i64) (i64.extend_i32_s (i32x4.extract_lane 0 (call $v))))
-+ (func (export "sext2") (result i64) (i64.extend_i32_s (i32x4.extract_lane 2 (call $v))))
-+)`).exports;
-+
-+const mem = new Int32Array(ins.mem.buffer);
-+function setLanes(a, b, c, d) { mem[0] = a; mem[1] = b; mem[2] = c; mem[3] = d; }
-+
-+// Each lane = 0 surrounded by nonzero neighbours: ctz must be 32, never -1.
-+setLanes(0, -1, -1, -1); assertEq(ins.ctz0(), 32);
-+setLanes(-1, 0, -1, -1); assertEq(ins.ctz1(), 32);
-+setLanes(-1, -1, 0, -1); assertEq(ins.ctz2(), 32);
-+setLanes(-1, -1, -1, 0); assertEq(ins.ctz3(), 32);
-+
-+// Nonzero lanes: ctz of the lane value, regardless of neighbours.
-+setLanes(0x10, -1, 0x100000, -1);
-+assertEq(ins.ctz0(), 4);
-+assertEq(ins.ctz2(), 20);
-+
-+// A negative lane must sign-extend correctly (the canonicalization is extsw).
-+setLanes(-2, 7, -3, 7);
-+assertEq(ins.sext0(), -2n);
-+assertEq(ins.sext2(), -3n);
-diff --git a/js/src/jit-test/tests/wasm/regress-ppc64-select-condition.js b/js/src/jit-test/tests/wasm/regress-ppc64-select-condition.js
-new file mode 100644
-index 000000000000..c38975dce859
---- /dev/null
-+++ b/js/src/jit-test/tests/wasm/regress-ppc64-select-condition.js
-@@ -0,0 +1,30 @@
-+// |jit-test| --wasm-compiler=optimizing; skip-if: !wasmSimdEnabled()
-+//
-+// Regression test for a PPC64 wasm Ion miscompile of `select` with a 32-bit
-+// condition. visitWasmSelect tested the i32 condition with a 64-bit compare
-+// (cmpdi / branchTestPtr). When the condition was zero in its low 32 bits but
-+// had garbage in the high 32 bits (as can happen under register pressure), the
-+// 64-bit test read it as non-zero and select returned the wrong operand.
-+//
-+// Here the condition `$x3` is 0; `select($x8, -952809828, $x3)` must therefore
-+// return -952809828. The surrounding SIMD shuffle/bitselect/swizzle chain
-+// supplies the v128 register pressure that exposed the bug.
-+
-+const wat = `(module (func (export "f") (result i64)
-+ (local $x3 i32)(local $x7 i32)(local $x8 i32)
-+ (local $w0 v128)(local $w1 v128)(local $w2 v128)(local $w3 v128)
-+ (local $w4 v128)(local $w5 v128)(local $w6 v128)(local $w7 v128)
-+ (local.set $w0 (v128.const i32x4 1708443454 1532218695 2107423610 -1265775005))
-+ (local.set $w2 (v128.const i32x4 -752312355 -625530572 -844666500 832036408))
-+ (local.set $w7 (v128.const i32x4 115003496 -970441117 -43225935 1874128204))
-+ (local.set $w4 (i8x16.shuffle 15 18 13 2 6 22 20 8 19 10 12 8 11 5 6 28 (local.get $w7) (local.get $w3)))
-+ (local.set $w6 (v128.bitselect (local.get $w4) (local.get $w0) (local.get $w7)))
-+ (local.set $w1 (v128.const i32x4 -1635025264 -629784132 1517869852 1651771825))
-+ (local.set $w7 (v128.bitselect (local.get $w6) (local.get $w2) (local.get $w2)))
-+ (local.set $w6 (i8x16.swizzle (local.get $w1) (local.get $w7)))
-+ (local.set $x3 (i32x4.extract_lane 2 (local.get $w6)))
-+ (local.set $x7 (select (local.get $x8) (i32.const -952809828) (local.get $x3)))
-+ (i64.extend_i32_s (local.get $x7))))`;
-+
-+const ins = new WebAssembly.Instance(new WebAssembly.Module(wasmTextToBinary(wat)));
-+assertEq(ins.exports.f(), -952809828n);
-diff --git a/js/src/jit-test/tests/wasm/regress-ppc64-trap-exit-simd-save.js b/js/src/jit-test/tests/wasm/regress-ppc64-trap-exit-simd-save.js
-new file mode 100644
-index 000000000000..4887f8df119c
---- /dev/null
-+++ b/js/src/jit-test/tests/wasm/regress-ppc64-trap-exit-simd-save.js
-@@ -0,0 +1,64 @@
-+// |jit-test| exitstatus: 0; skip-if: !wasmSimdEnabled()
-+//
-+// Regression test for the PPC64 wasm trap exit losing live v128 state.
-+//
-+// On PPC64, doubles live in the FPRs (VSR0-31) while wasm v128 values live in
-+// the VRs (VSR32-63) -- disjoint physical pools. The trap exit's
-+// RegsToPreserve used AllDoubleMask only, so a trap firing while a v128 was
-+// live resumed with whatever the C++ interrupt path's libc left in the VRs
-+// (glibc's misaligned vector memcpy leaves lvsl alignment-control byte
-+// patterns there). Interrupt checks fire via traps at loop back-edges, where
-+// a loop-carried v128 accumulator is exactly what is live.
-+//
-+// The loop below keeps an i32x4 accumulator live across every back-edge while
-+// interrupts fire repeatedly; the callback does large misaligned copies to
-+// pull libc's vector memcpy through the VRs. On an unfixed build (real
-+// silicon; the simulator's VRs are insulated from native libc) the
-+// accumulator comes back holding garbage and the final lane values are wrong.
-+
-+const ins = wasmEvalText(`(module
-+ (func (export "run") (param $n i32) (result i32)
-+ (local $acc v128)
-+ (block $done
-+ (loop $top
-+ (br_if $done (i32.eqz (local.get $n)))
-+ (local.set $acc (i32x4.add (local.get $acc) (v128.const i32x4 1 2 3 4)))
-+ (local.set $n (i32.sub (local.get $n) (i32.const 1)))
-+ (br $top)))
-+ ;; Fold the four lanes so any lane corruption shows up.
-+ (i32.xor
-+ (i32.xor (i32x4.extract_lane 0 (local.get $acc))
-+ (i32.rotl (i32x4.extract_lane 1 (local.get $acc)) (i32.const 8)))
-+ (i32.xor (i32.rotl (i32x4.extract_lane 2 (local.get $acc)) (i32.const 16))
-+ (i32.rotl (i32x4.extract_lane 3 (local.get $acc)) (i32.const 24)))))
-+)`).exports;
-+
-+// Misaligned big copies drive glibc's lvsl/vperm memcpy path on PPC.
-+const big = new Uint8Array(1 << 20);
-+const src = big.subarray(1, (1 << 19) + 1);
-+const dst = new Uint8Array(1 << 19);
-+
-+let fires = 0;
-+function onInterrupt() {
-+ fires++;
-+ for (let i = 0; i < 4; i++) {
-+ dst.set(src);
-+ }
-+ if (fires < 25) {
-+ timeout(0.02, onInterrupt);
-+ }
-+ return true;
-+}
-+
-+function expected(n) {
-+ const r = (x, k) => ((x << k) | (x >>> (32 - k))) | 0;
-+ const l = [n | 0, (2 * n) | 0, (3 * n) | 0, (4 * n) | 0];
-+ return ((l[0] ^ r(l[1], 8)) ^ (r(l[2], 16) ^ r(l[3], 24))) | 0;
-+}
-+
-+const N = 1 << 26;
-+timeout(0.02, onInterrupt);
-+const got = ins.run(N);
-+// Cancel any pending watchdog before finishing.
-+timeout(-1);
-+assertEq(got, expected(N));
-diff --git a/js/src/jit-test/tests/wasm/regress/bug-ppc64-simd-reduce-and-branch.js b/js/src/jit-test/tests/wasm/regress/bug-ppc64-simd-reduce-and-branch.js
-new file mode 100644
-index 000000000000..b7ec0d9548bb
---- /dev/null
-+++ b/js/src/jit-test/tests/wasm/regress/bug-ppc64-simd-reduce-and-branch.js
-@@ -0,0 +1,7 @@
-+// Regression test for a PPC64-specific wasm Ion crash in
-+// CodeGenerator::visitWasmReduceAndBranchSimd128 — it called
-+// LBlock::label() directly on the branch targets without going through
-+// skipTrivialBlocks(), so a trivial goto-only successor tripped
-+// LBlock::label()'s !isTrivial() assertion. Reduced from grantkot.com/poly
-+// with wasm-reduce. Triggers the bug under --wasm-compiler=optimizing.
-+new WebAssembly.Module(os.file.readFile(scriptdir + "/bug-ppc64-simd-reduce-and-branch.wasm", "binary"));
-diff --git a/js/src/jit-test/tests/wasm/simd/bug1946618.js b/js/src/jit-test/tests/wasm/simd/bug1946618.js
-index cc02d0d8dfd7..fcf3a2a35e82 100644
---- a/js/src/jit-test/tests/wasm/simd/bug1946618.js
-+++ b/js/src/jit-test/tests/wasm/simd/bug1946618.js
-@@ -48,7 +48,12 @@ for (let op of ["f32x4.relaxed_min", "f32x4.relaxed_max",
- // baseline.
- let result1 = i.exports.variant1();
- let result2 = i.exports.variant2();
-- if (getBuildConfiguration("arm64")) {
-+ if (getBuildConfiguration("ppc64")) {
-+ // PPC64: xvminsp/xvmaxsp always returns the non-NaN operand,
-+ // regardless of operand order. Both variants give zero (non-NaN).
-+ assertEq(result1, 0);
-+ assertEq(result2, 0);
-+ } else if (getBuildConfiguration("arm64")) {
- // The relaxed_min/max operation appears to propagate NaNs symmetrically
- // from either arg
- assertEq(result1, 65535);
-diff --git a/js/src/jit-test/tests/wasm/simd/ion-analysis.js b/js/src/jit-test/tests/wasm/simd/ion-analysis.js
-index d12af6e6fbc9..335f831ff6a9 100644
---- a/js/src/jit-test/tests/wasm/simd/ion-analysis.js
-+++ b/js/src/jit-test/tests/wasm/simd/ion-analysis.js
-@@ -12,6 +12,7 @@
- // generates the expected result.
-
- var isArm64 = getBuildConfiguration("arm64");
-+var isPPC64 = getBuildConfiguration("ppc64");
-
- // 32-bit permutation that is not a rotation.
- let perm32x4_pattern = [4, 5, 6, 7, 12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3];
-@@ -846,7 +847,7 @@ for ( let [ty128,size] of [['i8x16',1], ['i16x8',2], ['i32x4',4]] ) {
- let ops = { all_true: allTrue, any_true: anyTrue, bitmask };
-
- for ( let op of ['any_true', 'all_true', 'bitmask'] ) {
-- let folded = op != 'bitmask' || (size == 2 && !isArm64);
-+ let folded = op != 'bitmask' || (size == 2 && !isArm64 && !isPPC64);
- let operation = op == 'any_true' ? 'v128.any_true' : `${ty128}.${op}`;
- let positive =
- wasmCompile(
-@@ -898,12 +899,12 @@ for ( let [ty128,size] of [['i8x16',1], ['i16x8',2], ['i32x4',4]] ) {
-
- // Bitselect with constant mask folded into shuffle operation
-
--if (!isArm64) {
-+if (!isArm64 && !isPPC64) {
- wasmCompile(`
- (module (func (param v128) (param v128) (result v128)
- (v128.bitselect (local.get 0) (local.get 1) (v128.const i8x16 0 -1 -1 0 0 0 0 0 -1 -1 -1 -1 -1 -1 0 0))))
- `);
-- assertEq(wasmSimdAnalysis(), "shuffle -> blend 8x16");
-+ assertEq(wasmSimdAnalysis(), "shuffle -> blend 8x16");
- }
-
- // Library
-diff --git a/js/src/jit/Assembler.h b/js/src/jit/Assembler.h
-index 97c2e337625b..cb7244776605 100644
---- a/js/src/jit/Assembler.h
-+++ b/js/src/jit/Assembler.h
-@@ -19,6 +19,8 @@
- # include "jit/loong64/Assembler-loong64.h"
- #elif defined(JS_CODEGEN_RISCV64)
- # include "jit/riscv64/Assembler-riscv64.h"
-+#elif defined(JS_CODEGEN_PPC64)
-+# include "jit/ppc64/Assembler-ppc64.h"
- #elif defined(JS_CODEGEN_WASM32)
- # include "jit/wasm32/Assembler-wasm32.h"
- #elif defined(JS_CODEGEN_NONE)
-diff --git a/js/src/jit/BaselineIC.cpp b/js/src/jit/BaselineIC.cpp
-index c356538a024e..5ab631838f0e 100644
---- a/js/src/jit/BaselineIC.cpp
-+++ b/js/src/jit/BaselineIC.cpp
-@@ -120,6 +120,8 @@ AllocatableGeneralRegisterSet BaselineICAvailableGeneralRegs(size_t numInputs) {
- MOZ_ASSERT(!regs.has(PseudoStackPointer));
- MOZ_ASSERT(!regs.has(RealStackPointer));
- MOZ_ASSERT(!regs.has(ICTailCallReg));
-+#elif defined(JS_CODEGEN_PPC64)
-+ regs.take(ICTailCallReg);
- #endif
- regs.take(ICStubReg);
-
-diff --git a/js/src/jit/CacheIRCompiler.cpp b/js/src/jit/CacheIRCompiler.cpp
-index 4eb952e497e3..ee4888495103 100644
---- a/js/src/jit/CacheIRCompiler.cpp
-+++ b/js/src/jit/CacheIRCompiler.cpp
-@@ -10302,6 +10302,14 @@ bool CacheIRCompiler::emitConcatStringsResult(StringOperandId lhsId,
- liveRegs.add(ICTailCallReg);
- #endif
- liveRegs.takeUnchecked(output.valueReg());
-+
-+#ifdef JS_CODEGEN_PPC64
-+ // On PPC64, LR is an SPR, not a GPR, so ICTailCallReg is a regular
-+ // GPR that does not shadow LR. The inner bctrl will clobber LR, so
-+ // save/restore it explicitly.
-+ masm.xs_mflr(r0);
-+ masm.push(r0);
-+#endif
- masm.PushRegsInMask(liveRegs);
-
- // The stub expects lhs in CallTempReg0 and rhs in CallTempReg1.
-@@ -10322,11 +10330,19 @@ bool CacheIRCompiler::emitConcatStringsResult(StringOperandId lhsId,
- masm.branchTestPtr(Assembler::Zero, CallTempReg5, CallTempReg5, &vmCall);
- masm.tagValue(JSVAL_TYPE_STRING, CallTempReg5, output.valueReg());
- masm.PopRegsInMask(liveRegs);
-+#ifdef JS_CODEGEN_PPC64
-+ masm.pop(r0);
-+ masm.xs_mtlr(r0);
-+#endif
- masm.jump(&done);
-
- masm.bind(&vmCall);
- masm.setFramePushed(framePushed);
- masm.PopRegsInMask(liveRegs);
-+#ifdef JS_CODEGEN_PPC64
-+ masm.pop(r0);
-+ masm.xs_mtlr(r0);
-+#endif
- }
-
- {
-diff --git a/js/src/jit/CodeGenerator.cpp b/js/src/jit/CodeGenerator.cpp
-index a1c01409e9f7..2a2c6007aec0 100644
---- a/js/src/jit/CodeGenerator.cpp
-+++ b/js/src/jit/CodeGenerator.cpp
-@@ -2519,6 +2519,12 @@ static bool PrepareAndExecuteRegExp(MacroAssembler& masm, Register regexp,
- masm.computeEffectiveAddress(Address(FramePointer, ioOffset), temp2);
- masm.PushRegsInMask(volatileRegs);
- masm.setupUnalignedABICall(temp3);
-+#if defined(JS_CODEGEN_PPC64)
-+ // temp1 aliases argregs on this platform, so we need to reuse temp3
-+ // or we'll stomp on the code pointer when we pass the first ABI argument.
-+ masm.movePtr(codePointer, temp3);
-+ codePointer = temp3;
-+#endif
- masm.passABIArg(temp2);
- masm.callWithABI(codePointer);
- masm.storeCallInt32Result(temp1);
-diff --git a/js/src/jit/CodeGenerator.h b/js/src/jit/CodeGenerator.h
-index 58c047dea41b..3781b9595dfd 100644
---- a/js/src/jit/CodeGenerator.h
-+++ b/js/src/jit/CodeGenerator.h
-@@ -23,6 +23,8 @@
- # include "jit/loong64/CodeGenerator-loong64.h"
- #elif defined(JS_CODEGEN_RISCV64)
- # include "jit/riscv64/CodeGenerator-riscv64.h"
-+#elif defined(JS_CODEGEN_PPC64)
-+# include "jit/ppc64/CodeGenerator-ppc64.h"
- #elif defined(JS_CODEGEN_WASM32)
- # include "jit/wasm32/CodeGenerator-wasm32.h"
- #elif defined(JS_CODEGEN_NONE)
-diff --git a/js/src/jit/EffectiveAddressAnalysis.cpp b/js/src/jit/EffectiveAddressAnalysis.cpp
-index e1bd1bd045ef..88697c06907c 100644
---- a/js/src/jit/EffectiveAddressAnalysis.cpp
-+++ b/js/src/jit/EffectiveAddressAnalysis.cpp
-@@ -60,7 +60,7 @@ static bool OffsetIsSmallEnough(int32_t imm) {
- // `movn #imm`. arm32 is similar.
- return imm >= -0xFFFF && imm <= 0xFFFF;
- #elif defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_LOONG64) || \
-- defined(JS_CODEGEN_MIPS64)
-+ defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_PPC64)
- return imm >= -0xFFF && imm <= 0xFFF;
- #elif defined(JS_CODEGEN_WASM32) || defined(JS_CODEGEN_NONE)
- return true;
-diff --git a/js/src/jit/ExecutableAllocator.cpp b/js/src/jit/ExecutableAllocator.cpp
-index 340a63964b52..c9336fe8ec4e 100644
---- a/js/src/jit/ExecutableAllocator.cpp
-+++ b/js/src/jit/ExecutableAllocator.cpp
-@@ -306,13 +306,19 @@ void ExecutableAllocator::poisonCode(JSRuntime* rt,
- }
- }
-
-- // Make the pools executable again and drop references. We don't flush the
-- // ICache here to not add extra overhead.
-+ // Make the pools executable again and drop references. On architectures with
-+ // incoherent ICache (PPC64), we must flush to prevent stale instruction
-+ // execution when code regions are reused after sweeping.
- for (size_t i = 0; i < ranges.length(); i++) {
- ExecutablePool* pool = ranges[i].pool;
- if (pool->isMarked()) {
-+#ifdef JS_CODEGEN_PPC64
-+ reprotectPool(rt, pool, ProtectionSetting::Executable,
-+ MustFlushICache::Yes);
-+#else
- reprotectPool(rt, pool, ProtectionSetting::Executable,
- MustFlushICache::No);
-+#endif
- pool->unmark();
- }
- pool->release();
-diff --git a/js/src/jit/FlushICache.cpp b/js/src/jit/FlushICache.cpp
-index d3b1657a6be2..9590687c9803 100644
---- a/js/src/jit/FlushICache.cpp
-+++ b/js/src/jit/FlushICache.cpp
-@@ -13,7 +13,8 @@
- # include "jit/arm64/vixl/Simulator-vixl.h"
- #endif
-
--#if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64)
-+#if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) || \
-+ defined(JS_CODEGEN_PPC64)
-
- # ifdef __linux__
- # include <linux/version.h>
-diff --git a/js/src/jit/FlushICache.h b/js/src/jit/FlushICache.h
-index af79da356ee5..58396f62ae0d 100644
---- a/js/src/jit/FlushICache.h
-+++ b/js/src/jit/FlushICache.h
-@@ -21,7 +21,7 @@ inline void FlushICache(void* code, size_t size) {
- }
- #elif (defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64)) || \
- defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \
-- defined(JS_CODEGEN_RISCV64)
-+ defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
-
- // Invalidate the given code range from the icache. This will also flush the
- // execution context for this core. If this code is to be executed on another
-@@ -37,7 +37,7 @@ inline void FlushICache(void* code, size_t size) { MOZ_CRASH(); }
- # error "Unknown architecture!"
- #endif
-
--#if (defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)) || \
-+#if (defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)) || \
- defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \
- defined(JS_CODEGEN_RISCV64)
-
-@@ -55,10 +55,11 @@ inline void FlushExecutionContext() { MOZ_CRASH(); }
- inline bool CanFlushExecutionContextForAllThreads() { MOZ_CRASH(); }
- inline void FlushExecutionContextForAllThreads() { MOZ_CRASH(); }
-
--#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64)
-+#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) || \
-+ defined(JS_CODEGEN_PPC64)
-
--// ARM and ARM64 must flush the instruction pipeline of the current core
--// before executing newly JIT'ed code. This will remove any stale data from
-+// ARM, ARM64, and PPC64 must flush the instruction pipeline of the current
-+// core before executing newly JIT'ed code. This will remove any stale data from
- // the pipeline that may have referenced invalidated instructions.
- //
- // `FlushICache` will perform this for the thread that compiles the code, but
-diff --git a/js/src/jit/GenerateABIFunctionType.py b/js/src/jit/GenerateABIFunctionType.py
-index 04be10d1de2a..815427ec6771 100644
---- a/js/src/jit/GenerateABIFunctionType.py
-+++ b/js/src/jit/GenerateABIFunctionType.py
-@@ -538,6 +538,102 @@ def riscv64_simulator_dispatch(func_types):
- return contents
-
-
-+# PPC64 ELFv2 ABI: 8 int arg regs (r3-r10), 13 FP arg regs (f1-f13).
-+# Each floating-point argument consumes BOTH a float-arg slot AND a
-+# general-purpose-register shadow slot (capped at 8 GPR slots), matching
-+# what GCC and the JIT's ABIArgGenerator do for ELFv2 PPC64LE. Without
-+# the shadow, integer args following a float go to the wrong register
-+# at the call boundary, producing a use-after-free / wrong-pointer crash
-+# in the C callee. (Verified empirically by disassembling
-+# NumberBigIntCompare(double, BigInt*) on real PPC64: BigInt* is read
-+# from r4, not r3.)
-+def ppc64_args(func_type):
-+ contents = ""
-+ numIntArgRegs = 8
-+ numFloatArgRegs = 13
-+ intRegIndex = 0
-+ floatRegIndex = 0
-+ stackOffset = 0
-+ for i, arg in enumerate(func_type["args"]):
-+ if i != 0:
-+ contents += ", "
-+
-+ if arg == "General":
-+ if intRegIndex == numIntArgRegs:
-+ contents += f"sp_[{stackOffset}]"
-+ stackOffset += 1
-+ else:
-+ contents += f"a{intRegIndex}_"
-+ intRegIndex += 1
-+ elif arg == "Int32":
-+ if intRegIndex == numIntArgRegs:
-+ contents += f"I32(sp_[{stackOffset}])"
-+ stackOffset += 1
-+ else:
-+ contents += f"I32(a{intRegIndex}_)"
-+ intRegIndex += 1
-+ elif arg == "Int64":
-+ if intRegIndex == numIntArgRegs:
-+ contents += f"sp_[{stackOffset}]"
-+ stackOffset += 1
-+ else:
-+ contents += f"a{intRegIndex}_"
-+ intRegIndex += 1
-+ elif arg == "Float32":
-+ if floatRegIndex == numFloatArgRegs:
-+ contents += f"*mozilla::BitwiseCast<float*>(sp_[{stackOffset}])"
-+ stackOffset += 1
-+ else:
-+ contents += f"f{floatRegIndex}_s"
-+ floatRegIndex += 1
-+ # ELFv2: FP arg also consumes a GPR shadow slot.
-+ if intRegIndex < numIntArgRegs:
-+ intRegIndex += 1
-+ elif arg == "Float64":
-+ if floatRegIndex == numFloatArgRegs:
-+ contents += f"mozilla::BitwiseCast<double>(sp_[{stackOffset}])"
-+ stackOffset += 1
-+ else:
-+ contents += f"f{floatRegIndex}_d"
-+ floatRegIndex += 1
-+ # ELFv2: FP arg also consumes a GPR shadow slot.
-+ if intRegIndex < numIntArgRegs:
-+ intRegIndex += 1
-+ assert intRegIndex <= numIntArgRegs
-+ assert floatRegIndex <= numFloatArgRegs
-+ return contents
-+
-+
-+def ppc64_simulator_dispatch(func_types):
-+ contents = ""
-+ for func_type in func_types:
-+ args = ppc64_args(func_type)
-+ contents += f"case js::jit::Args_{func_type_name(func_type)}: {{\\\n"
-+ contents += f" auto target = reinterpret_cast<Prototype_{func_type_name(func_type)}>(nativeFn);\\\n"
-+ ret = func_type["ret"]
-+ if ret == "Void":
-+ contents += f" target({args});\\\n"
-+ else:
-+ contents += f" auto ret = target({args});\\\n"
-+ if ret == "Void":
-+ pass
-+ elif ret == "General":
-+ contents += " setCallResult(ret);\\\n"
-+ elif ret == "Int32":
-+ contents += " setCallResult(I64(ret));\\\n"
-+ elif ret == "Int64":
-+ contents += " setCallResult(ret);\\\n"
-+ elif ret == "Float32":
-+ contents += " setCallResultFloat(ret);\\\n"
-+ elif ret == "Float64":
-+ contents += " setCallResultDouble(ret);\\\n"
-+ else:
-+ raise ValueError(f"Unknown ret type: {ret}")
-+ contents += " break;\\\n"
-+ contents += "}\\\n"
-+ return contents
-+
-+
- def main(c_out, yaml_path):
- func_types = load_yaml(yaml_path)
-
-@@ -581,4 +677,8 @@ def main(c_out, yaml_path):
- contents += riscv64_simulator_dispatch(func_types)
- contents += "\n"
-
-+ contents += "#define ABI_FUNCTION_TYPE_PPC64_SIM_DISPATCH \\\n"
-+ contents += ppc64_simulator_dispatch(func_types)
-+ contents += "\n"
-+
- generate_header(c_out, "jit_ABIFunctionTypeGenerated_h", contents)
-diff --git a/js/src/jit/JitContext.cpp b/js/src/jit/JitContext.cpp
-index 79b22d9f249f..d399ddd36fd4 100644
---- a/js/src/jit/JitContext.cpp
-+++ b/js/src/jit/JitContext.cpp
-@@ -121,6 +121,10 @@ bool jit::InitializeJit() {
- RVFlags::Init();
- #endif
-
-+#ifdef JS_CODEGEN_PPC64
-+ PPC64Flags::Init();
-+#endif
-+
- #ifndef JS_CODEGEN_NONE
- MOZ_ASSERT(js::jit::CPUFlagsHaveBeenComputed());
- #endif
-diff --git a/js/src/jit/JitFrames.cpp b/js/src/jit/JitFrames.cpp
-index 3653af3a21f4..bbd1376dec69 100644
---- a/js/src/jit/JitFrames.cpp
-+++ b/js/src/jit/JitFrames.cpp
-@@ -1824,7 +1824,12 @@ Value SnapshotIterator::allocationValue(const RValueAllocation& alloc,
- return DoubleValue(fromRegister<double>(alloc.fpuReg()));
-
- case RValueAllocation::FLOAT32_REG:
-+#if defined(JS_CODEGEN_PPC64) || defined(JS_CODEGEN_RISCV64)
-+ return Float32Value(
-+ float(fromRegister<double>(alloc.fpuReg().asDouble())));
-+#else
- return Float32Value(fromRegister<float>(alloc.fpuReg()));
-+#endif
-
- case RValueAllocation::FLOAT32_STACK:
- return Float32Value(ReadFrameFloat32Slot(fp_, alloc.stackOffset()));
-@@ -2625,7 +2630,12 @@ uintptr_t MachineState::read(Register reg) const {
-
- template <typename T>
- T MachineState::read(FloatRegister reg) const {
-+#if defined(JS_CODEGEN_PPC64) || defined(JS_CODEGEN_RISCV64)
-+ // PPC64/RISCV64 always store FloatRegisters as 64-bit doubles.
-+ MOZ_ASSERT(reg.size() >= sizeof(T));
-+#else
- MOZ_ASSERT(reg.size() == sizeof(T));
-+#endif
-
- #if !defined(JS_CODEGEN_NONE) && !defined(JS_CODEGEN_WASM32)
- if (state_.is<BailoutState>()) {
-diff --git a/js/src/jit/JitFrames.h b/js/src/jit/JitFrames.h
-index ac7005a5fcfc..490834e62fec 100644
---- a/js/src/jit/JitFrames.h
-+++ b/js/src/jit/JitFrames.h
-@@ -322,6 +322,16 @@ enum class ExceptionResumeKind : int32_t {
-
- // Data needed to recover from an exception.
- struct ResumeFromException {
-+#if defined(JS_CODEGEN_PPC64)
-+ // This struct is built on the stack as part of exception returns. Because
-+ // it goes right on top of the stack, an ABI-compliant routine can wreck
-+ // it, so we implement a minimum Power ISA linkage area (four doublewords).
-+ void* _ppc_sp_;
-+ void* _ppc_cr_;
-+ void* _ppc_lr_;
-+ void* _ppc_toc_;
-+#endif
-+
- uint8_t* framePointer;
- uint8_t* stackPointer;
- uint8_t* target;
-@@ -373,7 +383,7 @@ struct ResumeFromException {
- }
- };
-
--#if defined(JS_CODEGEN_ARM64)
-+#if defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_PPC64)
- static_assert(sizeof(ResumeFromException) % 16 == 0,
- "ResumeFromException should be aligned");
- #endif
-diff --git a/js/src/jit/LIR.cpp b/js/src/jit/LIR.cpp
-index 2f89fb407349..a9f634b7fcc1 100644
---- a/js/src/jit/LIR.cpp
-+++ b/js/src/jit/LIR.cpp
-@@ -779,8 +779,8 @@ bool LMoveGroup::add(LAllocation from, LAllocation to, LDefinition::Type type) {
- // CodeGeneratorShared::CodeGeneratorShared and in general everywhere
- // SimdMemoryAignment is used. Likely, alignment requirements will return.
- # if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) || \
-- defined(JS_CODEGEN_ARM64)
-- // No need for any check on x86/x64/arm64.
-+ defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_PPC64)
-+ // No need for any check on x86/x64/arm64/ppc64.
- # else
- # error "Need to consider SIMD alignment on this target."
- // The following code may be of use if we need alignment checks on
-diff --git a/js/src/jit/LIR.h b/js/src/jit/LIR.h
-index 3f4efeda7955..3354cb96b0cb 100644
---- a/js/src/jit/LIR.h
-+++ b/js/src/jit/LIR.h
-@@ -200,7 +200,7 @@ class LUse : public LAllocation {
- static const uint32_t POLICY_BITS = 3;
- static const uint32_t POLICY_SHIFT = 0;
- static const uint32_t POLICY_MASK = (1 << POLICY_BITS) - 1;
--#ifdef JS_CODEGEN_ARM64
-+#if defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_PPC64)
- static const uint32_t REG_BITS = 7;
- #else
- static const uint32_t REG_BITS = 6;
-@@ -619,12 +619,18 @@ class LDefinition {
- Type type() const { return (Type)((bits_ >> TYPE_SHIFT) & TYPE_MASK); }
-
- static bool isFloatRegCompatible(Type type, FloatRegister reg) {
-+#if defined(JS_CODEGEN_PPC64) || defined(JS_CODEGEN_RISCV64)
-+ if (type == FLOAT32 || type == DOUBLE) {
-+ return reg.isSingle() || reg.isDouble();
-+ }
-+#else
- if (type == FLOAT32) {
- return reg.isSingle();
- }
- if (type == DOUBLE) {
- return reg.isDouble();
- }
-+#endif
- MOZ_ASSERT(type == SIMD128);
- return reg.isSimd128();
- }
-@@ -2292,6 +2298,8 @@ AnyRegister LAllocation::toAnyRegister() const {
- # include "jit/loong64/LIR-loong64.h"
- #elif defined(JS_CODEGEN_RISCV64)
- # include "jit/riscv64/LIR-riscv64.h"
-+#elif defined(JS_CODEGEN_PPC64)
-+# include "jit/ppc64/LIR-ppc64.h"
- #elif defined(JS_CODEGEN_MIPS64)
- # include "jit/mips-shared/LIR-mips-shared.h"
- # include "jit/mips64/LIR-mips64.h"
-diff --git a/js/src/jit/LIROps.yaml b/js/src/jit/LIROps.yaml
-index 315ff5fd5348..7fbea9e2ebec 100644
---- a/js/src/jit/LIROps.yaml
-+++ b/js/src/jit/LIROps.yaml
-@@ -2210,7 +2210,7 @@
- oldval: WordSized
- newval: WordSized
- # Needs additional temps on LL/SC platforms to extract/insert bits of word.
--#if defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64)
-+#if defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- num_temps: 4
- #else
- num_temps: 1
-@@ -2224,7 +2224,7 @@
- index: WordSized
- value: WordSized
- # Needs additional temps on LL/SC platforms to extract/insert bits of word.
--#if defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64)
-+#if defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- num_temps: 4
- #else
- num_temps: 1
-@@ -2238,7 +2238,7 @@
- index: WordSized
- value: WordSized
- # Needs additional temps on LL/SC platforms to extract/insert bits of word.
--#if defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64)
-+#if defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- num_temps: 4
- #else
- num_temps: 2
-@@ -2255,7 +2255,7 @@
- # Needs additional temps on LL/SC platforms to extract/insert bits of word.
- #if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64)
- num_temps: 1
--#elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64)
-+#elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- num_temps: 3
- #endif
- mir_op: AtomicTypedArrayElementBinop
-@@ -3066,7 +3066,7 @@
- operands:
- ptr: WordSized
- memoryBase: WordSized
--#if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64)
-+#if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- num_temps: 1
- #endif
- mir_op: true
-@@ -3078,7 +3078,7 @@
- memoryBase: WordSized
- #ifdef JS_CODEGEN_ARM
- num_temps: 2
--#elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64)
-+#elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- num_temps: 1
- #endif
- mir_op: WasmLoad
-@@ -3088,7 +3088,7 @@
- ptr: WordSized
- value: WordSized
- memoryBase: WordSized
--#if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64)
-+#if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- num_temps: 1
- #endif
- mir_op: true
-@@ -3098,7 +3098,7 @@
- ptr: WordSized
- value: Int64
- memoryBase: WordSized
--#if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64)
-+#if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- num_temps: 1
- #endif
- mir_op: WasmStore
-@@ -3128,7 +3128,7 @@
- memoryBase: WordSized
- #ifdef JS_CODEGEN_X86
- num_temps: 1
--#elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64)
-+#elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- # Temp that may be used on LL/SC platforms for extract/insert bits of word.
- num_temps: 3
- #endif
-@@ -3142,7 +3142,7 @@
- memoryBase: WordSized
- #ifdef JS_CODEGEN_X86
- num_temps: 1
--#elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64)
-+#elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- # Temp that may be used on LL/SC platforms for extract/insert bits of word.
- num_temps: 3
- #endif
-@@ -3154,7 +3154,7 @@
- ptr: WordSized
- value: WordSized
- memoryBase: WordSized
--#if defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64)
-+#if defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- # Temp that may be used on LL/SC platforms for extract/insert bits of word.
- num_temps: 3
- #elifdef JS_CODEGEN_X86
-@@ -3171,7 +3171,7 @@
- ptr: WordSized
- value: WordSized
- memoryBase: WordSized
--#if defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64)
-+#if defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- # Temp that may be used on LL/SC platforms for extract/insert bits of word.
- num_temps: 3
- #elif defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64)
-@@ -4424,6 +4424,64 @@
- mir_op: WasmAtomicExchangeHeap
- #endif
-
-+#ifdef JS_CODEGEN_PPC64
-+- name: DivOrModI64
-+ gen_boilerplate: false
-+
-+- name: UDivOrMod
-+ gen_boilerplate: false
-+
-+- name: UDivOrModI64
-+ gen_boilerplate: false
-+
-+- name: ModMaskI
-+ result_type: WordSized
-+ operands:
-+ input: WordSized
-+ arguments:
-+ shift: int32_t
-+ num_temps: 2
-+ mir_op: Mod
-+
-+- name: WasmTruncateToInt64
-+ result_type: Int64
-+ operands:
-+ input: WordSized
-+ mir_op: true
-+
-+- name: Int64ToFloatingPoint
-+ result_type: WordSized
-+ operands:
-+ input: Int64
-+ mir_op: true
-+
-+- name: WasmCompareExchangeI64
-+ result_type: Int64
-+ operands:
-+ ptr: WordSized
-+ oldValue: Int64
-+ newValue: Int64
-+ memoryBase: WordSized
-+ mir_op: WasmCompareExchangeHeap
-+
-+- name: WasmAtomicBinopI64
-+ result_type: Int64
-+ operands:
-+ ptr: WordSized
-+ value: Int64
-+ memoryBase: WordSized
-+ num_temps64: 1
-+ mir_op: WasmAtomicBinopHeap
-+
-+- name: WasmAtomicExchangeI64
-+ result_type: Int64
-+ operands:
-+ ptr: WordSized
-+ value: Int64
-+ memoryBase: WordSized
-+ mir_op: WasmAtomicExchangeHeap
-+#endif
-+
- #ifdef JS_CODEGEN_RISCV64
- - name: UDiv
- result_type: WordSized
-diff --git a/js/src/jit/Label.h b/js/src/jit/Label.h
-index 061bf978d26f..2a49ded9c967 100644
---- a/js/src/jit/Label.h
-+++ b/js/src/jit/Label.h
-@@ -23,7 +23,7 @@ struct LabelBase {
- uint32_t offset_ : 31;
-
- #if defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \
-- defined(JS_CODEGEN_RISCV64)
-+ defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- public:
- #endif
- static const uint32_t INVALID_OFFSET = 0x7fffffff; // UINT31_MAX.
-diff --git a/js/src/jit/Lowering.cpp b/js/src/jit/Lowering.cpp
-index 9c1c4b0df491..e3fe71ea9c83 100644
---- a/js/src/jit/Lowering.cpp
-+++ b/js/src/jit/Lowering.cpp
-@@ -1174,7 +1174,7 @@ void LIRGenerator::visitTest(MTest* test) {
-
- #if defined(ENABLE_WASM_SIMD) && \
- (defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) || \
-- defined(JS_CODEGEN_ARM64))
-+ defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_PPC64))
- // Check if the operand for this test is an any_true/all_true SIMD operation.
- // If it is, we want to emit an LWasmReduceAndBranchSimd128 node to avoid
- // generating an intermediate boolean result.
-diff --git a/js/src/jit/Lowering.h b/js/src/jit/Lowering.h
-index b4f133758eb6..d973a68989b5 100644
---- a/js/src/jit/Lowering.h
-+++ b/js/src/jit/Lowering.h
-@@ -23,6 +23,8 @@
- # include "jit/loong64/Lowering-loong64.h"
- #elif defined(JS_CODEGEN_RISCV64)
- # include "jit/riscv64/Lowering-riscv64.h"
-+#elif defined(JS_CODEGEN_PPC64)
-+# include "jit/ppc64/Lowering-ppc64.h"
- #elif defined(JS_CODEGEN_WASM32)
- # include "jit/wasm32/Lowering-wasm32.h"
- #elif defined(JS_CODEGEN_NONE)
-diff --git a/js/src/jit/MacroAssembler-inl.h b/js/src/jit/MacroAssembler-inl.h
-index 4747a22e171b..d7385df895d5 100644
---- a/js/src/jit/MacroAssembler-inl.h
-+++ b/js/src/jit/MacroAssembler-inl.h
-@@ -39,6 +39,8 @@
- # include "jit/loong64/MacroAssembler-loong64-inl.h"
- #elif defined(JS_CODEGEN_RISCV64)
- # include "jit/riscv64/MacroAssembler-riscv64-inl.h"
-+#elif defined(JS_CODEGEN_PPC64)
-+# include "jit/ppc64/MacroAssembler-ppc64-inl.h"
- #elif defined(JS_CODEGEN_WASM32)
- # include "jit/wasm32/MacroAssembler-wasm32-inl.h"
- #elif !defined(JS_CODEGEN_NONE)
-diff --git a/js/src/jit/MacroAssembler.cpp b/js/src/jit/MacroAssembler.cpp
-index eb95d6c9e2c4..5b28e811c88d 100644
---- a/js/src/jit/MacroAssembler.cpp
-+++ b/js/src/jit/MacroAssembler.cpp
-@@ -6128,7 +6128,7 @@ static void MoveDataBlock(MacroAssembler& masm, Register base, int32_t from,
- static constexpr Register scratch = ABINonArgReg0;
- masm.push(scratch);
- #elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \
-- defined(JS_CODEGEN_RISCV64)
-+ defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- UseScratchRegisterScope temps(masm);
- Register scratch = temps.Acquire();
- #elif !defined(JS_CODEGEN_NONE)
-@@ -6315,6 +6315,12 @@ static void CollapseWasmFrameFast(MacroAssembler& masm,
-
- #ifdef JS_USE_LINK_REGISTER
- // RA is already in its place, just move stack.
-+# ifdef JS_CODEGEN_PPC64
-+ // PPC64's LR is not a GPR, so WasmTailCallRAScratchReg is a normal GPR
-+ // (r14). We must explicitly move it to LR so the callee's prologue
-+ // (pushReturnAddress) saves the correct return address.
-+ masm.xs_mtlr(tempForRA);
-+# endif
- masm.addToStackPtr(Imm32(framePushedAtStart + newArgDest));
- #else
- // Push RA to new frame: store RA, restore temp, and move stack.
-@@ -6463,6 +6469,12 @@ static void CollapseWasmFrameSlow(MacroAssembler& masm,
- #ifdef JS_USE_LINK_REGISTER
- masm.freeStack(reserved);
- // RA is already in its place, just move stack.
-+# ifdef JS_CODEGEN_PPC64
-+ // PPC64's LR is not a GPR, so WasmTailCallRAScratchReg is a normal GPR
-+ // (r14). We must explicitly move the trampoline address to LR so the
-+ // callee returns to the trampoline.
-+ masm.xs_mtlr(tempForRA);
-+# endif
- masm.addToStackPtr(Imm32(framePushedAtStart + newArgDest));
- #else
- // Push RA to new frame: store RA, restore temp, and move stack.
-@@ -8527,7 +8539,7 @@ void MacroAssembler::debugAssertCanonicalInt32(Register r) {
- breakpoint();
- bind(&ok);
- # elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \
-- defined(JS_CODEGEN_RISCV64)
-+ defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- Label ok;
- UseScratchRegisterScope temps(*this);
- Register scratch = temps.Acquire();
-@@ -10567,6 +10579,15 @@ void MacroAssembler::orderedHashTableLookup(Register setOrMapObj,
- unboxInt32(Address(setOrMapObj, TableObject::offsetOfLiveCount()), temp1);
- branchTest32(Assembler::Zero, temp1, temp1, &notFound);
-
-+#if defined(JS_CODEGEN_PPC64)
-+ // If this was preceded by a MoveGroup instruction, the hash may have been
-+ // loaded algebraically since it's an Int32 (and thus sign-extended); the
-+ // operation doesn't know to keep the upper bits clear, failing the assert.
-+ if (isBigInt == IsBigInt::No) {
-+ as_rldicl(hash, hash, 0, 32);
-+ }
-+#endif
-+
- #ifdef DEBUG
- PushRegsInMask(LiveRegisterSet(RegisterSet::Volatile()));
-
-diff --git a/js/src/jit/MacroAssembler.h b/js/src/jit/MacroAssembler.h
-index 6c08bb554ca8..754e8642bb57 100644
---- a/js/src/jit/MacroAssembler.h
-+++ b/js/src/jit/MacroAssembler.h
-@@ -23,6 +23,8 @@
- # include "jit/loong64/MacroAssembler-loong64.h"
- #elif defined(JS_CODEGEN_RISCV64)
- # include "jit/riscv64/MacroAssembler-riscv64.h"
-+#elif defined(JS_CODEGEN_PPC64)
-+# include "jit/ppc64/MacroAssembler-ppc64.h"
- #elif defined(JS_CODEGEN_WASM32)
- # include "jit/wasm32/MacroAssembler-wasm32.h"
- #elif defined(JS_CODEGEN_NONE)
-@@ -93,8 +95,9 @@
- // }
- // ////}}} check_macroassembler_style
-
--#define ALL_ARCH mips64, arm, arm64, x86, x64, loong64, riscv64, wasm32
--#define ALL_SHARED_ARCH arm, arm64, loong64, mips64, riscv64, x86_shared, wasm32
-+#define ALL_ARCH mips64, arm, arm64, x86, x64, loong64, riscv64, ppc64, wasm32
-+#define ALL_SHARED_ARCH \
-+ arm, arm64, loong64, mips64, riscv64, ppc64, x86_shared, wasm32
-
- // * How this macro works:
- //
-@@ -140,6 +143,7 @@
- #define DEFINED_ON_mips64
- #define DEFINED_ON_loong64
- #define DEFINED_ON_riscv64
-+#define DEFINED_ON_ppc64
- #define DEFINED_ON_wasm32
- #define DEFINED_ON_none
-
-@@ -169,6 +173,9 @@
- #elif defined(JS_CODEGEN_RISCV64)
- # undef DEFINED_ON_riscv64
- # define DEFINED_ON_riscv64 define
-+#elif defined(JS_CODEGEN_PPC64)
-+# undef DEFINED_ON_ppc64
-+# define DEFINED_ON_ppc64 define
- #elif defined(JS_CODEGEN_WASM32)
- # undef DEFINED_ON_wasm32
- # define DEFINED_ON_wasm32 define
-@@ -562,7 +569,7 @@ class MacroAssembler : public MacroAssemblerSpecific {
- void Pop(const Register64 reg);
- void PopFlags() DEFINED_ON(x86_shared);
- void PopStackPtr()
-- DEFINED_ON(arm, mips64, x86_shared, loong64, riscv64, wasm32);
-+ DEFINED_ON(arm, mips64, x86_shared, loong64, riscv64, ppc64, wasm32);
-
- // Move the stack pointer based on the requested amount.
- void adjustStack(int amount);
-@@ -620,9 +627,9 @@ class MacroAssembler : public MacroAssemblerSpecific {
-
- // These do not adjust framePushed().
- void pushReturnAddress()
-- DEFINED_ON(mips64, arm, arm64, loong64, riscv64, wasm32);
-+ DEFINED_ON(mips64, arm, arm64, loong64, riscv64, ppc64, wasm32);
- void popReturnAddress()
-- DEFINED_ON(mips64, arm, arm64, loong64, riscv64, wasm32);
-+ DEFINED_ON(mips64, arm, arm64, loong64, riscv64, ppc64, wasm32);
-
- // Useful for dealing with two-valued returns.
- void moveRegPair(Register src0, Register src1, Register dst0, Register dst1,
-@@ -641,7 +648,7 @@ class MacroAssembler : public MacroAssemblerSpecific {
- CodeOffset farJumpWithPatch() PER_SHARED_ARCH;
- void patchFarJump(CodeOffset farJump, uint32_t targetOffset) PER_SHARED_ARCH;
- static void patchFarJump(uint8_t* farJump, uint8_t* target)
-- DEFINED_ON(arm, arm64, x86_shared, loong64, mips64, riscv64);
-+ DEFINED_ON(arm, arm64, x86_shared, loong64, mips64, riscv64, ppc64);
-
- // Emit a nop that can be patched to and from a nop and a call with int32
- // relative displacement.
-@@ -667,9 +674,9 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // target behaviour is only provided for `n` in the range 0 .. 2^31-1
- // inclusive.
- CodeOffset move32WithPatch(Register dest)
-- DEFINED_ON(x86_shared, arm, arm64, loong64, mips64, riscv64);
-+ DEFINED_ON(x86_shared, arm, arm64, loong64, mips64, riscv64, ppc64);
- void patchMove32(CodeOffset offset, Imm32 n)
-- DEFINED_ON(x86_shared, arm, arm64, loong64, mips64, riscv64);
-+ DEFINED_ON(x86_shared, arm, arm64, loong64, mips64, riscv64, ppc64);
-
- public:
- // ===============================================================
-@@ -1174,13 +1181,13 @@ class MacroAssembler : public MacroAssemblerSpecific {
- inline void mulPtr(ImmWord rhs, Register srcDest) PER_ARCH;
-
- inline void mul64(const Register64& rhs, const Register64& srcDest)
-- DEFINED_ON(x64, arm64, mips64, loong64, riscv64);
-+ DEFINED_ON(x64, arm64, mips64, loong64, riscv64, ppc64);
- inline void mul64(const Operand& src, const Register64& dest) DEFINED_ON(x64);
- inline void mul64(const Operand& src, const Register64& dest,
- const Register temp) DEFINED_ON(x64);
- inline void mul64(Imm64 imm, const Register64& dest) PER_ARCH;
- inline void mul64(Imm64 imm, const Register64& dest, const Register temp)
-- DEFINED_ON(x86, x64, arm, mips64, loong64, riscv64);
-+ DEFINED_ON(x86, x64, arm, mips64, loong64, riscv64, ppc64);
- inline void mul64(const Register64& src, const Register64& dest,
- const Register temp) PER_ARCH;
- inline void mul64(const Register64& src1, const Register64& src2,
-@@ -1202,11 +1209,11 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // On ARM, the chip must have hardware division instructions.
- inline void quotient32(Register lhs, Register rhs, Register dest,
- bool isUnsigned)
-- DEFINED_ON(mips64, arm, arm64, loong64, riscv64, wasm32);
-+ DEFINED_ON(mips64, arm, arm64, loong64, riscv64, wasm32, ppc64);
-
- inline void quotient64(Register lhs, Register rhs, Register dest,
- bool isUnsigned)
-- DEFINED_ON(arm64, loong64, mips64, riscv64);
-+ DEFINED_ON(arm64, loong64, mips64, riscv64, ppc64);
-
- // As above, but lhs and dest must be eax and tempEdx must be edx.
- inline void quotient32(Register lhs, Register rhs, Register dest,
-@@ -1219,11 +1226,11 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // On ARM, the chip must have hardware division instructions.
- inline void remainder32(Register lhs, Register rhs, Register dest,
- bool isUnsigned)
-- DEFINED_ON(mips64, arm, arm64, loong64, riscv64, wasm32);
-+ DEFINED_ON(mips64, arm, arm64, loong64, riscv64, wasm32, ppc64);
-
- inline void remainder64(Register lhs, Register rhs, Register dest,
- bool isUnsigned)
-- DEFINED_ON(arm64, loong64, mips64, riscv64);
-+ DEFINED_ON(arm64, loong64, mips64, riscv64, ppc64);
-
- // As above, but lhs and dest must be eax and tempEdx must be edx.
- inline void remainder32(Register lhs, Register rhs, Register dest,
-@@ -2080,7 +2087,7 @@ class MacroAssembler : public MacroAssemblerSpecific {
- template <typename T>
- void branchValueIsNurseryCellImpl(Condition cond, const T& value,
- Register temp, Label* label)
-- DEFINED_ON(arm64, x64, mips64, loong64, riscv64);
-+ DEFINED_ON(arm64, x64, mips64, loong64, riscv64, ppc64);
-
- template <typename T>
- inline void branchTestUndefinedImpl(Condition cond, const T& t, Label* label)
-@@ -2245,7 +2252,7 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // from all the other registers, on all supported targets.
- inline void wasmAddSubI128HI64(Register lhsLo, Register lhsHi, Register rhsLo,
- Register rhsHi, Register output, bool isAdd)
-- DEFINED_ON(x64, arm64, riscv64, loong64, mips64);
-+ DEFINED_ON(x64, arm64, riscv64, loong64, mips64, ppc64);
-
- // Produces the top 64 bits of the 128-bit value `RAX *widen rhs`. The result
- // will be in RAX. RDX is trashed. `rhs` may not be RAX or RDX. Callers
-@@ -2256,7 +2263,7 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // what the registers may be.
- inline void wasmMulI64WideHI64(Register lhs, Register rhs, Register output,
- bool isSigned)
-- DEFINED_ON(arm64, riscv64, loong64, mips64);
-+ DEFINED_ON(arm64, riscv64, loong64, mips64, ppc64);
-
- // ========================================================================
- // Canonicalization primitives.
-@@ -2355,68 +2362,68 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // Moves
-
- inline void moveSimd128(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Constants
-
- inline void loadConstantSimd128(const SimdConstant& v, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Splat
-
- inline void splatX16(Register src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void splatX16(uint32_t srcLane, FloatRegister src, FloatRegister dest)
- DEFINED_ON(arm64);
-
- inline void splatX8(Register src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void splatX8(uint32_t srcLane, FloatRegister src, FloatRegister dest)
- DEFINED_ON(arm64);
-
- inline void splatX4(Register src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void splatX4(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void splatX2(Register64 src, FloatRegister dest)
-- DEFINED_ON(x86, x64, arm64);
-+ DEFINED_ON(x86, x64, arm64, ppc64);
-
- inline void splatX2(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Extract lane as scalar. Float extraction does not canonicalize the value.
-
- inline void extractLaneInt8x16(uint32_t lane, FloatRegister src,
-- Register dest) DEFINED_ON(x86_shared, arm64);
-+ Register dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedExtractLaneInt8x16(uint32_t lane, FloatRegister src,
- Register dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void extractLaneInt16x8(uint32_t lane, FloatRegister src,
-- Register dest) DEFINED_ON(x86_shared, arm64);
-+ Register dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedExtractLaneInt16x8(uint32_t lane, FloatRegister src,
- Register dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void extractLaneInt32x4(uint32_t lane, FloatRegister src,
-- Register dest) DEFINED_ON(x86_shared, arm64);
-+ Register dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void extractLaneInt64x2(uint32_t lane, FloatRegister src,
-- Register64 dest) DEFINED_ON(x86, x64, arm64);
-+ Register64 dest) DEFINED_ON(x86, x64, arm64, ppc64);
-
- inline void extractLaneFloat32x4(uint32_t lane, FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void extractLaneFloat64x2(uint32_t lane, FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Replace lane value
-
-@@ -2425,21 +2432,21 @@ class MacroAssembler : public MacroAssemblerSpecific {
-
- inline void replaceLaneInt8x16(unsigned lane, Register rhs,
- FloatRegister lhsDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void replaceLaneInt16x8(unsigned lane, FloatRegister lhs, Register rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void replaceLaneInt16x8(unsigned lane, Register rhs,
- FloatRegister lhsDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void replaceLaneInt32x4(unsigned lane, FloatRegister lhs, Register rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void replaceLaneInt32x4(unsigned lane, Register rhs,
- FloatRegister lhsDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void replaceLaneInt64x2(unsigned lane, FloatRegister lhs,
- Register64 rhs, FloatRegister dest)
-@@ -2447,7 +2454,7 @@ class MacroAssembler : public MacroAssemblerSpecific {
-
- inline void replaceLaneInt64x2(unsigned lane, Register64 rhs,
- FloatRegister lhsDest)
-- DEFINED_ON(x86, x64, arm64);
-+ DEFINED_ON(x86, x64, arm64, ppc64);
-
- inline void replaceLaneFloat32x4(unsigned lane, FloatRegister lhs,
- FloatRegister rhs, FloatRegister dest)
-@@ -2455,7 +2462,7 @@ class MacroAssembler : public MacroAssemblerSpecific {
-
- inline void replaceLaneFloat32x4(unsigned lane, FloatRegister rhs,
- FloatRegister lhsDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void replaceLaneFloat64x2(unsigned lane, FloatRegister lhs,
- FloatRegister rhs, FloatRegister dest)
-@@ -2463,7 +2470,7 @@ class MacroAssembler : public MacroAssemblerSpecific {
-
- inline void replaceLaneFloat64x2(unsigned lane, FloatRegister rhs,
- FloatRegister lhsDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Shuffle - blend and permute with immediate indices, and its many
- // specializations. Lane values other than those mentioned are illegal.
-@@ -2471,11 +2478,11 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // lane values 0..31
- inline void shuffleInt8x16(const uint8_t lanes[16], FloatRegister rhs,
- FloatRegister lhsDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void shuffleInt8x16(const uint8_t lanes[16], FloatRegister lhs,
- FloatRegister rhs, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Lane values must be 0 (select from lhs) or FF (select from rhs).
- // The behavior is undefined for lane values that are neither 0 nor FF.
-@@ -2502,39 +2509,39 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // The implementation works effectively for I8x16, I16x8, I32x4, and I64x2.
- inline void laneSelectSimd128(FloatRegister mask, FloatRegister lhs,
- FloatRegister rhs, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void interleaveHighInt8x16(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void interleaveHighInt16x8(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void interleaveHighInt32x4(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void interleaveHighInt64x2(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void interleaveLowInt8x16(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void interleaveLowInt16x8(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void interleaveLowInt32x4(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void interleaveLowInt64x2(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Permute - permute with immediate indices.
-
-@@ -2544,7 +2551,7 @@ class MacroAssembler : public MacroAssemblerSpecific {
-
- // lane values 0..7
- inline void permuteInt16x8(const uint16_t lanes[8], FloatRegister src,
-- FloatRegister dest) DEFINED_ON(arm64);
-+ FloatRegister dest) DEFINED_ON(arm64, ppc64);
-
- // lane values 0..3 [sic].
- inline void permuteHighInt16x8(const uint16_t lanes[4], FloatRegister src,
-@@ -2562,80 +2569,80 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // low_16_bytes_of((lhs ++ rhs) >> shift*8), shift must be < 16
- inline void concatAndRightShiftSimd128(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest, uint32_t shift)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Rotate right by immediate count:
- // low_16_bytes_of((src ++ src) >> shift*8), shift must be < 16
- inline void rotateRightSimd128(FloatRegister src, FloatRegister dest,
-- uint32_t shift) DEFINED_ON(arm64);
-+ uint32_t shift) DEFINED_ON(arm64, ppc64);
-
- // Shift bytes with immediate count, shifting in zeroes. Shift count 0..15.
-
- inline void leftShiftSimd128(Imm32 count, FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void rightShiftSimd128(Imm32 count, FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Zero extend int values.
-
- inline void zeroExtend8x16To16x8(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
- inline void zeroExtend8x16To32x4(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
- inline void zeroExtend8x16To64x2(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
- inline void zeroExtend16x8To32x4(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
- inline void zeroExtend16x8To64x2(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
- inline void zeroExtend32x4To64x2(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Reverse bytes in lanes.
-
- inline void reverseInt16x8(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void reverseInt32x4(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void reverseInt64x2(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Swizzle - permute with variable indices. `rhs` holds the lanes parameter.
-
- inline void swizzleInt8x16(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void swizzleInt8x16Relaxed(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Integer Add
-
- inline void addInt8x16(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void addInt8x16(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void addInt16x8(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void addInt16x8(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void addInt32x4(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void addInt32x4(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void addInt64x2(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void addInt64x2(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-@@ -2643,13 +2650,13 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // Integer Subtract
-
- inline void subInt8x16(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void subInt8x16(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void subInt16x8(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void subInt16x8(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-@@ -2658,24 +2665,24 @@ class MacroAssembler : public MacroAssemblerSpecific {
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void subInt32x4(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void subInt64x2(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void subInt64x2(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Integer Multiply
-
- inline void mulInt16x8(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void mulInt16x8(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void mulInt32x4(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void mulInt32x4(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-@@ -2691,100 +2698,100 @@ class MacroAssembler : public MacroAssemblerSpecific {
-
- inline void mulInt64x2(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest, FloatRegister temp1,
-- FloatRegister temp2) DEFINED_ON(arm64);
-+ FloatRegister temp2) DEFINED_ON(arm64, ppc64);
-
- // Note for the extMul opcodes, the NxM designation is for the input lanes;
- // the output lanes are twice as wide.
- inline void extMulLowInt8x16(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void extMulHighInt8x16(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedExtMulLowInt8x16(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedExtMulHighInt8x16(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void extMulLowInt16x8(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void extMulHighInt16x8(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedExtMulLowInt16x8(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedExtMulHighInt16x8(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void extMulLowInt32x4(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void extMulHighInt32x4(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedExtMulLowInt32x4(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedExtMulHighInt32x4(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void q15MulrSatInt16x8(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Integer Negate
-
- inline void negInt8x16(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void negInt16x8(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void negInt32x4(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void negInt64x2(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Saturating integer add
-
- inline void addSatInt8x16(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void addSatInt8x16(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void unsignedAddSatInt8x16(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedAddSatInt8x16(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void addSatInt16x8(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void addSatInt16x8(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void unsignedAddSatInt16x8(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedAddSatInt16x8(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-@@ -2792,27 +2799,27 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // Saturating integer subtract
-
- inline void subSatInt8x16(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void subSatInt8x16(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void unsignedSubSatInt8x16(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedSubSatInt8x16(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void subSatInt16x8(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void subSatInt16x8(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void unsignedSubSatInt16x8(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedSubSatInt16x8(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-@@ -2820,40 +2827,40 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // Lane-wise integer minimum
-
- inline void minInt8x16(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void minInt8x16(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void unsignedMinInt8x16(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedMinInt8x16(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void minInt16x8(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void minInt16x8(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void unsignedMinInt16x8(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedMinInt16x8(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void minInt32x4(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void minInt32x4(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void unsignedMinInt32x4(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedMinInt32x4(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-@@ -2861,40 +2868,40 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // Lane-wise integer maximum
-
- inline void maxInt8x16(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void maxInt8x16(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void unsignedMaxInt8x16(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedMaxInt8x16(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void maxInt16x8(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void maxInt16x8(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void unsignedMaxInt16x8(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedMaxInt16x8(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void maxInt32x4(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void maxInt32x4(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void unsignedMaxInt32x4(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedMaxInt32x4(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-@@ -2903,25 +2910,25 @@ class MacroAssembler : public MacroAssemblerSpecific {
-
- inline void unsignedAverageInt8x16(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedAverageInt16x8(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Lane-wise integer absolute value
-
- inline void absInt8x16(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void absInt16x8(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void absInt32x4(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void absInt64x2(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Left shift by scalar. Immediates and variable shifts must have been
- // masked; shifts of zero will work but may or may not generate code.
-@@ -2930,41 +2937,41 @@ class MacroAssembler : public MacroAssemblerSpecific {
- FloatRegister temp) DEFINED_ON(x86_shared);
-
- inline void leftShiftInt8x16(FloatRegister lhs, Register rhs,
-- FloatRegister dest) DEFINED_ON(arm64);
-+ FloatRegister dest) DEFINED_ON(arm64, ppc64);
-
- inline void leftShiftInt8x16(Imm32 count, FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void leftShiftInt16x8(Register rhs, FloatRegister lhsDest)
- DEFINED_ON(x86_shared);
-
- inline void leftShiftInt16x8(FloatRegister lhs, Register rhs,
-- FloatRegister dest) DEFINED_ON(arm64);
-+ FloatRegister dest) DEFINED_ON(arm64, ppc64);
-
- inline void leftShiftInt16x8(Imm32 count, FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void leftShiftInt32x4(Register rhs, FloatRegister lhsDest)
- DEFINED_ON(x86_shared);
-
- inline void leftShiftInt32x4(FloatRegister lhs, Register rhs,
-- FloatRegister dest) DEFINED_ON(arm64);
-+ FloatRegister dest) DEFINED_ON(arm64, ppc64);
-
- inline void leftShiftInt32x4(Imm32 count, FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void leftShiftInt64x2(Register rhs, FloatRegister lhsDest)
- DEFINED_ON(x86_shared);
-
- inline void leftShiftInt64x2(FloatRegister lhs, Register rhs,
-- FloatRegister dest) DEFINED_ON(arm64);
-+ FloatRegister dest) DEFINED_ON(arm64, ppc64);
-
- inline void leftShiftInt64x2(Imm32 count, FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Right shift by scalar. Immediates and variable shifts must have been
- // masked; shifts of zero will work but may or may not generate code.
-@@ -2973,82 +2980,82 @@ class MacroAssembler : public MacroAssemblerSpecific {
- FloatRegister temp) DEFINED_ON(x86_shared);
-
- inline void rightShiftInt8x16(FloatRegister lhs, Register rhs,
-- FloatRegister dest) DEFINED_ON(arm64);
-+ FloatRegister dest) DEFINED_ON(arm64, ppc64);
-
- inline void rightShiftInt8x16(Imm32 count, FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedRightShiftInt8x16(Register rhs, FloatRegister lhsDest,
- FloatRegister temp)
- DEFINED_ON(x86_shared);
-
- inline void unsignedRightShiftInt8x16(FloatRegister lhs, Register rhs,
-- FloatRegister dest) DEFINED_ON(arm64);
-+ FloatRegister dest) DEFINED_ON(arm64, ppc64);
-
- inline void unsignedRightShiftInt8x16(Imm32 count, FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void rightShiftInt16x8(Register rhs, FloatRegister lhsDest)
- DEFINED_ON(x86_shared);
-
- inline void rightShiftInt16x8(FloatRegister lhs, Register rhs,
-- FloatRegister dest) DEFINED_ON(arm64);
-+ FloatRegister dest) DEFINED_ON(arm64, ppc64);
-
- inline void rightShiftInt16x8(Imm32 count, FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedRightShiftInt16x8(Register rhs, FloatRegister lhsDest)
- DEFINED_ON(x86_shared);
-
- inline void unsignedRightShiftInt16x8(FloatRegister lhs, Register rhs,
-- FloatRegister dest) DEFINED_ON(arm64);
-+ FloatRegister dest) DEFINED_ON(arm64, ppc64);
-
- inline void unsignedRightShiftInt16x8(Imm32 count, FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void rightShiftInt32x4(Register rhs, FloatRegister lhsDest)
- DEFINED_ON(x86_shared);
-
- inline void rightShiftInt32x4(FloatRegister lhs, Register rhs,
-- FloatRegister dest) DEFINED_ON(arm64);
-+ FloatRegister dest) DEFINED_ON(arm64, ppc64);
-
- inline void rightShiftInt32x4(Imm32 count, FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedRightShiftInt32x4(Register rhs, FloatRegister lhsDest)
- DEFINED_ON(x86_shared);
-
- inline void unsignedRightShiftInt32x4(FloatRegister lhs, Register rhs,
-- FloatRegister dest) DEFINED_ON(arm64);
-+ FloatRegister dest) DEFINED_ON(arm64, ppc64);
-
- inline void unsignedRightShiftInt32x4(Imm32 count, FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void rightShiftInt64x2(Register rhs, FloatRegister lhsDest,
- FloatRegister temp) DEFINED_ON(x86_shared);
-
- inline void rightShiftInt64x2(Imm32 count, FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void rightShiftInt64x2(FloatRegister lhs, Register rhs,
-- FloatRegister dest) DEFINED_ON(arm64);
-+ FloatRegister dest) DEFINED_ON(arm64, ppc64);
-
- inline void unsignedRightShiftInt64x2(Register rhs, FloatRegister lhsDest)
- DEFINED_ON(x86_shared);
-
- inline void unsignedRightShiftInt64x2(FloatRegister lhs, Register rhs,
-- FloatRegister dest) DEFINED_ON(arm64);
-+ FloatRegister dest) DEFINED_ON(arm64, ppc64);
-
- inline void unsignedRightShiftInt64x2(Imm32 count, FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Sign replication operation
-
-@@ -3067,47 +3074,47 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // Bitwise and, or, xor, not
-
- inline void bitwiseAndSimd128(FloatRegister rhs, FloatRegister lhsDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void bitwiseAndSimd128(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void bitwiseAndSimd128(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void bitwiseOrSimd128(FloatRegister rhs, FloatRegister lhsDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void bitwiseOrSimd128(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void bitwiseOrSimd128(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void bitwiseXorSimd128(FloatRegister rhs, FloatRegister lhsDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void bitwiseXorSimd128(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void bitwiseXorSimd128(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void bitwiseNotSimd128(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Bitwise AND with compliment: dest = lhs & ~rhs, note only arm64 can do it.
- inline void bitwiseAndNotSimd128(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister lhsDest) DEFINED_ON(arm64);
-+ FloatRegister lhsDest) DEFINED_ON(arm64, ppc64);
-
- // Bitwise AND with complement: dest = ~lhs & rhs, note this is not what Wasm
- // wants but what the x86 hardware offers. Hence the name.
-
- inline void bitwiseNotAndSimd128(FloatRegister rhs, FloatRegister lhsDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void bitwiseNotAndSimd128(FloatRegister lhs, FloatRegister rhs,
- FloatRegister lhsDest)
-@@ -3120,34 +3127,34 @@ class MacroAssembler : public MacroAssemblerSpecific {
- FloatRegister temp) DEFINED_ON(x86_shared);
-
- inline void bitwiseSelectSimd128(FloatRegister onTrue, FloatRegister onFalse,
-- FloatRegister maskDest) DEFINED_ON(arm64);
-+ FloatRegister maskDest) DEFINED_ON(arm64, ppc64);
-
- // Population count
-
- inline void popcntInt8x16(FloatRegister src, FloatRegister dest,
-- FloatRegister temp) DEFINED_ON(x86_shared);
-+ FloatRegister temp) DEFINED_ON(x86_shared, ppc64);
-
- inline void popcntInt8x16(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(arm64);
-+ DEFINED_ON(arm64, ppc64);
-
- // Any lane true, ie, any bit set
-
- inline void anyTrueSimd128(FloatRegister src, Register dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // All lanes true
-
- inline void allTrueInt8x16(FloatRegister src, Register dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void allTrueInt16x8(FloatRegister src, Register dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void allTrueInt32x4(FloatRegister src, Register dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void allTrueInt64x2(FloatRegister src, Register dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Bitmask, ie extract and compress high bits of all lanes
-
-@@ -3155,31 +3162,31 @@ class MacroAssembler : public MacroAssemblerSpecific {
- DEFINED_ON(x86_shared);
-
- inline void bitmaskInt8x16(FloatRegister src, Register dest,
-- FloatRegister temp) DEFINED_ON(arm64);
-+ FloatRegister temp) DEFINED_ON(arm64, ppc64);
-
- inline void bitmaskInt16x8(FloatRegister src, Register dest)
- DEFINED_ON(x86_shared);
-
- inline void bitmaskInt16x8(FloatRegister src, Register dest,
-- FloatRegister temp) DEFINED_ON(arm64);
-+ FloatRegister temp) DEFINED_ON(arm64, ppc64);
-
- inline void bitmaskInt32x4(FloatRegister src, Register dest)
- DEFINED_ON(x86_shared);
-
- inline void bitmaskInt32x4(FloatRegister src, Register dest,
-- FloatRegister temp) DEFINED_ON(arm64);
-+ FloatRegister temp) DEFINED_ON(arm64, ppc64);
-
- inline void bitmaskInt64x2(FloatRegister src, Register dest)
- DEFINED_ON(x86_shared);
-
- inline void bitmaskInt64x2(FloatRegister src, Register dest,
-- FloatRegister temp) DEFINED_ON(arm64);
-+ FloatRegister temp) DEFINED_ON(arm64, ppc64);
-
- // Comparisons (integer and floating-point)
-
- inline void compareInt8x16(Assembler::Condition cond, FloatRegister rhs,
- FloatRegister lhsDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // On x86_shared, limited to !=, ==, <=, >
- inline void compareInt8x16(Assembler::Condition cond, FloatRegister lhs,
-@@ -3189,15 +3196,15 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // On arm64, use any integer comparison condition.
- inline void compareInt8x16(Assembler::Condition cond, FloatRegister lhs,
- FloatRegister rhs, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void compareInt16x8(Assembler::Condition cond, FloatRegister rhs,
- FloatRegister lhsDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void compareInt16x8(Assembler::Condition cond, FloatRegister lhs,
- FloatRegister rhs, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // On x86_shared, limited to !=, ==, <=, >
- inline void compareInt16x8(Assembler::Condition cond, FloatRegister lhs,
-@@ -3207,7 +3214,7 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // On x86_shared, limited to !=, ==, <=, >
- inline void compareInt32x4(Assembler::Condition cond, FloatRegister rhs,
- FloatRegister lhsDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void compareInt32x4(Assembler::Condition cond, FloatRegister lhs,
- const SimdConstant& rhs, FloatRegister dest)
-@@ -3216,7 +3223,7 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // On arm64, use any integer comparison condition.
- inline void compareInt32x4(Assembler::Condition cond, FloatRegister lhs,
- FloatRegister rhs, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void compareForEqualityInt64x2(Assembler::Condition cond,
- FloatRegister lhs, FloatRegister rhs,
-@@ -3230,15 +3237,15 @@ class MacroAssembler : public MacroAssemblerSpecific {
- DEFINED_ON(x86_shared);
-
- inline void compareInt64x2(Assembler::Condition cond, FloatRegister rhs,
-- FloatRegister lhsDest) DEFINED_ON(arm64);
-+ FloatRegister lhsDest) DEFINED_ON(arm64, ppc64);
-
- inline void compareInt64x2(Assembler::Condition cond, FloatRegister lhs,
- FloatRegister rhs, FloatRegister dest)
-- DEFINED_ON(arm64);
-+ DEFINED_ON(arm64, ppc64);
-
- inline void compareFloat32x4(Assembler::Condition cond, FloatRegister rhs,
- FloatRegister lhsDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // On x86_shared, limited to ==, !=, <, <=
- inline void compareFloat32x4(Assembler::Condition cond, FloatRegister lhs,
-@@ -3249,11 +3256,11 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // On arm64, use any float-point comparison condition.
- inline void compareFloat32x4(Assembler::Condition cond, FloatRegister lhs,
- FloatRegister rhs, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void compareFloat64x2(Assembler::Condition cond, FloatRegister rhs,
- FloatRegister lhsDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // On x86_shared, limited to ==, !=, <, <=
- inline void compareFloat64x2(Assembler::Condition cond, FloatRegister lhs,
-@@ -3264,7 +3271,7 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // On arm64, use any float-point comparison condition.
- inline void compareFloat64x2(Assembler::Condition cond, FloatRegister lhs,
- FloatRegister rhs, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Load
-
-@@ -3273,92 +3280,92 @@ class MacroAssembler : public MacroAssemblerSpecific {
-
- inline FaultingCodeOffset loadUnalignedSimd128(const Address& src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline FaultingCodeOffset loadUnalignedSimd128(const BaseIndex& src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Store
-
- inline FaultingCodeOffset storeUnalignedSimd128(FloatRegister src,
- const Address& dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline FaultingCodeOffset storeUnalignedSimd128(FloatRegister src,
- const BaseIndex& dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Floating point negation
-
- inline void negFloat32x4(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void negFloat64x2(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Floating point absolute value
-
- inline void absFloat32x4(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void absFloat64x2(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // NaN-propagating minimum
-
- inline void minFloat32x4(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest, FloatRegister temp1,
-- FloatRegister temp2) DEFINED_ON(x86_shared);
-+ FloatRegister temp2) DEFINED_ON(x86_shared, ppc64);
-
- inline void minFloat32x4(FloatRegister rhs, FloatRegister lhsDest)
-- DEFINED_ON(arm64);
-+ DEFINED_ON(arm64, ppc64);
-
- inline void minFloat32x4(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(arm64);
-+ FloatRegister dest) DEFINED_ON(arm64, ppc64);
-
- inline void minFloat64x2(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest, FloatRegister temp1,
-- FloatRegister temp2) DEFINED_ON(x86_shared);
-+ FloatRegister temp2) DEFINED_ON(x86_shared, ppc64);
-
- inline void minFloat64x2(FloatRegister rhs, FloatRegister lhsDest)
-- DEFINED_ON(arm64);
-+ DEFINED_ON(arm64, ppc64);
-
- inline void minFloat64x2(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(arm64);
-+ FloatRegister dest) DEFINED_ON(arm64, ppc64);
-
- // NaN-propagating maximum
-
- inline void maxFloat32x4(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest, FloatRegister temp1,
-- FloatRegister temp2) DEFINED_ON(x86_shared);
-+ FloatRegister temp2) DEFINED_ON(x86_shared, ppc64);
-
- inline void maxFloat32x4(FloatRegister rhs, FloatRegister lhsDest)
-- DEFINED_ON(arm64);
-+ DEFINED_ON(arm64, ppc64);
-
- inline void maxFloat32x4(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(arm64);
-+ FloatRegister dest) DEFINED_ON(arm64, ppc64);
-
- inline void maxFloat64x2(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest, FloatRegister temp1,
-- FloatRegister temp2) DEFINED_ON(x86_shared);
-+ FloatRegister temp2) DEFINED_ON(x86_shared, ppc64);
-
- inline void maxFloat64x2(FloatRegister rhs, FloatRegister lhsDest)
-- DEFINED_ON(arm64);
-+ DEFINED_ON(arm64, ppc64);
-
- inline void maxFloat64x2(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(arm64);
-+ FloatRegister dest) DEFINED_ON(arm64, ppc64);
-
- // Floating add
-
- inline void addFloat32x4(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void addFloat32x4(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void addFloat64x2(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void addFloat64x2(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-@@ -3366,13 +3373,13 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // Floating subtract
-
- inline void subFloat32x4(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void subFloat32x4(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void subFloat64x2(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void subFloat64x2(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-@@ -3380,13 +3387,13 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // Floating division
-
- inline void divFloat32x4(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void divFloat32x4(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void divFloat64x2(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void divFloat64x2(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-@@ -3394,13 +3401,13 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // Floating Multiply
-
- inline void mulFloat32x4(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void mulFloat32x4(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void mulFloat64x2(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void mulFloat64x2(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-@@ -3408,91 +3415,91 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // Pairwise add
-
- inline void extAddPairwiseInt8x16(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedExtAddPairwiseInt8x16(FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void extAddPairwiseInt16x8(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedExtAddPairwiseInt16x8(FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Floating square root
-
- inline void sqrtFloat32x4(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void sqrtFloat64x2(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Integer to floating point with rounding
-
- inline void convertInt32x4ToFloat32x4(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedConvertInt32x4ToFloat32x4(FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void convertInt32x4ToFloat64x2(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedConvertInt32x4ToFloat64x2(FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Floating point to integer with saturation
-
- inline void truncSatFloat32x4ToInt32x4(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedTruncSatFloat32x4ToInt32x4(FloatRegister src,
- FloatRegister dest,
- FloatRegister temp)
-- DEFINED_ON(x86_shared);
-+ DEFINED_ON(x86_shared, ppc64);
-
- inline void unsignedTruncSatFloat32x4ToInt32x4(FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(arm64);
-+ DEFINED_ON(arm64, ppc64);
-
- inline void truncSatFloat64x2ToInt32x4(FloatRegister src, FloatRegister dest,
- FloatRegister temp)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedTruncSatFloat64x2ToInt32x4(FloatRegister src,
- FloatRegister dest,
- FloatRegister temp)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void truncFloat32x4ToInt32x4Relaxed(FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedTruncFloat32x4ToInt32x4Relaxed(FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void truncFloat64x2ToInt32x4Relaxed(FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedTruncFloat64x2ToInt32x4Relaxed(FloatRegister src,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Floating point narrowing
-
- inline void convertFloat64x2ToFloat32x4(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Floating point widening
-
- inline void convertFloat32x4ToFloat64x2(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Integer to integer narrowing
-
-@@ -3500,65 +3507,65 @@ class MacroAssembler : public MacroAssemblerSpecific {
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void narrowInt16x8(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedNarrowInt16x8(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void unsignedNarrowInt16x8(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void narrowInt32x4(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void narrowInt32x4(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedNarrowInt32x4(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void unsignedNarrowInt32x4(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Integer to integer widening
-
- inline void widenLowInt8x16(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void widenHighInt8x16(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedWidenLowInt8x16(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedWidenHighInt8x16(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void widenLowInt16x8(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void widenHighInt16x8(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedWidenLowInt16x8(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedWidenHighInt16x8(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void widenLowInt32x4(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedWidenLowInt32x4(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void widenHighInt32x4(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void unsignedWidenHighInt32x4(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Compare-based minimum/maximum
- //
-@@ -3570,47 +3577,47 @@ class MacroAssembler : public MacroAssemblerSpecific {
-
- inline void pseudoMinFloat32x4(FloatRegister rhsOrRhsDest,
- FloatRegister lhsOrLhsDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void pseudoMinFloat32x4(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void pseudoMinFloat64x2(FloatRegister rhsOrRhsDest,
- FloatRegister lhsOrLhsDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void pseudoMinFloat64x2(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void pseudoMaxFloat32x4(FloatRegister rhsOrRhsDest,
- FloatRegister lhsOrLhsDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void pseudoMaxFloat32x4(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void pseudoMaxFloat64x2(FloatRegister rhsOrRhsDest,
- FloatRegister lhsOrLhsDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void pseudoMaxFloat64x2(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Widening/pairwise integer dot product
-
- inline void widenDotInt16x8(FloatRegister lhs, FloatRegister rhs,
-- FloatRegister dest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister dest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void widenDotInt16x8(FloatRegister lhs, const SimdConstant& rhs,
- FloatRegister dest) DEFINED_ON(x86_shared);
-
- inline void dotInt8x16Int7x16(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void dotInt8x16Int7x16ThenAdd(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-@@ -3618,81 +3625,81 @@ class MacroAssembler : public MacroAssemblerSpecific {
-
- inline void dotInt8x16Int7x16ThenAdd(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest, FloatRegister temp)
-- DEFINED_ON(arm64);
-+ DEFINED_ON(arm64, ppc64);
-
- // Floating point rounding
-
- inline void ceilFloat32x4(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void ceilFloat64x2(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void floorFloat32x4(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void floorFloat64x2(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void truncFloat32x4(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void truncFloat64x2(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void nearestFloat32x4(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void nearestFloat64x2(FloatRegister src, FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- // Floating multiply-accumulate: srcDest [+-]= src1 * src2
-
- inline void fmaFloat32x4(FloatRegister src1, FloatRegister src2,
-- FloatRegister srcDest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister srcDest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void fnmaFloat32x4(FloatRegister src1, FloatRegister src2,
- FloatRegister srcDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void fmaFloat64x2(FloatRegister src1, FloatRegister src2,
-- FloatRegister srcDest) DEFINED_ON(x86_shared, arm64);
-+ FloatRegister srcDest) DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void fnmaFloat64x2(FloatRegister src1, FloatRegister src2,
- FloatRegister srcDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void minFloat32x4Relaxed(FloatRegister src, FloatRegister srcDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void minFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void maxFloat32x4Relaxed(FloatRegister src, FloatRegister srcDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void maxFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void minFloat64x2Relaxed(FloatRegister src, FloatRegister srcDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void minFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void maxFloat64x2Relaxed(FloatRegister src, FloatRegister srcDest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void maxFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- inline void q15MulrInt16x8Relaxed(FloatRegister lhs, FloatRegister rhs,
- FloatRegister dest)
-- DEFINED_ON(x86_shared, arm64);
-+ DEFINED_ON(x86_shared, arm64, ppc64);
-
- public:
- // ========================================================================
-@@ -3717,10 +3724,10 @@ class MacroAssembler : public MacroAssemblerSpecific {
-
- // temp required on x86 and x64; must be undefined on mips64 and loong64.
- void convertUInt64ToFloat32(Register64 src, FloatRegister dest, Register temp)
-- DEFINED_ON(arm64, mips64, loong64, riscv64, wasm32, x64, x86);
-+ DEFINED_ON(arm64, mips64, loong64, ppc64, riscv64, wasm32, x64, x86);
-
- void convertInt64ToFloat32(Register64 src, FloatRegister dest)
-- DEFINED_ON(arm64, mips64, loong64, riscv64, wasm32, x64, x86);
-+ DEFINED_ON(arm64, mips64, loong64, ppc64, riscv64, wasm32, x64, x86);
-
- bool convertUInt64ToDoubleNeedsTemp() PER_ARCH;
-
-@@ -3801,16 +3808,16 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // Scalar::Int64.
- void wasmLoad(const wasm::MemoryAccessDesc& access, Register memoryBase,
- Register ptr, Register ptrScratch, AnyRegister output)
-- DEFINED_ON(arm, loong64, riscv64, mips64);
-+ DEFINED_ON(arm, loong64, riscv64, mips64, ppc64);
- void wasmLoadI64(const wasm::MemoryAccessDesc& access, Register memoryBase,
- Register ptr, Register ptrScratch, Register64 output)
-- DEFINED_ON(arm, mips64, loong64, riscv64);
-+ DEFINED_ON(arm, mips64, loong64, riscv64, ppc64);
- void wasmStore(const wasm::MemoryAccessDesc& access, AnyRegister value,
- Register memoryBase, Register ptr, Register ptrScratch)
-- DEFINED_ON(arm, loong64, riscv64, mips64);
-+ DEFINED_ON(arm, loong64, riscv64, mips64, ppc64);
- void wasmStoreI64(const wasm::MemoryAccessDesc& access, Register64 value,
- Register memoryBase, Register ptr, Register ptrScratch)
-- DEFINED_ON(arm, mips64, loong64, riscv64);
-+ DEFINED_ON(arm, mips64, loong64, riscv64, ppc64);
-
- // These accept general memoryBase + ptr + offset (in `access`); the offset is
- // always smaller than the guard region. They will insert an additional add
-@@ -3889,11 +3896,11 @@ class MacroAssembler : public MacroAssemblerSpecific {
- void wasmTruncateDoubleToInt64(FloatRegister input, Register64 output,
- bool isSaturating, Label* oolEntry,
- Label* oolRejoin, FloatRegister tempDouble)
-- DEFINED_ON(arm64, x86, x64, mips64, loong64, riscv64, wasm32);
-+ DEFINED_ON(arm64, x86, x64, mips64, loong64, riscv64, wasm32, ppc64);
- void wasmTruncateDoubleToUInt64(FloatRegister input, Register64 output,
- bool isSaturating, Label* oolEntry,
- Label* oolRejoin, FloatRegister tempDouble)
-- DEFINED_ON(arm64, x86, x64, mips64, loong64, riscv64, wasm32);
-+ DEFINED_ON(arm64, x86, x64, mips64, loong64, riscv64, wasm32, ppc64);
- void oolWasmTruncateCheckF64ToI64(FloatRegister input, Register64 output,
- TruncFlags flags,
- const wasm::TrapSiteDesc& trapSiteDesc,
-@@ -3902,11 +3909,11 @@ class MacroAssembler : public MacroAssemblerSpecific {
- void wasmTruncateFloat32ToInt64(FloatRegister input, Register64 output,
- bool isSaturating, Label* oolEntry,
- Label* oolRejoin, FloatRegister tempDouble)
-- DEFINED_ON(arm64, x86, x64, mips64, loong64, riscv64, wasm32);
-+ DEFINED_ON(arm64, x86, x64, mips64, loong64, riscv64, wasm32, ppc64);
- void wasmTruncateFloat32ToUInt64(FloatRegister input, Register64 output,
- bool isSaturating, Label* oolEntry,
- Label* oolRejoin, FloatRegister tempDouble)
-- DEFINED_ON(arm64, x86, x64, mips64, loong64, riscv64, wasm32);
-+ DEFINED_ON(arm64, x86, x64, mips64, loong64, riscv64, wasm32, ppc64);
- void oolWasmTruncateCheckF32ToI64(FloatRegister input, Register64 output,
- TruncFlags flags,
- const wasm::TrapSiteDesc& trapSiteDesc,
-@@ -4220,7 +4227,8 @@ class MacroAssembler : public MacroAssemblerSpecific {
- // convention, which requires predictable high bits. In practice, this means
- // that the 32-bit value will be zero-extended or sign-extended to 64 bits as
- // appropriate for the platform.
-- void widenInt32(Register r) DEFINED_ON(arm64, x64, mips64, loong64, riscv64);
-+ void widenInt32(Register r)
-+ DEFINED_ON(arm64, x64, mips64, loong64, riscv64, ppc64);
-
- // As enterFakeExitFrame(), but using register conventions appropriate for
- // wasm stubs.
-@@ -4287,13 +4295,13 @@ class MacroAssembler : public MacroAssemblerSpecific {
- const Address& mem, Register expected,
- Register replacement, Register valueTemp,
- Register offsetTemp, Register maskTemp, Register output)
-- DEFINED_ON(mips64, loong64, riscv64);
-+ DEFINED_ON(mips64, loong64, riscv64, ppc64);
-
- void compareExchange(Scalar::Type type, Synchronization sync,
- const BaseIndex& mem, Register expected,
- Register replacement, Register valueTemp,
- Register offsetTemp, Register maskTemp, Register output)
-- DEFINED_ON(mips64, loong64, riscv64);
-+ DEFINED_ON(mips64, loong64, riscv64, ppc64);
-
- // x86: `expected` and `output` must be edx:eax; `replacement` is ecx:ebx.
- // x64: `output` must be rax.
-@@ -4303,12 +4311,12 @@ class MacroAssembler : public MacroAssemblerSpecific {
- void compareExchange64(Synchronization sync, const Address& mem,
- Register64 expected, Register64 replacement,
- Register64 output)
-- DEFINED_ON(arm, arm64, x64, x86, mips64, loong64, riscv64);
-+ DEFINED_ON(arm, arm64, x64, x86, mips64, loong64, riscv64, ppc64);
-
- void compareExchange64(Synchronization sync, const BaseIndex& mem,
- Register64 expected, Register64 replacement,
- Register64 output)
-- DEFINED_ON(arm, arm64, x64, x86, mips64, loong64, riscv64);
-+ DEFINED_ON(arm, arm64, x64, x86, mips64, loong64, riscv64, ppc64);
-
- // Exchange with memory. Return the value initially in memory.
- // MIPS: `valueTemp`, `offsetTemp` and `maskTemp` must be defined for 8-bit
-@@ -4325,12 +4333,12 @@ class MacroAssembler : public MacroAssemblerSpecific {
- void atomicExchange(Scalar::Type type, Synchronization sync,
- const Address& mem, Register value, Register valueTemp,
- Register offsetTemp, Register maskTemp, Register output)
-- DEFINED_ON(mips64, loong64, riscv64);
-+ DEFINED_ON(mips64, loong64, riscv64, ppc64);
-
- void atomicExchange(Scalar::Type type, Synchronization sync,
- const BaseIndex& mem, Register value, Register valueTemp,
- Register offsetTemp, Register maskTemp, Register output)
-- DEFINED_ON(mips64, loong64, riscv64);
-+ DEFINED_ON(mips64, loong64, riscv64, ppc64);
-
- // x86: `value` must be ecx:ebx; `output` must be edx:eax.
- // ARM: `value` and `output` must be distinct and (even,odd) pairs.
-@@ -4338,11 +4346,11 @@ class MacroAssembler : public MacroAssemblerSpecific {
-
- void atomicExchange64(Synchronization sync, const Address& mem,
- Register64 value, Register64 output)
-- DEFINED_ON(arm, arm64, x64, x86, mips64, loong64, riscv64);
-+ DEFINED_ON(arm, arm64, x64, x86, mips64, loong64, riscv64, ppc64);
-
- void atomicExchange64(Synchronization sync, const BaseIndex& mem,
- Register64 value, Register64 output)
-- DEFINED_ON(arm, arm64, x64, x86, mips64, loong64, riscv64);
-+ DEFINED_ON(arm, arm64, x64, x86, mips64, loong64, riscv64, ppc64);
-
- // Read-modify-write with memory. Return the value in memory before the
- // operation.
-@@ -4376,12 +4384,12 @@ class MacroAssembler : public MacroAssemblerSpecific {
- void atomicFetchOp(Scalar::Type type, Synchronization sync, AtomicOp op,
- Register value, const Address& mem, Register valueTemp,
- Register offsetTemp, Register maskTemp, Register output)
-- DEFINED_ON(mips64, loong64, riscv64);
-+ DEFINED_ON(mips64, loong64, riscv64, ppc64);
-
- void atomicFetchOp(Scalar::Type type, Synchronization sync, AtomicOp op,
- Register value, const BaseIndex& mem, Register valueTemp,
- Register offsetTemp, Register maskTemp, Register output)
-- DEFINED_ON(mips64, loong64, riscv64);
-+ DEFINED_ON(mips64, loong64, riscv64, ppc64);
-
- // x86:
- // `temp` must be ecx:ebx; `output` must be edx:eax.
-@@ -4395,7 +4403,7 @@ class MacroAssembler : public MacroAssemblerSpecific {
-
- void atomicFetchOp64(Synchronization sync, AtomicOp op, Register64 value,
- const Address& mem, Register64 temp, Register64 output)
-- DEFINED_ON(arm, arm64, x64, mips64, loong64, riscv64);
-+ DEFINED_ON(arm, arm64, x64, mips64, loong64, riscv64, ppc64);
-
- void atomicFetchOp64(Synchronization sync, AtomicOp op, const Address& value,
- const Address& mem, Register64 temp, Register64 output)
-@@ -4403,7 +4411,7 @@ class MacroAssembler : public MacroAssemblerSpecific {
-
- void atomicFetchOp64(Synchronization sync, AtomicOp op, Register64 value,
- const BaseIndex& mem, Register64 temp, Register64 output)
-- DEFINED_ON(arm, arm64, x64, mips64, loong64, riscv64);
-+ DEFINED_ON(arm, arm64, x64, mips64, loong64, riscv64, ppc64);
-
- void atomicFetchOp64(Synchronization sync, AtomicOp op, const Address& value,
- const BaseIndex& mem, Register64 temp, Register64 output)
-@@ -4421,14 +4429,14 @@ class MacroAssembler : public MacroAssemblerSpecific {
-
- void atomicEffectOp64(Synchronization sync, AtomicOp op, Register64 value,
- const Address& mem, Register64 temp)
-- DEFINED_ON(arm, arm64, mips64, loong64, riscv64);
-+ DEFINED_ON(arm, arm64, mips64, loong64, riscv64, ppc64);
-
- void atomicEffectOp64(Synchronization sync, AtomicOp op, Register64 value,
- const BaseIndex& mem) DEFINED_ON(x64);
-
- void atomicEffectOp64(Synchronization sync, AtomicOp op, Register64 value,
- const BaseIndex& mem, Register64 temp)
-- DEFINED_ON(arm, arm64, mips64, loong64, riscv64);
-+ DEFINED_ON(arm, arm64, mips64, loong64, riscv64, ppc64);
-
- // 64-bit atomic load. On 64-bit systems, use regular load with
- // Synchronization::Load, not this method.
-@@ -4481,14 +4489,14 @@ class MacroAssembler : public MacroAssemblerSpecific {
- Register replacement, Register valueTemp,
- Register offsetTemp, Register maskTemp,
- Register output)
-- DEFINED_ON(mips64, loong64, riscv64);
-+ DEFINED_ON(mips64, loong64, riscv64, ppc64);
-
- void wasmCompareExchange(const wasm::MemoryAccessDesc& access,
- const BaseIndex& mem, Register expected,
- Register replacement, Register valueTemp,
- Register offsetTemp, Register maskTemp,
- Register output)
-- DEFINED_ON(mips64, loong64, riscv64);
-+ DEFINED_ON(mips64, loong64, riscv64, ppc64);
-
- void wasmAtomicExchange(const wasm::MemoryAccessDesc& access,
- const Address& mem, Register value, Register output)
-@@ -4502,13 +4510,13 @@ class MacroAssembler : public MacroAssemblerSpecific {
- const Address& mem, Register value,
- Register valueTemp, Register offsetTemp,
- Register maskTemp, Register output)
-- DEFINED_ON(mips64, loong64, riscv64);
-+ DEFINED_ON(mips64, loong64, riscv64, ppc64);
-
- void wasmAtomicExchange(const wasm::MemoryAccessDesc& access,
- const BaseIndex& mem, Register value,
- Register valueTemp, Register offsetTemp,
- Register maskTemp, Register output)
-- DEFINED_ON(mips64, loong64, riscv64);
-+ DEFINED_ON(mips64, loong64, riscv64, ppc64);
-
- void wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access, AtomicOp op,
- Register value, const Address& mem, Register temp,
-@@ -4529,13 +4537,14 @@ class MacroAssembler : public MacroAssemblerSpecific {
- void wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access, AtomicOp op,
- Register value, const Address& mem, Register valueTemp,
- Register offsetTemp, Register maskTemp,
-- Register output) DEFINED_ON(mips64, loong64, riscv64);
-+ Register output)
-+ DEFINED_ON(mips64, loong64, riscv64, ppc64);
-
- void wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access, AtomicOp op,
- Register value, const BaseIndex& mem,
- Register valueTemp, Register offsetTemp,
- Register maskTemp, Register output)
-- DEFINED_ON(mips64, loong64, riscv64);
-+ DEFINED_ON(mips64, loong64, riscv64, ppc64);
-
- // Read-modify-write with memory. Return no value.
- //
-@@ -4562,13 +4571,13 @@ class MacroAssembler : public MacroAssemblerSpecific {
- Register value, const Address& mem,
- Register valueTemp, Register offsetTemp,
- Register maskTemp)
-- DEFINED_ON(mips64, loong64, riscv64);
-+ DEFINED_ON(mips64, loong64, riscv64, ppc64);
-
- void wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access, AtomicOp op,
- Register value, const BaseIndex& mem,
- Register valueTemp, Register offsetTemp,
- Register maskTemp)
-- DEFINED_ON(mips64, loong64, riscv64);
-+ DEFINED_ON(mips64, loong64, riscv64, ppc64);
-
- // 64-bit wide operations.
-
-@@ -4626,12 +4635,12 @@ class MacroAssembler : public MacroAssemblerSpecific {
- void wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access, AtomicOp op,
- Register64 value, const Address& mem,
- Register64 temp, Register64 output)
-- DEFINED_ON(arm, arm64, mips64, loong64, riscv64, x64);
-+ DEFINED_ON(arm, arm64, mips64, loong64, riscv64, ppc64, x64);
-
- void wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access, AtomicOp op,
- Register64 value, const BaseIndex& mem,
- Register64 temp, Register64 output)
-- DEFINED_ON(arm, arm64, mips64, loong64, riscv64, x64);
-+ DEFINED_ON(arm, arm64, mips64, loong64, riscv64, ppc64, x64);
-
- void wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access, AtomicOp op,
- const Address& value, const Address& mem,
-@@ -4684,14 +4693,14 @@ class MacroAssembler : public MacroAssemblerSpecific {
- Register replacement, Register valueTemp,
- Register offsetTemp, Register maskTemp, Register temp,
- AnyRegister output)
-- DEFINED_ON(mips64, loong64, riscv64);
-+ DEFINED_ON(mips64, loong64, riscv64, ppc64);
-
- void compareExchangeJS(Scalar::Type arrayType, Synchronization sync,
- const BaseIndex& mem, Register expected,
- Register replacement, Register valueTemp,
- Register offsetTemp, Register maskTemp, Register temp,
- AnyRegister output)
-- DEFINED_ON(mips64, loong64, riscv64);
-+ DEFINED_ON(mips64, loong64, riscv64, ppc64);
-
- void atomicExchangeJS(Scalar::Type arrayType, Synchronization sync,
- const Address& mem, Register value, Register temp,
-@@ -4705,13 +4714,13 @@ class MacroAssembler : public MacroAssemblerSpecific {
- const Address& mem, Register value, Register valueTemp,
- Register offsetTemp, Register maskTemp, Register temp,
- AnyRegister output)
-- DEFINED_ON(mips64, loong64, riscv64);
-+ DEFINED_ON(mips64, loong64, riscv64, ppc64);
-
- void atomicExchangeJS(Scalar::Type arrayType, Synchronization sync,
- const BaseIndex& mem, Register value,
- Register valueTemp, Register offsetTemp,
- Register maskTemp, Register temp, AnyRegister output)
-- DEFINED_ON(mips64, loong64, riscv64);
-+ DEFINED_ON(mips64, loong64, riscv64, ppc64);
-
- void atomicFetchOpJS(Scalar::Type arrayType, Synchronization sync,
- AtomicOp op, Register value, const Address& mem,
-@@ -4737,13 +4746,13 @@ class MacroAssembler : public MacroAssemblerSpecific {
- AtomicOp op, Register value, const Address& mem,
- Register valueTemp, Register offsetTemp,
- Register maskTemp, Register temp, AnyRegister output)
-- DEFINED_ON(mips64, loong64, riscv64);
-+ DEFINED_ON(mips64, loong64, riscv64, ppc64);
-
- void atomicFetchOpJS(Scalar::Type arrayType, Synchronization sync,
- AtomicOp op, Register value, const BaseIndex& mem,
- Register valueTemp, Register offsetTemp,
- Register maskTemp, Register temp, AnyRegister output)
-- DEFINED_ON(mips64, loong64, riscv64);
-+ DEFINED_ON(mips64, loong64, riscv64, ppc64);
-
- void atomicEffectOpJS(Scalar::Type arrayType, Synchronization sync,
- AtomicOp op, Register value, const Address& mem,
-@@ -4764,12 +4773,14 @@ class MacroAssembler : public MacroAssemblerSpecific {
- void atomicEffectOpJS(Scalar::Type arrayType, Synchronization sync,
- AtomicOp op, Register value, const Address& mem,
- Register valueTemp, Register offsetTemp,
-- Register maskTemp) DEFINED_ON(mips64, loong64, riscv64);
-+ Register maskTemp)
-+ DEFINED_ON(mips64, loong64, riscv64, ppc64);
-
- void atomicEffectOpJS(Scalar::Type arrayType, Synchronization sync,
- AtomicOp op, Register value, const BaseIndex& mem,
- Register valueTemp, Register offsetTemp,
-- Register maskTemp) DEFINED_ON(mips64, loong64, riscv64);
-+ Register maskTemp)
-+ DEFINED_ON(mips64, loong64, riscv64, ppc64);
-
- void atomicIsLockFreeJS(Register value, Register output);
-
-@@ -5928,7 +5939,7 @@ class MacroAssembler : public MacroAssemblerSpecific {
- inline void addStackPtrTo(T t);
-
- void subFromStackPtr(Imm32 imm32)
-- DEFINED_ON(mips64, loong64, riscv64, wasm32, arm, x86, x64);
-+ DEFINED_ON(mips64, loong64, riscv64, ppc64, wasm32, arm, x86, x64);
- void subFromStackPtr(Register reg);
-
- template <typename T>
-diff --git a/js/src/jit/MoveEmitter.h b/js/src/jit/MoveEmitter.h
-index 642829c070d6..3a883c596ca0 100644
---- a/js/src/jit/MoveEmitter.h
-+++ b/js/src/jit/MoveEmitter.h
-@@ -17,6 +17,8 @@
- # include "jit/loong64/MoveEmitter-loong64.h"
- #elif defined(JS_CODEGEN_RISCV64)
- # include "jit/riscv64/MoveEmitter-riscv64.h"
-+#elif defined(JS_CODEGEN_PPC64)
-+# include "jit/ppc64/MoveEmitter-ppc64.h"
- #elif defined(JS_CODEGEN_WASM32)
- # include "jit/wasm32/MoveEmitter-wasm32.h"
- #elif defined(JS_CODEGEN_NONE)
-diff --git a/js/src/jit/MoveResolver.cpp b/js/src/jit/MoveResolver.cpp
-index d2e1f12700bd..8e622407a0a8 100644
---- a/js/src/jit/MoveResolver.cpp
-+++ b/js/src/jit/MoveResolver.cpp
-@@ -57,6 +57,22 @@ bool MoveResolver::addMove(const MoveOperand& from, const MoveOperand& to,
- MoveOp::Type type) {
- // Assert that we're not doing no-op moves.
- MOZ_ASSERT(!(from == to));
-+#ifdef JS_CODEGEN_PPC64
-+ // PPC64 FloatRegisters expose Single/Double kinds that have distinct code()
-+ // values but share one physical register. The register allocator can emit a
-+ // move between two such kind-views of the same FPR (e.g. f2-Double to
-+ // f2-Single); these are no-ops on the hardware, are not caught by the
-+ // (from == to) assert above, and would otherwise trip the
-+ // !from().aliases(to()) invariant the resolver relies on later. Drop them.
-+ //
-+ // This would be correct for any backend whose FloatRegister has multiple
-+ // kinds aliasing one physical register, and could be un-gated if another
-+ // such backend needs it, but it is scoped to PPC64 so move resolution on
-+ // tier-1 platforms is left unchanged.
-+ if (from.aliases(to)) {
-+ return true;
-+ }
-+#endif
- PendingMove* pm = movePool_.allocate(from, to, type);
- if (!pm) {
- return false;
-diff --git a/js/src/jit/RegisterAllocator.h b/js/src/jit/RegisterAllocator.h
-index eda9933f6322..42e48111046a 100644
---- a/js/src/jit/RegisterAllocator.h
-+++ b/js/src/jit/RegisterAllocator.h
-@@ -262,9 +262,10 @@ class RegisterAllocator {
- public:
- template <typename TakeableSet>
- static void takeWasmRegisters(TakeableSet& regs) {
--#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM) || \
-- defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS64) || \
-- defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64)
-+#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM) || \
-+ defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS64) || \
-+ defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64) || \
-+ defined(JS_CODEGEN_PPC64)
- regs.take(HeapReg);
- #endif
- MOZ_ASSERT(!regs.has(FramePointer));
-diff --git a/js/src/jit/Registers.h b/js/src/jit/Registers.h
-index e0d02e2fb60d..423777ce38cd 100644
---- a/js/src/jit/Registers.h
-+++ b/js/src/jit/Registers.h
-@@ -20,6 +20,8 @@
- # include "jit/loong64/Architecture-loong64.h"
- #elif defined(JS_CODEGEN_RISCV64)
- # include "jit/riscv64/Architecture-riscv64.h"
-+#elif defined(JS_CODEGEN_PPC64)
-+# include "jit/ppc64/Architecture-ppc64.h"
- #elif defined(JS_CODEGEN_WASM32)
- # include "jit/wasm32/Architecture-wasm32.h"
- #elif defined(JS_CODEGEN_NONE)
-diff --git a/js/src/jit/Safepoints.cpp b/js/src/jit/Safepoints.cpp
-index 42e305f053af..8e3a25c3c5ff 100644
---- a/js/src/jit/Safepoints.cpp
-+++ b/js/src/jit/Safepoints.cpp
-@@ -63,6 +63,11 @@ static void WriteFloatRegisterMask(CompactBufferWriter& stream,
- stream.writeUnsigned64(bits.low());
- stream.writeUnsigned64(bits.high());
- break;
-+#elif defined(JS_CODEGEN_PPC64)
-+ case 16:
-+ stream.writeUnsigned64(static_cast<uint64_t>(bits));
-+ stream.writeUnsigned64(static_cast<uint64_t>(bits >> 64));
-+ break;
- #else
- case 1:
- stream.writeByte(bits);
-@@ -88,6 +93,12 @@ static FloatRegisters::SetType ReadFloatRegisterMask(
- uint64_t high = stream.readUnsigned64();
- return Bitset128(high, low);
- }
-+#elif defined(JS_CODEGEN_PPC64)
-+ case 16: {
-+ uint64_t low = stream.readUnsigned64();
-+ uint64_t high = stream.readUnsigned64();
-+ return FloatRegisters::SetType(high) << 64 | FloatRegisters::SetType(low);
-+ }
- #else
- case 1:
- return stream.readByte();
-diff --git a/js/src/jit/SharedICHelpers-inl.h b/js/src/jit/SharedICHelpers-inl.h
-index eedccc831732..1005b140f1df 100644
---- a/js/src/jit/SharedICHelpers-inl.h
-+++ b/js/src/jit/SharedICHelpers-inl.h
-@@ -19,6 +19,8 @@
- # include "jit/loong64/SharedICHelpers-loong64-inl.h"
- #elif defined(JS_CODEGEN_RISCV64)
- # include "jit/riscv64/SharedICHelpers-riscv64-inl.h"
-+#elif defined(JS_CODEGEN_PPC64)
-+# include "jit/ppc64/SharedICHelpers-ppc64-inl.h"
- #elif defined(JS_CODEGEN_WASM32)
- # include "jit/wasm32/SharedICHelpers-wasm32-inl.h"
- #elif defined(JS_CODEGEN_NONE)
-diff --git a/js/src/jit/SharedICHelpers.h b/js/src/jit/SharedICHelpers.h
-index 1ebd61e44509..f2703c6f986c 100644
---- a/js/src/jit/SharedICHelpers.h
-+++ b/js/src/jit/SharedICHelpers.h
-@@ -19,6 +19,8 @@
- # include "jit/loong64/SharedICHelpers-loong64.h"
- #elif defined(JS_CODEGEN_RISCV64)
- # include "jit/riscv64/SharedICHelpers-riscv64.h"
-+#elif defined(JS_CODEGEN_PPC64)
-+# include "jit/ppc64/SharedICHelpers-ppc64.h"
- #elif defined(JS_CODEGEN_WASM32)
- # include "jit/wasm32/SharedICHelpers-wasm32.h"
- #elif defined(JS_CODEGEN_NONE)
-diff --git a/js/src/jit/SharedICRegisters.h b/js/src/jit/SharedICRegisters.h
-index c3ab86bf0a82..5b270d0c166a 100644
---- a/js/src/jit/SharedICRegisters.h
-+++ b/js/src/jit/SharedICRegisters.h
-@@ -19,6 +19,8 @@
- # include "jit/loong64/SharedICRegisters-loong64.h"
- #elif defined(JS_CODEGEN_RISCV64)
- # include "jit/riscv64/SharedICRegisters-riscv64.h"
-+#elif defined(JS_CODEGEN_PPC64)
-+# include "jit/ppc64/SharedICRegisters-ppc64.h"
- #elif defined(JS_CODEGEN_WASM32)
- # include "jit/wasm32/SharedICRegisters-wasm32.h"
- #elif defined(JS_CODEGEN_NONE)
-diff --git a/js/src/jit/Simulator.h b/js/src/jit/Simulator.h
-index 39503716f10d..9f60baf53198 100644
---- a/js/src/jit/Simulator.h
-+++ b/js/src/jit/Simulator.h
-@@ -15,6 +15,8 @@
- # include "jit/loong64/Simulator-loong64.h"
- #elif defined(JS_SIMULATOR_RISCV64)
- # include "jit/riscv64/Simulator-riscv64.h"
-+#elif defined(JS_SIMULATOR_PPC64)
-+# include "jit/ppc64/Simulator-ppc64.h"
- #elif defined(JS_SIMULATOR)
- # error "Unexpected simulator platform"
- #endif
-diff --git a/js/src/jit/moz.build b/js/src/jit/moz.build
-index 5b5df3e5b7b2..36ef65d6221a 100644
---- a/js/src/jit/moz.build
-+++ b/js/src/jit/moz.build
-@@ -228,6 +228,18 @@ elif CONFIG["JS_CODEGEN_LOONG64"]:
- ]
- if CONFIG["JS_SIMULATOR_LOONG64"]:
- UNIFIED_SOURCES += ["loong64/Simulator-loong64.cpp"]
-+elif CONFIG["JS_CODEGEN_PPC64"]:
-+ UNIFIED_SOURCES += [
-+ "ppc64/Architecture-ppc64.cpp",
-+ "ppc64/Assembler-ppc64.cpp",
-+ "ppc64/CodeGenerator-ppc64.cpp",
-+ "ppc64/Lowering-ppc64.cpp",
-+ "ppc64/MacroAssembler-ppc64.cpp",
-+ "ppc64/MoveEmitter-ppc64.cpp",
-+ "ppc64/Trampoline-ppc64.cpp",
-+ ]
-+ if CONFIG["JS_SIMULATOR_PPC64"]:
-+ UNIFIED_SOURCES += ["ppc64/Simulator-ppc64.cpp"]
- elif CONFIG["JS_CODEGEN_RISCV64"]:
- UNIFIED_SOURCES += [
- "riscv64/Architecture-riscv64.cpp",
-diff --git a/js/src/jit/ppc64/Architecture-ppc64.cpp b/js/src/jit/ppc64/Architecture-ppc64.cpp
-new file mode 100644
-index 000000000000..5632865556ac
---- /dev/null
-+++ b/js/src/jit/ppc64/Architecture-ppc64.cpp
-@@ -0,0 +1,221 @@
-+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
-+ * vim: set ts=8 sts=2 et sw=2 tw=80:
-+ * This Source Code Form is subject to the terms of the Mozilla Public
-+ * License, v. 2.0. If a copy of the MPL was not distributed with this
-+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-+
-+#include "jit/ppc64/Architecture-ppc64.h"
-+
-+#ifndef JS_SIMULATOR
-+# include <sys/auxv.h>
-+#endif
-+
-+#include "jit/FlushICache.h" // js::jit::FlushICache
-+#include "jit/RegisterSets.h"
-+
-+namespace js {
-+namespace jit {
-+
-+Registers::Code Registers::FromName(const char* name) {
-+ for (size_t i = 0; i < Total; i++) {
-+ if (strcmp(GetName(i), name) == 0) {
-+ return Code(i);
-+ }
-+ }
-+
-+ return Invalid;
-+}
-+
-+FloatRegisters::Code FloatRegisters::FromName(const char* name) {
-+ for (size_t i = 0; i < Total; i++) {
-+ if (strcmp(GetName(i), name) == 0) {
-+ return Code(i);
-+ }
-+ }
-+
-+ return Invalid;
-+}
-+
-+FloatRegisterSet FloatRegister::ReduceSetForPush(const FloatRegisterSet& s) {
-+ SetType all = s.bits();
-+ SetType simd128Set =
-+ (all >> (uint32_t(FloatRegisters::Simd128) * FloatRegisters::TotalPhys)) &
-+ FloatRegisters::AllPhysMask;
-+ SetType doubleSet =
-+ (all >> (uint32_t(FloatRegisters::Double) * FloatRegisters::TotalPhys)) &
-+ FloatRegisters::AllPhysMask;
-+ SetType singleSet =
-+ (all >> (uint32_t(FloatRegisters::Single) * FloatRegisters::TotalPhys)) &
-+ FloatRegisters::AllPhysMask;
-+
-+ // Single+Double share physical FPRs (push as Double, 8-byte slot);
-+ // Simd128 lives in its own physical VRs (push as Simd128, 16-byte
-+ // slot). Different physical pools — no dedup. Note that
-+ // sizeof(FloatRegisters::RegisterContent) is 8 bytes (no v128 in the
-+ // union), so RegisterDump::FPUArray is 32 × 8 = 256 bytes, matching
-+ // the Float-only layout PushRegsInMask produces.
-+ SetType set64 = singleSet | doubleSet;
-+
-+ SetType reduced =
-+ (simd128Set << (uint32_t(FloatRegisters::Simd128) *
-+ FloatRegisters::TotalPhys)) |
-+ (set64 << (uint32_t(FloatRegisters::Double) * FloatRegisters::TotalPhys));
-+ return FloatRegisterSet(reduced);
-+}
-+
-+uint32_t FloatRegister::GetPushSizeInBytes(const FloatRegisterSet& s) {
-+ SetType all = s.bits();
-+ SetType simd128Set =
-+ (all >> (uint32_t(FloatRegisters::Simd128) * FloatRegisters::TotalPhys)) &
-+ FloatRegisters::AllPhysMask;
-+ SetType doubleSet =
-+ (all >> (uint32_t(FloatRegisters::Double) * FloatRegisters::TotalPhys)) &
-+ FloatRegisters::AllPhysMask;
-+ SetType singleSet =
-+ (all >> (uint32_t(FloatRegisters::Single) * FloatRegisters::TotalPhys)) &
-+ FloatRegisters::AllPhysMask;
-+
-+ // Natural per-kind slot sizes. See ReduceSetForPush comment.
-+ SetType set64 = singleSet | doubleSet;
-+
-+ uint32_t count64 = std::popcount(static_cast<uint64_t>(set64));
-+ uint32_t count128 = std::popcount(static_cast<uint64_t>(simd128Set));
-+
-+ return count64 * sizeof(double) + count128 * 16;
-+}
-+
-+uint32_t FloatRegister::getRegisterDumpOffsetInBytes() {
-+ // Simd128 encoding is 32-63 — mask back to 0-31 for the FPUArray-
-+ // relative offset. (FPUArray has 32 slots; Simd128 should never be in
-+ // a SafepointState/BailoutState anyway.)
-+ return (encoding() & 31) * sizeof(FloatRegisters::RegisterContent);
-+}
-+
-+static bool sPOWER9Detected = false;
-+static bool sPOWER10Detected = false;
-+static bool sCPUFlagsComputed = false;
-+
-+#ifndef JS_SIMULATOR
-+// Cache line sizes, detected at startup from ELF auxiliary vector.
-+// Fallback to 32 bytes (safe minimum per LuaJIT/LLVM compiler-rt).
-+static size_t sDCacheLineSize = 0;
-+static size_t sICacheLineSize = 0;
-+#endif
-+
-+void PPC64Flags::Init() {
-+ if (sCPUFlagsComputed) {
-+ return;
-+ }
-+#ifndef JS_SIMULATOR
-+ unsigned long hwcap2 = getauxval(AT_HWCAP2);
-+ // PPC_FEATURE2_ARCH_3_00 = 0x00800000 (ISA 3.0 / POWER9)
-+ sPOWER9Detected = (hwcap2 & 0x00800000) != 0;
-+ // PPC_FEATURE2_ARCH_3_1 = 0x00040000 (ISA 3.1 / POWER10)
-+ sPOWER10Detected = (hwcap2 & 0x00040000) != 0;
-+ // Allow forcing POWER8 mode for testing: MOZ_PPC64_FORCE_POWER8=1.
-+ // P10 implies P9; downgrade clears both.
-+ const char* forceP8 = getenv("MOZ_PPC64_FORCE_POWER8");
-+ if (forceP8 && forceP8[0] == '1') {
-+ sPOWER9Detected = false;
-+ sPOWER10Detected = false;
-+ }
-+
-+ size_t dcache = getauxval(AT_DCACHEBSIZE);
-+ size_t icache = getauxval(AT_ICACHEBSIZE);
-+ sDCacheLineSize = dcache ? dcache : 32;
-+ sICacheLineSize = icache ? icache : 32;
-+#endif
-+ // FORCE_POWER9/10 opt into the corresponding ISA fast paths. Useful under
-+ // the simulator; on real silicon below the gated level they are foot-guns
-+ // because the CPU will trap on undefined ops. Outside the JS_SIMULATOR
-+ // guard so the sim can opt in via env.
-+ //
-+ // FORCE_POWER10 also implies FORCE_POWER9 — this matches what real-P10
-+ // silicon advertises in hwcap2 (both ARCH_3_00 and ARCH_3_1 bits set), so
-+ // we don't ask sim users to pass both vars separately.
-+ const char* forceP9 = getenv("MOZ_PPC64_FORCE_POWER9");
-+ if (forceP9 && forceP9[0] == '1') {
-+ sPOWER9Detected = true;
-+ }
-+ const char* forceP10 = getenv("MOZ_PPC64_FORCE_POWER10");
-+ if (forceP10 && forceP10[0] == '1') {
-+ sPOWER10Detected = true;
-+ sPOWER9Detected = true;
-+ }
-+ sCPUFlagsComputed = true;
-+}
-+
-+bool HasPOWER9() {
-+ MOZ_ASSERT(sCPUFlagsComputed);
-+ return sPOWER9Detected;
-+}
-+
-+bool HasPOWER10() {
-+ MOZ_ASSERT(sCPUFlagsComputed);
-+ return sPOWER10Detected;
-+}
-+
-+bool CPUFlagsHaveBeenComputed() { return sCPUFlagsComputed; }
-+
-+// Per-bit feature flags packed into the wasm code signature. Adding a
-+// new bit (e.g., POWER10, VSX4) should be a 1-line change here plus a
-+// corresponding HasPOWER10()/IsVSX4Available() probe above. The value
-+// is also assert-checked into a fixed-width field in
-+// js/src/wasm/WasmCompile.cpp — if that field ever overflows, widen
-+// it there before landing more bits here.
-+uint32_t GetPPC64Flags() {
-+ uint32_t flags = 0;
-+ if (sPOWER9Detected) {
-+ flags |= PPC64Flag_POWER9;
-+ }
-+ return flags;
-+}
-+
-+void FlushICache(void* code, size_t size) {
-+#if defined(JS_SIMULATOR)
-+ js::jit::SimulatorProcess::FlushICache(code, size);
-+#else
-+ // PPC64 has incoherent I/D caches. GCC's __builtin___clear_cache is a
-+ // no-op on PPC64 Linux, so we implement the flush explicitly.
-+ // This follows the same approach as QEMU (util/cacheflush.c) and the
-+ // Linux kernel (arch/powerpc/mm/cacheflush.c):
-+ // dcbst loop -> sync -> icbi loop -> sync -> isync
-+ if (!size) {
-+ return;
-+ }
-+ MOZ_ASSERT(sCPUFlagsComputed,
-+ "PPC64Flags::Init must run before any FlushICache call");
-+
-+ uintptr_t start = reinterpret_cast<uintptr_t>(code);
-+ uintptr_t end = start + size;
-+
-+ // Step 1: Write back data cache to memory.
-+ for (uintptr_t addr = start & ~(sDCacheLineSize - 1); addr < end;
-+ addr += sDCacheLineSize) {
-+ asm volatile("dcbst 0, %0" : : "r"(addr) : "memory");
-+ }
-+ asm volatile("sync" ::: "memory");
-+
-+ // Step 2: Invalidate instruction cache.
-+ for (uintptr_t addr = start & ~(sICacheLineSize - 1); addr < end;
-+ addr += sICacheLineSize) {
-+ asm volatile("icbi 0, %0" : : "r"(addr) : "memory");
-+ }
-+ // The extra sync before isync matches the Linux kernel and QEMU.
-+ // It ensures all icbi operations complete before the pipeline flush.
-+ asm volatile("sync" ::: "memory");
-+ asm volatile("isync" ::: "memory");
-+#endif
-+}
-+
-+void FlushExecutionContext() {
-+#if !defined(JS_SIMULATOR)
-+ // PPC64's isync flushes the instruction pipeline on the current core,
-+ // ensuring any previously invalidated icache entries are discarded and
-+ // instructions are re-fetched from coherent memory.
-+ asm volatile("isync" ::: "memory");
-+#endif
-+}
-+
-+} // namespace jit
-+} // namespace js
-diff --git a/js/src/jit/ppc64/Architecture-ppc64.h b/js/src/jit/ppc64/Architecture-ppc64.h
-new file mode 100644
-index 000000000000..efaab0b0c854
---- /dev/null
-+++ b/js/src/jit/ppc64/Architecture-ppc64.h
-@@ -0,0 +1,581 @@
-+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
-+ * vim: set ts=8 sts=2 et sw=2 tw=80:
-+ * This Source Code Form is subject to the terms of the Mozilla Public
-+ * License, v. 2.0. If a copy of the MPL was not distributed with this
-+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-+
-+#ifndef jit_ppc64_Architecture_ppc64_h
-+#define jit_ppc64_Architecture_ppc64_h
-+
-+#include <algorithm>
-+#include <bit>
-+
-+#include "jit/shared/Architecture-shared.h"
-+
-+#include "js/Utility.h"
-+
-+namespace js {
-+namespace jit {
-+
-+// PPC64 has 32 64-bit general purpose registers, r0 through r31.
-+// The program counter is not directly accessible as a register.
-+// The link register (LR) and count register (CTR) are SPRs.
-+
-+// PPC64 ELFv2 GPR Convention:
-+// Name Usage
-+// r0 Volatile, cannot be base register in load/store
-+// r1 Stack pointer (callee-saved)
-+// r2 TOC pointer (reserved)
-+// r3 Return value / first argument
-+// r4-r10 Arguments 2-8
-+// r11 Environment pointer / scratch
-+// r12 Branch target / scratch
-+// r13 Thread pointer (reserved, TLS)
-+// r14-r31 Callee-saved
-+
-+// PPC64 ELFv2 FPR Convention:
-+// f0 Scratch
-+// f1-f13 Arguments / volatile
-+// f14-f31 Callee-saved
-+
-+// Describes the PPC64 general-purpose registers: their encodings, printable
-+// names, and the bit masks (one bit per GPR, bit N == rN) used to classify
-+// them for the register allocator and the ABI.
-+class Registers {
-+ public:
-+  enum RegisterID {
-+    r0 = 0,
-+    r1,
-+    r2,
-+    r3,
-+    r4,
-+    r5,
-+    r6,
-+    r7,
-+    r8,
-+    r9,
-+    r10,
-+    r11,
-+    r12,
-+    r13,
-+    r14,
-+    r15,
-+    r16,
-+    r17,
-+    r18,
-+    r19,
-+    r20,
-+    r21,
-+    r22,
-+    r23,
-+    r24,
-+    r25,
-+    r26,
-+    r27,
-+    r28,
-+    r29,
-+    r30,
-+    r31,
-+    // The sentinel must directly follow r31 so that it takes the value 32.
-+    // An enumerator without an initializer is "previous enumerator + 1";
-+    // listing it after `sp = r1` would give it the value 2, i.e. the
-+    // encoding of r2.
-+    invalid_reg,
-+    sp = r1,
-+  };
-+  typedef uint8_t Code;
-+  typedef RegisterID Encoding;
-+  typedef uint32_t SetType;
-+
-+  static const Encoding StackPointer = sp;
-+  static const Encoding Invalid = invalid_reg;
-+
-+  union RegisterContent {
-+    uintptr_t r;
-+  };
-+
-+  // Number of registers present in the set.
-+  static uint32_t SetSize(SetType x) { return std::popcount(x); }
-+  // Index of the lowest register in a non-empty set.
-+  static uint32_t FirstBit(SetType x) {
-+    MOZ_ASSERT(x);
-+    return std::countr_zero(x);
-+  }
-+  // Index of the highest register in a non-empty set.
-+  static uint32_t LastBit(SetType x) {
-+    MOZ_ASSERT(x);
-+    return std::bit_width(x) - 1;
-+  }
-+
-+  // Printable name for a register code; r1 is shown as "sp" and any code
-+  // >= Total as "invalid".
-+  static const char* GetName(uint32_t code) {
-+    static const char* const Names[] = {
-+        "r0",  "sp",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
-+        "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
-+        "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
-+        "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"};
-+    static_assert(Total == std::size(Names), "Table is the correct size");
-+    if (code >= Total) {
-+      return "invalid";
-+    }
-+    return Names[code];
-+  }
-+
-+  static Code FromName(const char* name);
-+
-+  static const uint32_t Total = 32;
-+  static const uint32_t TotalPhys = 32;
-+  static const uint32_t Allocatable = 24;
-+
-+  static_assert(uint32_t(invalid_reg) == Total,
-+                "The invalid sentinel must not alias a real register");
-+
-+  static const SetType AllMask = 0xFFFFFFFF;
-+  static const SetType NoneMask = 0x0;
-+
-+  static const SetType ArgRegMask =
-+      (1U << Registers::r3) | (1U << Registers::r4) | (1U << Registers::r5) |
-+      (1U << Registers::r6) | (1U << Registers::r7) | (1U << Registers::r8) |
-+      (1U << Registers::r9) | (1U << Registers::r10);
-+
-+  // r0, r11, r12 are also volatile but handled separately.
-+  static const SetType VolatileMask = ArgRegMask;
-+
-+  // ELFv2 callee-saved GPRs are r14..r31. r2 (TOC) and r13 (TLS) are
-+  // dedicated registers, NOT general callee-saved: r2 is restored by the
-+  // PLT-call linkage convention (`ld r2, 24(r1)` after every cross-module
-+  // call); r13 is the thread pointer and must NEVER be written. Including
-+  // them here previously made `PushRegsInMask(NonVolatileMask)` save and
-+  // restore them — wasted 16 bytes per wasm-stub frame at best, latent
-+  // TLS corruption if save/restore were ever misordered. Verified that
-+  // no JIT-emitted code writes r2 or r13 (both are NonAllocatable, and
-+  // grep across js/src/jit/ppc64/ finds no `as_*` site assigning to
-+  // them), so they're preserved across the JIT body for free.
-+  static const SetType NonVolatileMask =
-+      (1U << Registers::r14) |
-+      (1U << Registers::r15) | (1U << Registers::r16) | (1U << Registers::r17) |
-+      (1U << Registers::r18) | (1U << Registers::r19) | (1U << Registers::r20) |
-+      (1U << Registers::r21) | (1U << Registers::r22) | (1U << Registers::r23) |
-+      (1U << Registers::r24) | (1U << Registers::r25) | (1U << Registers::r26) |
-+      (1U << Registers::r27) | (1U << Registers::r28) | (1U << Registers::r29) |
-+      (1U << Registers::r30) | (1U << Registers::r31);
-+
-+  // Eight registers are withheld from the allocator, which is why
-+  // Allocatable is 32 - 8 = 24.
-+  static const SetType NonAllocatableMask =
-+      (1U << Registers::r0) |   // Cannot be base in load/store.
-+      (1U << Registers::sp) |   // Stack pointer.
-+      (1U << Registers::r2) |   // TOC pointer (ELFv2).
-+      (1U << Registers::r11) |  // Third scratch.
-+      (1U << Registers::r12) |  // Second scratch / addressTempRegister.
-+      (1U << Registers::r13) |  // Thread-local storage (ELFv2).
-+      (1U << Registers::r16) |  // Saved scratch register.
-+      (1U << Registers::r31);   // Frame pointer.
-+
-+  static const SetType WrapperMask = VolatileMask;
-+
-+  // Registers returned from a JS -> JS call.
-+  static const SetType JSCallMask = (1U << Registers::r5);
-+
-+  // Registers returned from a JS -> C call.
-+  static const SetType CallMask = (1U << Registers::r3);
-+
-+  static const SetType AllocatableMask = AllMask & ~NonAllocatableMask;
-+};
-+
-+typedef uint32_t PackedRegisterMask;
-+
-+template <typename T>
-+class TypedRegisterSet;
-+
-+// Describes the floating-point / vector register codes. A register code
-+// combines a 5-bit encoding with a kind (Double, Single or Simd128); register
-+// sets hold one 32-bit lane per kind, so bit (kind * 32 + encoding) stands for
-+// one (encoding, kind) pair.
-+class FloatRegisters {
-+ public:
-+ enum FPRegisterID {
-+ f0 = 0,
-+ f1,
-+ f2,
-+ f3,
-+ f4,
-+ f5,
-+ f6,
-+ f7,
-+ f8,
-+ f9,
-+ f10,
-+ f11,
-+ f12,
-+ f13,
-+ f14,
-+ f15,
-+ f16,
-+ f17,
-+ f18,
-+ f19,
-+ f20,
-+ f21,
-+ f22,
-+ f23,
-+ f24,
-+ f25,
-+ f26,
-+ f27,
-+ f28,
-+ f29,
-+ f30,
-+ f31,
-+ };
-+
-+ // Eight bits: (invalid << 7) | (kind << 5) | encoding
-+ typedef uint8_t Code;
-+ typedef FPRegisterID Encoding;
-+ // 3 kinds × 32 regs = 96 bits needed. Use __uint128_t.
-+ typedef __uint128_t SetType;
-+
-+ // The enumerator order fixes each kind's lane in SetType:
-+ // Double = bits 0-31, Single = bits 32-63, Simd128 = bits 64-95.
-+ enum Kind : uint8_t { Double, Single, Simd128, NumTypes };
-+
-+ // Code with only the "invalid" bit (bit 7) set.
-+ static constexpr Code Invalid = 0x80;
-+
-+ // Printable name for a register code. The kind is ignored (every view of
-+ // encoding N prints as "fN"); codes >= Total print as "invalid".
-+ static const char* GetName(uint32_t code) {
-+ static const char* const Names[] = {
-+ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",
-+ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15",
-+ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
-+ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"};
-+ static_assert(TotalPhys == std::size(Names), "Table is the correct size");
-+ if (code >= Total) {
-+ return "invalid";
-+ }
-+ return Names[code % TotalPhys];
-+ }
-+
-+ static Code FromName(const char* name);
-+
-+ static const uint32_t TotalPhys = 32;
-+ static const uint32_t Total = TotalPhys * NumTypes;
-+ static const uint32_t Allocatable = 31; // Without f0, the scratch register.
-+
-+ static_assert(sizeof(SetType) * 8 >= Total,
-+ "SetType should be large enough to enumerate all registers.");
-+
-+ // Spread* is the lowest bit of a kind's lane; multiplying a 32-bit physical
-+ // mask by it moves that mask into the lane.
-+ static const SetType SpreadSingle = SetType(1)
-+ << (uint32_t(Single) * TotalPhys);
-+ static const SetType SpreadDouble = SetType(1)
-+ << (uint32_t(Double) * TotalPhys);
-+ static const SetType SpreadSimd128 = SetType(1)
-+ << (uint32_t(Simd128) * TotalPhys);
-+ static const SetType Spread = SpreadSingle | SpreadDouble | SpreadSimd128;
-+
-+ static const SetType AllPhysMask = ((SetType(1) << TotalPhys) - 1);
-+ static const SetType AllMask = AllPhysMask * Spread;
-+ static const SetType AllSingleMask = AllPhysMask * SpreadSingle;
-+ static const SetType AllDoubleMask = AllPhysMask * SpreadDouble;
-+ static const SetType AllSimd128Mask = AllPhysMask * SpreadSimd128;
-+ static const SetType NoneMask = SetType(0);
-+
-+ // ELFv2: f14-f31 are non-volatile (callee-saved) for scalar FP. This mask
-+ // is applied to the Single and Double lanes only; the Simd128 lane uses
-+ // its own mask, SimdNonVolatilePhysMask, defined next.
-+ static const SetType NonVolatilePhysMask =
-+ SetType((1U << FloatRegisters::f14) | (1U << FloatRegisters::f15) |
-+ (1U << FloatRegisters::f16) | (1U << FloatRegisters::f17) |
-+ (1U << FloatRegisters::f18) | (1U << FloatRegisters::f19) |
-+ (1U << FloatRegisters::f20) | (1U << FloatRegisters::f21) |
-+ (1U << FloatRegisters::f22) | (1U << FloatRegisters::f23) |
-+ (1U << FloatRegisters::f24) | (1U << FloatRegisters::f25) |
-+ (1U << FloatRegisters::f26) | (1U << FloatRegisters::f27) |
-+ (1U << FloatRegisters::f28) | (1U << FloatRegisters::f29) |
-+ (1U << FloatRegisters::f30) | (1U << FloatRegisters::f31));
-+ // Simd128 lives in VR-namespace (VSR32-63 = VR0-VR31). Per ELFv2 ABI,
-+ // VR20-VR31 are non-volatile (callee-saved). Encoding storage is 20-31
-+ // with kind=Simd128.
-+ static const SetType SimdNonVolatilePhysMask =
-+ SetType((1U << 20) | (1U << 21) | (1U << 22) | (1U << 23) |
-+ (1U << 24) | (1U << 25) | (1U << 26) | (1U << 27) |
-+ (1U << 28) | (1U << 29) | (1U << 30) | (1U << 31));
-+ static const SetType NonVolatileMask =
-+ NonVolatilePhysMask * (SpreadSingle | SpreadDouble) |
-+ SimdNonVolatilePhysMask * SpreadSimd128;
-+
-+ static const SetType VolatileMask = AllMask & ~NonVolatileMask;
-+
-+ static const SetType WrapperMask = VolatileMask;
-+
-+ // f0 is the scratch register (all three views: single, double, simd128).
-+ static const SetType NonAllocatableMask =
-+ (SetType(1) << FloatRegisters::f0) * Spread;
-+
-+ static const SetType AllocatableMask = AllMask & ~NonAllocatableMask;
-+
-+ union RegisterContent {
-+ float s;
-+ double d;
-+ // No v128 here. Simd128 lives in physically-distinct VRs (VSR32-63)
-+ // and never reaches RegisterDump (asserted by SafepointState; bailout
-+ // AllRegs excludes Simd128). With v128 in the union, sizeof was 16,
-+ // forcing PushRegsInMask to a 16-byte stride that mismatched
-+ // addressOfRegister's 8-byte walk via (*iter).size().
-+ };
-+
-+ // Low five bits of a code: the register number within its kind.
-+ static constexpr Encoding encoding(Code c) { return Encoding(c & 31); }
-+
-+ // Bits 5-6 of a code: the register kind.
-+ static constexpr Kind kind(Code c) { return Kind((c >> 5) & 3); }
-+
-+ // Inverse of encoding()/kind(): packs the three fields into a Code.
-+ static constexpr Code fromParts(uint32_t encoding, uint32_t kind,
-+ uint32_t invalid) {
-+ return Code((invalid << 7) | (kind << 5) | encoding);
-+ }
-+};
-+
-+// SpillSlotSize must fit the widest register class (Simd128 = 16 bytes).
-+// We can't derive from sizeof(FloatRegisters::RegisterContent) — that
-+// union is sized for FPRs only (8 bytes since v128 lives in distinct
-+// VRs, not in the FPR union), so deriving would under-reserve for
-+// Simd128 cycle breaks. SpillSlotSize is consumed only by MoveEmitter
-+// and is not part of the JIT frame layout.
-+static const uint32_t SpillSlotSize = 16;
-+
-+// PPC64 ELFv2 ABI: the callee saves LR at [caller_SP+16], CR at
-+// [caller_SP+8], and may save TOC at [caller_SP+24]. Reserve 32 bytes
-+// (the minimum ELFv2 stack frame) as a shadow area for every ABI call.
-+static constexpr uint32_t ShadowStackSpace = 32;
-+// NOTE(review): presumably 0 because a call leaves the return address in LR
-+// rather than pushing it onto the stack — confirm against the shared users
-+// of this constant.
-+static const uint32_t SizeOfReturnAddressAfterCall = 0;
-+
-+// PPC64 branch instructions have a 26-bit signed offset field, giving a
-+// range of +/- 32MB. We reduce this to leave room for jump island insertion.
-+static constexpr uint32_t JumpImmediateRange = (32 * 1024 * 1024) - 32;
-+
-+// Size of each bailout table entry (a single bl instruction).
-+static const uint32_t BAILOUT_TABLE_ENTRY_SIZE = 4;
-+
-+// PPC64 special purpose registers (not exposed to the allocator).
-+// The enumerator values are the SPR numbers handed to the mtspr/mfspr
-+// encoders. invalid_spreg takes the value after spr_vrsave (257).
-+enum SPRegisterID {
-+ spr_xer = 1,
-+ spr_lr = 8,
-+ spr_ctr = 9,
-+ spr_vrsave = 256,
-+ invalid_spreg
-+};
-+
-+// PPC64 condition registers.
-+// Only cr0, cr1 and cr5-cr7 are named; cr2-cr4 have no enumerator here
-+// (NOTE(review): presumably because they are callee-saved in ELFv2 — confirm).
-+// invalid_creg takes the value after cr7 (8).
-+enum CRegisterID { cr0 = 0, cr1, cr5 = 5, cr6, cr7, invalid_creg };
-+
-+// A single floating-point or vector register value: a 5-bit encoding, a kind
-+// (Single, Double or Simd128) and an "invalid" flag. Single and Double views
-+// of the same encoding alias each other; a Simd128 register aliases only
-+// itself (see aliases()).
-+struct FloatRegister {
-+ typedef FloatRegisters Codes;
-+ // Note: wider than Codes::Code (uint8_t); code() still only produces values
-+ // that fit in eight bits.
-+ typedef size_t Code;
-+ typedef Codes::Encoding Encoding;
-+ typedef Codes::SetType SetType;
-+
-+ // Counts the encodings that appear in any of the three kind lanes.
-+ // NOTE(review): the Simd128 lane is folded onto the same 32 bits as the
-+ // Single/Double lanes, so a set holding both fN (FPR) and the Simd128
-+ // register with encoding N counts as one, although aliases() treats them as
-+ // distinct registers. Confirm this is what callers of SetSize expect.
-+ static uint32_t SetSize(SetType x) {
-+ // Fold all 3 kinds (Single, Double, Simd128) down to physical mask.
-+ SetType phys = (x & FloatRegisters::AllPhysMask) |
-+ ((x >> FloatRegisters::TotalPhys) & FloatRegisters::AllPhysMask) |
-+ ((x >> (2 * FloatRegisters::TotalPhys)) & FloatRegisters::AllPhysMask);
-+ return std::popcount(static_cast<uint64_t>(phys));
-+ }
-+
-+ // __uint128_t helpers for FirstBit/LastBit.
-+ // Both work on the two 64-bit halves because the <bit> functions are only
-+ // applied to uint64_t here.
-+ static uint32_t FirstBit(SetType x) {
-+ MOZ_ASSERT(x);
-+ uint64_t lo = static_cast<uint64_t>(x);
-+ if (lo) {
-+ return std::countr_zero(lo);
-+ }
-+ return 64 + std::countr_zero(static_cast<uint64_t>(x >> 64));
-+ }
-+ static uint32_t LastBit(SetType x) {
-+ MOZ_ASSERT(x);
-+ uint64_t hi = static_cast<uint64_t>(x >> 64);
-+ if (hi) {
-+ return 64 + (std::bit_width(hi) - 1);
-+ }
-+ return std::bit_width(static_cast<uint64_t>(x)) - 1;
-+ }
-+
-+ private:
-+ uint8_t encoding_;
-+ uint8_t kind_;
-+ bool invalid_;
-+
-+ typedef Codes::Kind Kind;
-+
-+ public:
-+ constexpr FloatRegister(Encoding encoding, Kind kind)
-+ : encoding_(encoding), kind_(kind), invalid_(false) {}
-+
-+ // The default-constructed register is the invalid one.
-+ constexpr FloatRegister()
-+ : encoding_(0), kind_(FloatRegisters::Double), invalid_(true) {}
-+
-+ // Rebuilds a register from a code in [0, Codes::Total).
-+ static FloatRegister FromCode(uint32_t i) {
-+ MOZ_ASSERT(i < Codes::Total);
-+ return FloatRegister(FloatRegisters::encoding(i), FloatRegisters::kind(i));
-+ }
-+
-+ bool isSingle() const {
-+ MOZ_ASSERT(!invalid_);
-+ return kind_ == FloatRegisters::Single;
-+ }
-+ bool isDouble() const {
-+ MOZ_ASSERT(!invalid_);
-+ return kind_ == FloatRegisters::Double;
-+ }
-+ bool isSimd128() const {
-+ MOZ_ASSERT(!invalid_);
-+ return kind_ == FloatRegisters::Simd128;
-+ }
-+ bool isInvalid() const { return invalid_; }
-+
-+ // The as*() helpers keep the stored encoding and change only the kind.
-+ FloatRegister asSingle() const {
-+ MOZ_ASSERT(!invalid_);
-+ return FloatRegister(Encoding(encoding_), FloatRegisters::Single);
-+ }
-+ FloatRegister asDouble() const {
-+ MOZ_ASSERT(!invalid_);
-+ return FloatRegister(Encoding(encoding_), FloatRegisters::Double);
-+ }
-+ FloatRegister asSimd128() const {
-+ MOZ_ASSERT(!invalid_);
-+ return FloatRegister(Encoding(encoding_), FloatRegisters::Simd128);
-+ }
-+
-+ // Size in bytes of the value held by this view: 8, 4 or 16.
-+ constexpr uint32_t size() const {
-+ MOZ_ASSERT(!invalid_);
-+ if (kind_ == FloatRegisters::Double) {
-+ return sizeof(double);
-+ }
-+ if (kind_ == FloatRegisters::Single) {
-+ return sizeof(float);
-+ }
-+ MOZ_ASSERT(kind_ == FloatRegisters::Simd128);
-+ return 16;
-+ }
-+
-+ // Packed (invalid, kind, encoding) code; also the bit index in SetType.
-+ constexpr Code code() const {
-+ return Codes::fromParts(encoding_, kind_, invalid_);
-+ }
-+
-+ // Register number for instruction encoders. Unlike code(), this returns
-+ // 32 + N for Simd128 registers, so it can exceed the FPRegisterID range.
-+ constexpr Encoding encoding() const {
-+ MOZ_ASSERT(!invalid_);
-+ // Simd128 lives in VR-namespace at VSR32-63 (= VR0-31). Single/Double
-+ // share FPR namespace at VSR0-31. The unified XX-form encoders split
-+ // the result into low-5-bit VRT/VRA/VRB + TX/AX/BX bits; VMX
-+ // FloatRegister-taking encoders mask with `& 31` for the raw VR
-+ // field. So 32+E flows correctly through both paths.
-+ return Encoding(encoding_ +
-+ (kind_ == FloatRegisters::Simd128 ? 32 : 0));
-+ }
-+
-+ const char* name() const { return FloatRegisters::GetName(code()); }
-+ bool volatile_() const {
-+ MOZ_ASSERT(!invalid_);
-+ return !!((SetType(1) << code()) & FloatRegisters::VolatileMask);
-+ }
-+ constexpr bool operator!=(FloatRegister other) const {
-+ return code() != other.code();
-+ }
-+ constexpr bool operator==(FloatRegister other) const {
-+ return code() == other.code();
-+ }
-+
-+ // True when both registers have the same stored encoding and are either
-+ // both Simd128 or both scalar (Single/Double).
-+ bool aliases(FloatRegister other) const {
-+ // Register-class partition: {Single, Double} share FPRs (VSR0-31);
-+ // Simd128 lives in VR-namespace (VSR32-63). FPR f5 (Single/Double
-+ // encoding 5) and VR v5 (Simd128 encoding 5) are distinct physical
-+ // registers.
-+ if (encoding_ != other.encoding_) return false;
-+ bool selfSimd = (kind_ == FloatRegisters::Simd128);
-+ bool otherSimd = (other.kind_ == FloatRegisters::Simd128);
-+ return selfSimd == otherSimd;
-+ }
-+ // True when both registers have the same kind (encodings are not compared).
-+ bool equiv(FloatRegister other) const {
-+ MOZ_ASSERT(!invalid_);
-+ return kind_ == other.kind_;
-+ }
-+
-+ // Number of views that alias this register, including itself.
-+ uint32_t numAliased() const {
-+ return (kind_ == FloatRegisters::Simd128) ? 1 : 2;
-+ }
-+ uint32_t numAlignedAliased() { return numAliased(); }
-+
-+ // Index 0 is this register itself; index 1 (scalar kinds only) is the
-+ // other scalar view of the same encoding.
-+ FloatRegister aliased(uint32_t aliasIdx) {
-+ MOZ_ASSERT(!invalid_);
-+ MOZ_ASSERT(aliasIdx < numAliased());
-+ if (kind_ == FloatRegisters::Simd128) {
-+ return *this;
-+ }
-+ Kind otherKind = (kind_ == FloatRegisters::Single)
-+ ? FloatRegisters::Double
-+ : FloatRegisters::Single;
-+ Kind selectedKind = (aliasIdx == 0) ? Kind(kind_) : otherKind;
-+ return FloatRegister(Encoding(encoding_), selectedKind);
-+ }
-+ FloatRegister alignedAliased(uint32_t aliasIdx) {
-+ MOZ_ASSERT(aliasIdx < numAliased());
-+ return aliased(aliasIdx);
-+ }
-+ // Set bits for every view that overlaps this register: just its own bit
-+ // for Simd128, the Single and Double bits for scalar kinds.
-+ SetType alignedOrDominatedAliasedSet() const {
-+ if (kind_ == FloatRegisters::Simd128) {
-+ return SetType(1) << ((uint32_t(FloatRegisters::Simd128) *
-+ FloatRegisters::TotalPhys) +
-+ encoding_);
-+ }
-+ return (Codes::SpreadSingle | Codes::SpreadDouble) << encoding_;
-+ }
-+
-+ static constexpr RegTypeName DefaultType = RegTypeName::Float64;
-+
-+ // Primary template: yields the empty set. The specializations that follow
-+ // the struct select the lane matching Name.
-+ template <RegTypeName Name = DefaultType>
-+ static SetType LiveAsIndexableSet(SetType s) {
-+ return SetType(0);
-+ }
-+
-+ template <RegTypeName Name = DefaultType>
-+ static SetType AllocatableAsIndexableSet(SetType s) {
-+ static_assert(Name != RegTypeName::Any, "Allocatable set are not iterable");
-+ return LiveAsIndexableSet<Name>(s);
-+ }
-+
-+ static TypedRegisterSet<FloatRegister> ReduceSetForPush(
-+ const TypedRegisterSet<FloatRegister>& s);
-+ static uint32_t GetPushSizeInBytes(const TypedRegisterSet<FloatRegister>& s);
-+ uint32_t getRegisterDumpOffsetInBytes();
-+};
-+
-+// Float32: keep only the Single lane of the set.
-+template <>
-+inline FloatRegister::SetType
-+FloatRegister::LiveAsIndexableSet<RegTypeName::Float32>(SetType set) {
-+ return set & FloatRegisters::AllSingleMask;
-+}
-+
-+// Float64: keep only the Double lane of the set.
-+template <>
-+inline FloatRegister::SetType
-+FloatRegister::LiveAsIndexableSet<RegTypeName::Float64>(SetType set) {
-+ return set & FloatRegisters::AllDoubleMask;
-+}
-+
-+// Vector128: keep only the Simd128 lane of the set.
-+template <>
-+inline FloatRegister::SetType
-+FloatRegister::LiveAsIndexableSet<RegTypeName::Vector128>(SetType set) {
-+ return set & FloatRegisters::AllSimd128Mask;
-+}
-+
-+// Any: the set is returned unfiltered.
-+template <>
-+inline FloatRegister::SetType
-+FloatRegister::LiveAsIndexableSet<RegTypeName::Any>(SetType set) {
-+ return set;
-+}
-+
-+// Aliasing capability queries; both are constant false on this backend.
-+// NOTE(review): their exact contract is defined by the shared JIT code that
-+// calls them — confirm there before changing either value.
-+inline bool hasUnaliasedDouble() { return false; }
-+inline bool hasMultiAlias() { return false; }
-+
-+// PPC64 feature bits packed into the value GetPPC64Flags() returns,
-+// which feeds wasm/WasmCompile.cpp's per-architecture code signature.
-+// Defined as enum constants (not enum class) so callers can OR/AND
-+// freely. New bits should remain backward-compatible — older signatures
-+// must keep meaning the same set of features.
-+enum PPC64FeatureFlags : uint32_t {
-+ PPC64Flag_POWER9 = 1u << 0,
-+ // Future: PPC64Flag_POWER10 = 1u << 1, PPC64Flag_VSX4 = 1u << 2, ...
-+};
-+
-+uint32_t GetPPC64Flags();
-+
-+// Static-only holder for CPU feature detection; it cannot be instantiated
-+// (the constructor is deleted).
-+class PPC64Flags final {
-+ public:
-+ PPC64Flags() = delete;
-+
-+ // PPC64Flags::Init is called from the JitContext constructor to read the
-+ // hardware capabilities (via getauxval(AT_HWCAP2)). It must be called
-+ // exactly once, before HasPOWER9()/HasPOWER10() are used.
-+ // NOTE(review): "exactly once" and "called from the JitContext constructor"
-+ // only agree if Init guards against repeated calls — confirm in the
-+ // definition.
-+ static void Init();
-+};
-+
-+// CPU feature queries; per the note on Init they are only meaningful after
-+// PPC64Flags::Init has run.
-+bool HasPOWER9();
-+bool HasPOWER10();
-+
-+} // namespace jit
-+} // namespace js
-+
-+#endif /* jit_ppc64_Architecture_ppc64_h */
-diff --git a/js/src/jit/ppc64/Assembler-ppc64.cpp b/js/src/jit/ppc64/Assembler-ppc64.cpp
-new file mode 100644
-index 000000000000..481070c4c6d5
---- /dev/null
-+++ b/js/src/jit/ppc64/Assembler-ppc64.cpp
-@@ -0,0 +1,3028 @@
-+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
-+ * vim: set ts=8 sts=2 et sw=2 tw=80:
-+ * This Source Code Form is subject to the terms of the Mozilla Public
-+ * License, v. 2.0. If a copy of the MPL was not distributed with this
-+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-+
-+#include "jit/ppc64/Assembler-ppc64.h"
-+
-+#include "mozilla/DebugOnly.h"
-+#include "mozilla/Maybe.h"
-+
-+#include "gc/Marking.h"
-+#include "jit/AutoWritableJitCode.h"
-+#include "jit/ExecutableAllocator.h"
-+#include "jit/FlushICache.h"
-+
-+using mozilla::DebugOnly;
-+
-+using namespace js;
-+using namespace js::jit;
-+
-+// ELFv2 ABI: 8 GPRs (r3-r10), 13 FPRs (f1-f13).
-+// FP arguments also consume a GPR slot per ELFv2 convention.
-+//
-+// Returns the location of the next argument of the given type and advances
-+// the generator. Integer-like types take r3 + intRegIndex_, floating-point
-+// and SIMD types take f1 + floatRegIndex_; once the relevant register class
-+// is exhausted the argument is placed at the current stack offset. Any other
-+// MIRType crashes.
-+ABIArg ABIArgGenerator::next(MIRType type) {
-+  switch (type) {
-+    case MIRType::Int32:
-+    case MIRType::Int64:
-+    case MIRType::Pointer:
-+    case MIRType::WasmAnyRef:
-+    case MIRType::WasmArrayData:
-+    case MIRType::StackResults: {
-+      if (intRegIndex_ >= NumIntArgRegs) {
-+        current_ = ABIArg(stackOffset_);
-+        stackOffset_ += sizeof(uintptr_t);
-+        break;
-+      }
-+      current_ = ABIArg(Register::FromCode(Registers::r3 + intRegIndex_));
-+      intRegIndex_++;
-+      break;
-+    }
-+    case MIRType::Float32:
-+    case MIRType::Double: {
-+      // `>=` (not `==`) so the exhaustion test matches the integer case and
-+      // an index past the limit can never select a register beyond f13.
-+      if (floatRegIndex_ >= NumFloatArgRegs) {
-+        // Float32 stack arguments also take a full 8-byte slot.
-+        current_ = ABIArg(stackOffset_);
-+        stackOffset_ += sizeof(double);
-+        break;
-+      }
-+      current_ = ABIArg(FloatRegister(
-+          FloatRegisters::Encoding(FloatRegisters::f1 + floatRegIndex_),
-+          type == MIRType::Double ? FloatRegisters::Double
-+                                  : FloatRegisters::Single));
-+      floatRegIndex_++;
-+      // ELFv2 ABI: each FP arg also consumes a GPR slot (shadow).
-+      // Cap at NumIntArgRegs so subsequent int args go to the stack.
-+      if (intRegIndex_ < NumIntArgRegs) {
-+        intRegIndex_++;
-+      }
-+      break;
-+    }
-+    case MIRType::Simd128: {
-+      // Pass v128 in FP registers (Simd128 kind). On PPC64 ELFv2, SIMD
-+      // values use the same VSR register file as FP args.
-+      if (floatRegIndex_ >= NumFloatArgRegs) {
-+        // NOTE(review): the 16-byte slot starts at whatever stackOffset_
-+        // currently is; it is not rounded up to 16 first. Confirm that the
-+        // loads/stores used for stack v128 arguments tolerate 8-byte
-+        // alignment.
-+        current_ = ABIArg(stackOffset_);
-+        stackOffset_ += 16;
-+        break;
-+      }
-+      current_ = ABIArg(FloatRegister(
-+          FloatRegisters::Encoding(FloatRegisters::f1 + floatRegIndex_),
-+          FloatRegisters::Simd128));
-+      floatRegIndex_++;
-+      // Same GPR shadowing rule as for scalar FP arguments.
-+      if (intRegIndex_ < NumIntArgRegs) {
-+        intRegIndex_++;
-+      }
-+      break;
-+    }
-+    default:
-+      MOZ_CRASH("Unexpected argument type");
-+  }
-+  return current_;
-+}
-+
-+// Condition inversion tables.
-+// Maps an integer condition to its logical negation. The table is
-+// symmetric: each pair maps to each other. Unknown values crash.
-+Assembler::Condition Assembler::InvertCondition(Condition cond) {
-+  switch (cond) {
-+    // Equality.
-+    case Equal: return NotEqual;
-+    case NotEqual: return Equal;
-+    // Signed ordering.
-+    case LessThan: return GreaterThanOrEqual;
-+    case LessThanOrEqual: return GreaterThan;
-+    case GreaterThan: return LessThanOrEqual;
-+    case GreaterThanOrEqual: return LessThan;
-+    // Unsigned ordering.
-+    case Above: return BelowOrEqual;
-+    case AboveOrEqual: return Below;
-+    case Below: return AboveOrEqual;
-+    case BelowOrEqual: return Above;
-+    // Zero / sign tests.
-+    case Zero: return NonZero;
-+    case NonZero: return Zero;
-+    case Signed: return NotSigned;
-+    case NotSigned: return Signed;
-+    // Summary-overflow, overflow and carry.
-+    case SOBit: return NSOBit;
-+    case NSOBit: return SOBit;
-+    case Overflow: return NotOverflow;
-+    case NotOverflow: return Overflow;
-+    case CarrySet: return CarryClear;
-+    case CarryClear: return CarrySet;
-+    default:
-+      MOZ_CRASH("unexpected condition");
-+  }
-+}
-+
-+// Maps a floating-point condition to its logical negation. Negating an
-+// ordered comparison yields the opposite comparison "or unordered", and
-+// vice versa. Unknown values crash.
-+Assembler::DoubleCondition Assembler::InvertCondition(DoubleCondition cond) {
-+  switch (cond) {
-+    case DoubleOrdered: return DoubleUnordered;
-+    case DoubleUnordered: return DoubleOrdered;
-+    // Ordered comparison -> opposite comparison, or unordered.
-+    case DoubleEqual: return DoubleNotEqualOrUnordered;
-+    case DoubleNotEqual: return DoubleEqualOrUnordered;
-+    case DoubleGreaterThan: return DoubleLessThanOrEqualOrUnordered;
-+    case DoubleGreaterThanOrEqual: return DoubleLessThanOrUnordered;
-+    case DoubleLessThan: return DoubleGreaterThanOrEqualOrUnordered;
-+    case DoubleLessThanOrEqual: return DoubleGreaterThanOrUnordered;
-+    // "Or unordered" comparison -> opposite ordered comparison.
-+    case DoubleEqualOrUnordered: return DoubleNotEqual;
-+    case DoubleNotEqualOrUnordered: return DoubleEqual;
-+    case DoubleGreaterThanOrUnordered: return DoubleLessThanOrEqual;
-+    case DoubleGreaterThanOrEqualOrUnordered: return DoubleLessThan;
-+    case DoubleLessThanOrUnordered: return DoubleGreaterThanOrEqual;
-+    case DoubleLessThanOrEqualOrUnordered: return DoubleGreaterThan;
-+    default:
-+      MOZ_CRASH("unexpected condition");
-+  }
-+}
-+
-+// InstImm helper.
-+// Extracts the tag stored in a `tw` instruction: the 5-bit field at bits
-+// 16-20, which must equal the field at bits 11-15, with the lowest bit
-+// cleared.
-+uint8_t InstImm::traptag() {
-+  MOZ_ASSERT(isOpcode(PPC_tw));
-+  const uint8_t tag = (data & 0x001f0000) >> 16;
-+  MOZ_ASSERT(tag == ((data & 0x0000f800) >> 11));
-+  return tag & 0xfe;
-+}
-+
-+// Recovers the branch displacement from an encoded instruction: takes the
-+// 16-bit immediate with its two low bits cleared and sign-extends it.
-+BOffImm16::BOffImm16(InstImm inst) : data(inst.extractImm16Value() & 0xFFFC) {
-+  const bool isNegative = (data & 0x8000) != 0;
-+  if (isNegative) {
-+    // Propagate the sign bit through everything above bit 15.
-+    data |= ~0xFFFF;
-+  }
-+}
-+
-+// Branch target: `src` advanced by the stored byte displacement.
-+Instruction* BOffImm16::getDest(Instruction* src) const {
-+  uint8_t* const base = reinterpret_cast<uint8_t*>(src);
-+  return reinterpret_cast<Instruction*>(base + data);
-+}
-+
-+// Jump target: `src` advanced by the stored byte displacement.
-+Instruction* JOffImm26::getDest(Instruction* src) const {
-+  uint8_t* const base = reinterpret_cast<uint8_t*>(src);
-+  return reinterpret_cast<Instruction*>(base + data);
-+}
-+
-+Imm16::Imm16() : value(0) {}
-+
-+Imm8::Imm8() : value(0) {}
-+
-+// Buffer management.
-+// True if the shared assembler state, the instruction buffer, or either
-+// relocation table has reported an allocation failure.
-+bool Assembler::oom() const {
-+  if (AssemblerShared::oom() || m_buffer.oom()) {
-+    return true;
-+  }
-+  return jumpRelocations_.oom() || dataRelocations_.oom();
-+}
-+
-+// Marks the assembler as finished and flushes the buffer's pending pool.
-+// Must be called at most once (asserted).
-+void Assembler::finish() {
-+ MOZ_ASSERT(!isFinished);
-+ isFinished = true;
-+ m_buffer.flushPool();
-+}
-+
-+// Appends pre-assembled bytes to the buffer; forwards the buffer's result.
-+bool Assembler::appendRawCode(const uint8_t* code, size_t numBytes) {
-+ return m_buffer.appendRawCode(code, numBytes);
-+}
-+
-+// Reservation is a no-op here: `size` is ignored and the result only reports
-+// whether the assembler is currently free of OOM.
-+bool Assembler::reserve(size_t size) {
-+ // Fixed-size chunk buffer; no point in reserving now vs. on-demand.
-+ return !oom();
-+}
-+
-+// Copies the assembled code into `bytes`, which must be empty on entry.
-+// `bytes` is resized to bytesNeeded(); returns false, without copying, if
-+// that resize fails.
-+bool Assembler::swapBuffer(wasm::Bytes& bytes) {
-+  MOZ_ASSERT(bytes.empty());
-+  const bool resized = bytes.resize(bytesNeeded());
-+  if (resized) {
-+    m_buffer.executableCopy(bytes.begin());
-+  }
-+  return resized;
-+}
-+
-+// Copies the jump relocation table to `dest`; does nothing when it is empty.
-+void Assembler::copyJumpRelocationTable(uint8_t* dest) {
-+  const auto numBytes = jumpRelocations_.length();
-+  if (!numBytes) {
-+    return;
-+  }
-+  memcpy(dest, jumpRelocations_.buffer(), numBytes);
-+}
-+
-+// Copies the data relocation table to `dest`; does nothing when it is empty.
-+void Assembler::copyDataRelocationTable(uint8_t* dest) {
-+  const auto numBytes = dataRelocations_.length();
-+  if (!numBytes) {
-+    return;
-+  }
-+  memcpy(dest, dataRelocations_.buffer(), numBytes);
-+}
-+
-+// Copies the finished code into `buffer`. Both overloads require finish() to
-+// have been called (asserted) and differ only in the pointer type accepted.
-+void Assembler::executableCopy(void* buffer) {
-+ MOZ_ASSERT(isFinished);
-+ m_buffer.executableCopy(static_cast<uint8_t*>(buffer));
-+}
-+
-+void Assembler::executableCopy(uint8_t* buffer) {
-+ MOZ_ASSERT(isFinished);
-+ m_buffer.executableCopy(buffer);
-+}
-+
-+// Size in bytes of the code emitted so far.
-+// Note: although declared const, this flushes the buffer's pending pool
-+// (through a const_cast), so calling it can change the buffer.
-+size_t Assembler::size() const {
-+ // AssemblerBufferWithConstantPools::size() asserts pool is empty.
-+ // Flush pending pool entries first.
-+ const_cast<PPCBufferWithExecutableCopy&>(m_buffer).flushPool();
-+ return m_buffer.size();
-+}
-+
-+// Length of the jump relocation table.
-+size_t Assembler::jumpRelocationTableBytes() const {
-+ return jumpRelocations_.length();
-+}
-+
-+// Length of the data relocation table.
-+size_t Assembler::dataRelocationTableBytes() const {
-+ return dataRelocations_.length();
-+}
-+
-+// Code size plus both relocation tables. Goes through size(), so it also
-+// flushes the buffer's pending pool.
-+size_t Assembler::bytesNeeded() const {
-+ return size() + jumpRelocationTableBytes() + dataRelocationTableBytes();
-+}
-+
-+// Write an instruction into the buffer or to an external destination.
-+// Writes one instruction word. With a null `dest` the word is appended to
-+// the assembler buffer and its offset returned; otherwise it is stored at
-+// `dest` and a default-constructed BufferOffset is returned.
-+BufferOffset Assembler::writeInst(uint32_t x, uint32_t* dest) {
-+  MOZ_ASSERT(hasCreator());
-+  if (dest) {
-+    WriteInstStatic(x, dest);
-+    return BufferOffset();
-+  }
-+  return m_buffer.putInt(x);
-+}
-+
-+// Stores the instruction word `x` at `dest`, which must not be null.
-+void Assembler::WriteInstStatic(uint32_t x, uint32_t* dest) {
-+  MOZ_ASSERT(dest != nullptr);
-+  dest[0] = x;
-+}
-+
-+// Alignment.
-+// Pads the buffer with trap instructions up to `alignment` bytes (a power
-+// of two). Returns the offset of the first trap emitted, or an unassigned
-+// BufferOffset if the buffer was already aligned.
-+BufferOffset Assembler::haltingAlign(int alignment) {
-+  MOZ_ASSERT(m_buffer.isAligned(4));
-+  BufferOffset first;
-+
-+  if (alignment == 8) {
-+    // The buffer is already 4-byte aligned, so one trap is enough.
-+    if (!m_buffer.isAligned(alignment)) {
-+      first = xs_trap();
-+    }
-+    return first;
-+  }
-+
-+  MOZ_ASSERT((alignment & (alignment - 1)) == 0);
-+  while (size() & (alignment - 1)) {
-+    const BufferOffset emitted = xs_trap();
-+    if (!first.assigned()) {
-+      first = emitted;
-+    }
-+  }
-+  return first;
-+}
-+
-+// Pads the buffer with nops up to `alignment` bytes (a power of two).
-+// Returns the offset of the first nop emitted, or an unassigned BufferOffset
-+// if the buffer was already aligned.
-+BufferOffset Assembler::nopAlign(int alignment) {
-+  MOZ_ASSERT(m_buffer.isAligned(4));
-+  BufferOffset first;
-+
-+  if (alignment == 8) {
-+    // The buffer is already 4-byte aligned, so one nop is enough.
-+    if (!m_buffer.isAligned(alignment)) {
-+      first = as_nop();
-+    }
-+    return first;
-+  }
-+
-+  MOZ_ASSERT((alignment & (alignment - 1)) == 0);
-+  while (size() & (alignment - 1)) {
-+    const BufferOffset emitted = as_nop();
-+    if (!first.assigned()) {
-+      first = emitted;
-+    }
-+  }
-+  return first;
-+}
-+
-+// Primitive instructions.
-+// Each of the four emitters below logs its mnemonic via spew() and appends
-+// one fixed instruction word, returning that word's buffer offset.
-+BufferOffset Assembler::as_nop() {
-+ spew("nop");
-+ return writeInst(PPC_nop);
-+}
-+
-+BufferOffset Assembler::as_lwsync() {
-+ spew("lwsync");
-+ return writeInst(PPC_lwsync);
-+}
-+
-+BufferOffset Assembler::as_sync() {
-+ spew("sync");
-+ return writeInst(PPC_sync);
-+}
-+
-+BufferOffset Assembler::as_isync() {
-+ spew("isync");
-+ return writeInst(PPC_isync);
-+}
-+
-+// Branch and jump instructions.
-+// Unconditional branch taking a JOffImm26; forwards its encoded value to the
-+// int32_t overload.
-+BufferOffset Assembler::as_b(JOffImm26 off, BranchAddressType bat, LinkBit lb) {
-+ return as_b(off.encode(), bat, lb);
-+}
-+
-+// Unconditional branch. `off` must be a multiple of 4 (asserted); it is
-+// masked with 0x3fffffc and OR-ed with the `bat` and `lb` bits.
-+BufferOffset Assembler::as_b(int32_t off, BranchAddressType bat, LinkBit lb) {
-+ spew("b%s%s\t%x", bat == AbsoluteBranch ? "a" : "", lb ? "l" : "", off);
-+ MOZ_ASSERT(!(off & 0x03));
-+ return writeInst(PPC_b | ((uint32_t)off & 0x3fffffc) | bat | lb);
-+}
-+
-+// Branch to LR, with the link bit OR-ed in.
-+BufferOffset Assembler::as_blr(LinkBit lb) {
-+ spew("blr%s", lb ? "l" : "");
-+ return writeInst(uint32_t(PPC_blr) | uint32_t(lb));
-+}
-+
-+// Branch to CTR, with the link bit OR-ed in.
-+BufferOffset Assembler::as_bctr(LinkBit lb) {
-+ spew("bctr%s", lb ? "l" : "");
-+ return writeInst(uint32_t(PPC_bctr) | uint32_t(lb));
-+}
-+
-+// Conditional branches.
-+// Convenience overloads: each converts its (condition, cr) pair to a BO|BI
-+// value with computeConditionCode() and forwards to the raw as_bc/as_bcctr
-+// form. Because computeConditionCode() may itself emit instructions, the
-+// branch can be preceded by extra CR/XER instructions.
-+BufferOffset Assembler::as_bc(BOffImm16 off, Condition cond, CRegisterID cr,
-+ LikelyBit lkb, LinkBit lb) {
-+ return as_bc(off.encode(), cond, cr, lkb, lb);
-+}
-+
-+BufferOffset Assembler::as_bc(int16_t off, Condition cond, CRegisterID cr,
-+ LikelyBit lkb, LinkBit lb) {
-+ return as_bc(off, computeConditionCode(cond, cr), lkb, lb);
-+}
-+
-+BufferOffset Assembler::as_bc(BOffImm16 off, DoubleCondition cond,
-+ CRegisterID cr, LikelyBit lkb, LinkBit lb) {
-+ return as_bc(off.encode(), cond, cr, lkb, lb);
-+}
-+
-+BufferOffset Assembler::as_bc(int16_t off, DoubleCondition cond, CRegisterID cr,
-+ LikelyBit lkb, LinkBit lb) {
-+ return as_bc(off, computeConditionCode(cond, cr), lkb, lb);
-+}
-+
-+BufferOffset Assembler::as_bcctr(Condition cond, CRegisterID cr, LikelyBit lkb,
-+ LinkBit lb) {
-+ return as_bcctr(computeConditionCode(cond, cr), lkb, lb);
-+}
-+
-+BufferOffset Assembler::as_bcctr(DoubleCondition cond, CRegisterID cr,
-+ LikelyBit lkb, LinkBit lb) {
-+ return as_bcctr(computeConditionCode(cond, cr), lkb, lb);
-+}
-+
-+// Condition code computation: turn DoubleCondition + CR into BO|BI.
-+// May emit CR logic instructions for synthetic conditions involving FU bit.
-+//
-+// The emitted crorc/cror/crandc write their result to condBit, i.e. they
-+// modify CR field `cr` in place. Returns the low 8 bits of `op` (with
-+// BranchOnSet OR-ed in on the crorc path) plus `cr << 6`.
-+// NOTE(review): since the CR field is rewritten, it presumably cannot be
-+// tested again without a fresh compare — confirm against callers.
-+uint16_t Assembler::computeConditionCode(DoubleCondition op, CRegisterID cr) {
-+ const uint8_t condBit = crBit(cr, op);
-+ const uint8_t fuBit = crBit(cr, DoubleUnordered);
-+ uint32_t newop = (uint32_t)op & 255;
-+
-+ if (op & DoubleConditionUnordered) {
-+ // "... or unordered" conditions.
-+ if ((uint32_t(op) & BranchOptionMask) == BranchOnClear) {
-+ // crorc computes FU | ~cond into condBit, so the branch sense is
-+ // switched to branch-on-set.
-+ as_crorc(condBit, fuBit, condBit);
-+ newop |= BranchOnSet;
-+ } else {
-+ // Branch-on-set: fold FU into the tested bit (skipped when the tested
-+ // bit already is the FU bit).
-+ if (condBit != fuBit) {
-+ as_cror(condBit, fuBit, condBit);
-+ }
-+ }
-+ } else {
-+ // Ordered conditions.
-+ if ((uint32_t(op) & BranchOptionMask) == BranchOnClear) {
-+ // Branch-on-clear: setting the tested bit when FU is set keeps the
-+ // branch from being taken on unordered operands.
-+ if (condBit != fuBit) {
-+ as_cror(condBit, fuBit, condBit);
-+ }
-+ } else {
-+ // Branch-on-set: clear the tested bit when FU is set.
-+ if (condBit != fuBit) {
-+ as_crandc(condBit, condBit, fuBit);
-+ }
-+ }
-+ }
-+
-+ return (newop + ((uint8_t)cr << 6));
-+}
-+
-+// Condition code computation: turn Condition + CR into BO|BI.
-+// May emit mcrxrx for XER-mediated conditions.
-+//
-+// For conditions flagged ConditionOnlyXER (Overflow / NotOverflow) code is
-+// emitted first to copy the XER overflow state into CR field `cr`; the
-+// non-POWER9 path clobbers r0. Returns the low 8 bits of `op` plus
-+// `cr << 6`.
-+uint16_t Assembler::computeConditionCode(Condition op, CRegisterID cr) {
-+  if (op & ConditionOnlyXER) {
-+    MOZ_ASSERT(op == Overflow || op == NotOverflow);
-+    if (HasPOWER9()) {
-+      as_mcrxrx(cr);
-+    } else {
-+      // POWER8: read XER, place OV into the GT position of the target
-+      // CR field. Overflow condition (0x1c = GreaterThan) tests GT bit,
-+      // which mcrxrx populates with OV32. For 64-bit ops OV == OV32.
-+      // XER layout in GPR low 32 bits (IBM): bit 0=SO, 1=OV, 2=CA.
-+      // Target: GT position = IBM bit 4*cr+1.
-+      xs_mfxer(r0);
-+      int gtBit = 4 * (int)cr + 1;          // GT position in CR field
-+      int sh = (1 - gtBit) & 31;            // rotate OV from bit 1 to gtBit
-+      as_rlwinm(r0, r0, sh, gtBit, gtBit);  // isolate OV at GT only
-+      as_mtcrf(1 << (7 - (int)cr), r0);
-+    }
-+  }
-+
-+  // The BO|BI byte does not depend on whether the XER transfer was emitted,
-+  // so it is computed exactly once.
-+  const uint32_t newop = (uint32_t)op & 255;
-+  return (newop + ((uint8_t)cr << 6));
-+}
-+
-+// Given BO|BI in a 16-bit quantity, split into bit fields for instruction.
-+// The low four bits go to instruction bits 21-24 and the remaining bits are
-+// shifted up by 12; only the low ten bits of `op` may be set (asserted).
-+static uint32_t makeOpMask(uint16_t op) {
-+  MOZ_ASSERT(!(op & 0xfc00));
-+  const uint32_t lowNibble = op & 0x0f;
-+  const uint32_t upperBits = op & 0xfff0;
-+  return (lowNibble << 21) | (upperBits << 12);
-+}
-+
-+// Emit bc / bcl. `off` is the byte displacement (must be a multiple of 4),
-+// `op` the packed BO|BI value, `lkb` the likely hint and `lb` the link bit.
-+BufferOffset Assembler::as_bc(int16_t off, uint16_t op, LikelyBit lkb,
-+                              LinkBit lb) {
-+  spew("bc%s%s\tBO_BI=0x%04x,%d", lb ? "l" : "", lkb ? "+" : "", op, off);
-+  MOZ_ASSERT(!(off & 0x03));
-+  // Keep only the 14 displacement bits that live in the instruction word.
-+  const uint32_t displacement = (uint16_t)off & 0xfffc;
-+  const uint32_t word =
-+      PPC_bc | makeOpMask(op) | (lkb << 21) | displacement | lb;
-+  return writeInst(Instruction(word).encode());
-+}
-+
-+// Emit bcctr / bcctrl for the packed BO|BI value `op`.
-+BufferOffset Assembler::as_bcctr(uint16_t op, LikelyBit lkb, LinkBit lb) {
-+  spew("bcctr%s%s", lb ? "l" : "", lkb ? "+" : "");
-+  const uint32_t word = PPC_bcctr | makeOpMask(op) | (lkb << 21) | lb;
-+  return writeInst(word);
-+}
-+
-+// SPR operations.
-+// Emit mtspr: move GPR `ra` into special-purpose register `spr`.
-+BufferOffset Assembler::as_mtspr(SPRegisterID spr, Register ra) {
-+  spew("mtspr\t%d,%3s", spr, ra.name());
-+  const uint32_t word = PPC_mtspr | (ra.code() << 21) | PPC_SPR(spr);
-+  return writeInst(word);
-+}
-+
-+// Emit mfspr: move special-purpose register `spr` into GPR `rd`.
-+BufferOffset Assembler::as_mfspr(Register rd, SPRegisterID spr) {
-+  spew("mfspr\t%3s,%d", rd.name(), spr);
-+  const uint32_t word = PPC_mfspr | (rd.code() << 21) | PPC_SPR(spr);
-+  return writeInst(word);
-+}
-+
-+// CR operations.
-+// DEF_CRCR(op) defines as_<op>(t, a, b): a CR-logical instruction whose
-+// target bit `t` goes to bits 21-25 and whose source bits `a` and `b` go to
-+// bits 16-20 and 11-15 of the instruction word.
-+#define DEF_CRCR(op) \
-+ BufferOffset Assembler::as_##op(uint8_t t, uint8_t a, uint8_t b) { \
-+ spew(#op "\t%d,%d,%d", t, a, b); \
-+ return writeInst(PPC_##op | t << 21 | a << 16 | b << 11); \
-+ }
-+DEF_CRCR(crandc)
-+DEF_CRCR(cror)
-+DEF_CRCR(crorc)
-+#undef DEF_CRCR
-+
-+// Emit mtcrf: copy the CR fields selected by `mask` from GPR `rs`.
-+// `mask` is the 8-bit FXM field placed at bits 12-19; anything wider would
-+// spill into the neighbouring instruction fields, so it is rejected in
-+// debug builds.
-+BufferOffset Assembler::as_mtcrf(uint32_t mask, Register rs) {
-+  spew("mtcrf\t%d,%3s", mask, rs.name());
-+  MOZ_ASSERT(mask < 256);
-+  return writeInst(PPC_mtcrf | rs.code() << 21 | mask << 12);
-+}
-+
-+// Emit mfocrf: read the single CR field `crfs` into GPR `rd`.
-+BufferOffset Assembler::as_mfocrf(Register rd, CRegisterID crfs) {
-+  spew("mfocrf\t%3s,cr%d", rd.name(), crfs);
-+  // FXM is a one-hot 8-bit mask at bits 12-19. Bit (7-crfs) selects the CR.
-+  const uint32_t fxm = 1 << (7 - crfs);
-+  const uint32_t word = PPC_mfocrf | (rd.code() << 21) | (fxm << 12);
-+  return writeInst(word);
-+}
-+
-+// Emit mcrxrx targeting CR field `cr` (field number at bits 23-25).
-+BufferOffset Assembler::as_mcrxrx(CRegisterID cr) {
-+  spew("mcrxrx\tcr%d", cr);
-+  const uint32_t word = PPC_mcrxrx | (cr << 23);
-+  return writeInst(word);
-+}
-+
-+// GPR neg: rd = -rs. The third register slot of the encoding is filled
-+// with r0.
-+BufferOffset Assembler::as_neg(Register rd, Register rs) {
-+  spew("neg\t%3s,%3s", rd.name(), rs.name());
-+  const uint32_t word = InstReg(PPC_neg, rd, rs, r0).encode();
-+  return writeInst(word);
-+}
-+
-+// Compare instructions with an explicit target CR field.
-+// Layout shared by all of them: CR field number at bits 23-25, RA at bits
-+// 16-20, then either RB at bits 11-15 or a 16-bit immediate in the low half.
-+BufferOffset Assembler::as_cmpd(CRegisterID cr, Register ra, Register rb) {
-+  spew("cmpd\tcr%d,%3s,%3s", cr, ra.name(), rb.name());
-+  const uint32_t word =
-+      PPC_cmpd | (cr << 23) | (ra.code() << 16) | (rb.code() << 11);
-+  return writeInst(word);
-+}
-+
-+BufferOffset Assembler::as_cmpdi(CRegisterID cr, Register ra, int16_t im) {
-+  spew("cmpdi\tcr%d,%3s,%d", cr, ra.name(), im);
-+  const uint32_t imm16 = (uint16_t)im & 0xffff;
-+  const uint32_t word = PPC_cmpdi | (cr << 23) | (ra.code() << 16) | imm16;
-+  return writeInst(word);
-+}
-+
-+BufferOffset Assembler::as_cmpld(CRegisterID cr, Register ra, Register rb) {
-+  spew("cmpld\tcr%d,%3s,%3s", cr, ra.name(), rb.name());
-+  const uint32_t word =
-+      PPC_cmpld | (cr << 23) | (ra.code() << 16) | (rb.code() << 11);
-+  return writeInst(word);
-+}
-+
-+BufferOffset Assembler::as_cmpldi(CRegisterID cr, Register ra, int16_t im) {
-+  spew("cmpldi\tcr%d,%3s,%d", cr, ra.name(), im);
-+  const uint32_t imm16 = (uint16_t)im & 0xffff;
-+  const uint32_t word = PPC_cmpldi | (cr << 23) | (ra.code() << 16) | imm16;
-+  return writeInst(word);
-+}
-+
-+BufferOffset Assembler::as_cmpw(CRegisterID cr, Register ra, Register rb) {
-+  spew("cmpw\tcr%d,%3s,%3s", cr, ra.name(), rb.name());
-+  const uint32_t word =
-+      PPC_cmpw | (cr << 23) | (ra.code() << 16) | (rb.code() << 11);
-+  return writeInst(word);
-+}
-+
-+BufferOffset Assembler::as_cmpwi(CRegisterID cr, Register ra, int16_t im) {
-+  spew("cmpwi\tcr%d,%3s,%d", cr, ra.name(), im);
-+  const uint32_t imm16 = (uint16_t)im & 0xffff;
-+  const uint32_t word = PPC_cmpwi | (cr << 23) | (ra.code() << 16) | imm16;
-+  return writeInst(word);
-+}
-+
-+BufferOffset Assembler::as_cmplw(CRegisterID cr, Register ra, Register rb) {
-+  spew("cmplw\tcr%d,%3s,%3s", cr, ra.name(), rb.name());
-+  const uint32_t word =
-+      PPC_cmplw | (cr << 23) | (ra.code() << 16) | (rb.code() << 11);
-+  return writeInst(word);
-+}
-+
-+BufferOffset Assembler::as_cmplwi(CRegisterID cr, Register ra, int16_t im) {
-+  spew("cmplwi\tcr%d,%3s,%d", cr, ra.name(), im);
-+  const uint32_t imm16 = (uint16_t)im & 0xffff;
-+  const uint32_t word = PPC_cmplwi | (cr << 23) | (ra.code() << 16) | imm16;
-+  return writeInst(word);
-+}
-+
-+// Compare instructions (cr0 implicit): the CR field bits are left at zero,
-+// so only RA (bits 16-20) and RB (bits 11-15) or the 16-bit immediate are
-+// merged into the opcode.
-+BufferOffset Assembler::as_cmpd(Register ra, Register rb) {
-+  spew("cmpd\t%3s,%3s", ra.name(), rb.name());
-+  const uint32_t word = PPC_cmpd | (ra.code() << 16) | (rb.code() << 11);
-+  return writeInst(word);
-+}
-+
-+BufferOffset Assembler::as_cmpdi(Register ra, int16_t im) {
-+  spew("cmpdi\t%3s,%d", ra.name(), im);
-+  const uint32_t imm16 = (uint16_t)im & 0xffff;
-+  const uint32_t word = PPC_cmpdi | (ra.code() << 16) | imm16;
-+  return writeInst(word);
-+}
-+
-+BufferOffset Assembler::as_cmpld(Register ra, Register rb) {
-+  spew("cmpld\t%3s,%3s", ra.name(), rb.name());
-+  const uint32_t word = PPC_cmpld | (ra.code() << 16) | (rb.code() << 11);
-+  return writeInst(word);
-+}
-+
-+BufferOffset Assembler::as_cmpldi(Register ra, int16_t im) {
-+  spew("cmpldi\t%3s,%d", ra.name(), im);
-+  const uint32_t imm16 = (uint16_t)im & 0xffff;
-+  const uint32_t word = PPC_cmpldi | (ra.code() << 16) | imm16;
-+  return writeInst(word);
-+}
-+
-+BufferOffset Assembler::as_cmpw(Register ra, Register rb) {
-+  spew("cmpw\t%3s,%3s", ra.name(), rb.name());
-+  const uint32_t word = PPC_cmpw | (ra.code() << 16) | (rb.code() << 11);
-+  return writeInst(word);
-+}
-+
-+BufferOffset Assembler::as_cmpwi(Register ra, int16_t im) {
-+  spew("cmpwi\t%3s,%d", ra.name(), im);
-+  const uint32_t imm16 = (uint16_t)im & 0xffff;
-+  const uint32_t word = PPC_cmpwi | (ra.code() << 16) | imm16;
-+  return writeInst(word);
-+}
-+
-+BufferOffset Assembler::as_cmplw(Register ra, Register rb) {
-+  spew("cmplw\t%3s,%3s", ra.name(), rb.name());
-+  const uint32_t word = PPC_cmplw | (ra.code() << 16) | (rb.code() << 11);
-+  return writeInst(word);
-+}
-+
-+BufferOffset Assembler::as_cmplwi(Register ra, int16_t im) {
-+  spew("cmplwi\t%3s,%d", ra.name(), im);
-+  const uint32_t imm16 = (uint16_t)im & 0xffff;
-+  const uint32_t word = PPC_cmplwi | (ra.code() << 16) | imm16;
-+  return writeInst(word);
-+}
-+
-+// FP encoding helpers. Each one merges register numbers into `op` and
-+// returns the finished instruction word; `rc` lands in bit 0.
-+
-+// A-form: FRT at 21-25, FRA at 16-20, FRB at 11-15, FRC at 6-10.
-+static uint32_t AForm(uint32_t op, FloatRegister frt, FloatRegister fra,
-+                      FloatRegister frb, FloatRegister frc, bool rc) {
-+  const uint32_t t = frt.encoding() << 21;
-+  const uint32_t a = fra.encoding() << 16;
-+  const uint32_t b = frb.encoding() << 11;
-+  const uint32_t c = frc.encoding() << 6;
-+  return op | t | a | b | c | rc;
-+}
-+
-+// X-form with three FP registers: FRT at 21-25, FRA at 16-20, FRB at 11-15.
-+static uint32_t XForm(uint32_t op, FloatRegister frt, FloatRegister fra,
-+                      FloatRegister frb, bool rc) {
-+  const uint32_t t = frt.encoding() << 21;
-+  const uint32_t a = fra.encoding() << 16;
-+  const uint32_t b = frb.encoding() << 11;
-+  return op | t | a | b | rc;
-+}
-+
-+// X-form with an FP target and two GPRs (indexed FP load/store).
-+static uint32_t XForm(uint32_t op, FloatRegister frt, Register ra, Register rb,
-+                      bool rc) {
-+  const uint32_t t = frt.encoding() << 21;
-+  const uint32_t a = ra.code() << 16;
-+  const uint32_t b = rb.code() << 11;
-+  return op | t | a | b | rc;
-+}
-+
-+// D-form with an FP target: FRT at 21-25, RA at 16-20, 16-bit displacement
-+// in the low half.
-+static uint32_t DForm(uint32_t op, FloatRegister frt, Register ra,
-+                      int16_t imm) {
-+  const uint32_t t = frt.encoding() << 21;
-+  const uint32_t a = ra.code() << 16;
-+  const uint32_t disp = (uint16_t)imm & 0xffff;
-+  return op | t | a | disp;
-+}
-+
-+// XX-form encoders. Each form has its own X-bit positions.
-+// All take uint32_t encodings (0-63) so they correctly
-+// emit the high bit for VSR32-63. FloatRegister.encoding() returns 0-31
-+// for Single/Double (= VSR0-31 = FPR namespace) and 32-63 for Simd128
-+// (= VSR32-63 = VR namespace) — so a single XX-form encoder addresses
-+// the full VSR space.
-+
-+// XX1-form: T + GPR (RA) + GPR (RB). TX bit at instruction bit 0.
-+// Used by lxvx, stxvx, lxvd2x, stxvd2x, mtvsrdd, mtvsrd, mtvsrws, mtvsrwz.
-+static uint32_t XX1Form(uint32_t op, uint32_t xt, uint32_t ra, uint32_t rb) {
-+  const uint32_t tField = (xt & 31) << 21;
-+  const uint32_t aField = (ra & 31) << 16;
-+  const uint32_t bField = (rb & 31) << 11;
-+  const uint32_t tx = (xt >> 5) & 1;  // bit 5 of the 6-bit VSR number
-+  return op | tField | aField | bField | tx;
-+}
-+
-+// XX1-form for mfvsrX: GPR (RT) + VSR (XS). The source VSR number goes in
-+// bits 21-25 and the destination GPR in bits 16-20. The sixth VSR bit sits
-+// at instruction bit 0, the same position as TX; it is named SX here
-+// because the VSR is the source operand.
-+// Used by mfvsrd, mfvsrld.
-+static uint32_t XX1FormMfvsr(uint32_t op, uint32_t rt, uint32_t xs) {
-+  const uint32_t sField = (xs & 31) << 21;
-+  const uint32_t rtField = (rt & 31) << 16;
-+  const uint32_t sx = (xs >> 5) & 1;
-+  return op | sField | rtField | sx;
-+}
-+
-+// XX2-form: T + B (no A field; bits 16-20 unused or hold a UIM). BX bit
-+// at instruction bit 1, TX bit at instruction bit 0. The bits16-20 slot
-+// is set by callers — for plain XX2 it must be 0, for XX2 with UIM it
-+// holds the immediate.
-+// Used by xxbrd, xxbrh, xxbrw, xxbrq, xscvdpsp, xscvspdp, xscvdpspn,
-+// xscvspdpn, xxspltw (UIM=2 bits), xxinsertw (UIM=4 bits),
-+// xxextractuw (UIM=4 bits), xvabs*/xvneg*/xvsqrt*/xvr* etc. via
-+// DEF_VSX_UN.
-+static uint32_t XX2Form(uint32_t op, uint32_t xt, uint32_t xb,
-+                        uint32_t bits16to20 = 0) {
-+  const uint32_t tField = (xt & 31) << 21;
-+  const uint32_t midField = (bits16to20 & 31) << 16;
-+  const uint32_t bField = (xb & 31) << 11;
-+  const uint32_t bx = ((xb >> 5) & 1) << 1;
-+  const uint32_t tx = (xt >> 5) & 1;
-+  return op | tField | midField | bField | bx | tx;
-+}
-+
-+// XX3-form: T + A + B. AX/BX/TX bits at instruction bits 2/1/0.
-+// Used by xxlor, xxland, xxlxor, xxlnor, xxlandc, xxpermdi, xsmaxjdp,
-+// xsminjdp, xvadd*, xvcmp*, etc.
-+static uint32_t XX3Form(uint32_t op, uint32_t xt, uint32_t xa, uint32_t xb) {
-+  const uint32_t tField = (xt & 31) << 21;
-+  const uint32_t aField = (xa & 31) << 16;
-+  const uint32_t bField = (xb & 31) << 11;
-+  const uint32_t ax = ((xa >> 5) & 1) << 2;
-+  const uint32_t bx = ((xb >> 5) & 1) << 1;
-+  const uint32_t tx = (xt >> 5) & 1;
-+  return op | tField | aField | bField | ax | bx | tx;
-+}
-+
-+// XX4-form: T + A + B + C. CX/AX/BX/TX bits at instruction bits 3/2/1/0.
-+// Used by xxsel.
-+static uint32_t XX4Form(uint32_t op, uint32_t xt, uint32_t xa, uint32_t xb,
-+                        uint32_t xc) {
-+  const uint32_t tField = (xt & 31) << 21;
-+  const uint32_t aField = (xa & 31) << 16;
-+  const uint32_t bField = (xb & 31) << 11;
-+  const uint32_t cField = (xc & 31) << 6;
-+  const uint32_t cx = ((xc >> 5) & 1) << 3;
-+  const uint32_t ax = ((xa >> 5) & 1) << 2;
-+  const uint32_t bx = ((xb >> 5) & 1) << 1;
-+  const uint32_t tx = (xt >> 5) & 1;
-+  return op | tField | aField | bField | cField | cx | ax | bx | tx;
-+}
-+
-+// FloatRegister convenience overload for XX3Form (the most common form):
-+// converts each register to its numeric encoding and defers to the
-+// integer overload.
-+static uint32_t XX3Form(uint32_t op, FloatRegister xt, FloatRegister xa,
-+                        FloatRegister xb) {
-+  const uint32_t t = uint32_t(xt.encoding());
-+  const uint32_t a = uint32_t(xa.encoding());
-+  const uint32_t b = uint32_t(xb.encoding());
-+  return XX3Form(op, t, a, b);
-+}
-+
-+// --- Macro-defined instruction emitters ---
-+
-+// X-form: rd in bits 21-25, ra in 16-20, rb in 11-15.
-+// Defines as_<op>(rd, ra, rb).
-+#define DEF_XFORM(op) \
-+ BufferOffset Assembler::as_##op(Register rd, Register ra, Register rb) { \
-+ spew(#op "\t%3s,%3s,%3s", rd.name(), ra.name(), rb.name()); \
-+ return writeInst(InstReg(PPC_##op, rd, ra, rb).encode()); \
-+ }
-+
-+// Record form of DEF_XFORM: defines as_<op>_rc and sets bit 0 of the word.
-+#define DEF_XFORM_RC(op) \
-+ BufferOffset Assembler::as_##op##_rc(Register rd, Register ra, \
-+ Register rb) { \
-+ spew(#op ".\t%3s,%3s,%3s", rd.name(), ra.name(), rb.name()); \
-+ return writeInst(InstReg(PPC_##op, rd, ra, rb).encode() | 0x1); \
-+ }
-+
-+// X-form with swapped RS/RA encoding: rs in bits 21-25, ra in 16-20.
-+// The C++ argument order is still (destination, source, rb); the first two
-+// are exchanged when handed to InstReg.
-+#define DEF_XFORMS(op) \
-+ BufferOffset Assembler::as_##op(Register rd, Register ra, Register rb) { \
-+ spew(#op "\t%3s,%3s,%3s", rd.name(), ra.name(), rb.name()); \
-+ return writeInst(InstReg(PPC_##op, ra, rd, rb).encode()); \
-+ }
-+
-+// Record form of DEF_XFORMS: defines as_<op>_rc and sets bit 0 of the word.
-+#define DEF_XFORMS_RC(op) \
-+ BufferOffset Assembler::as_##op##_rc(Register rd, Register ra, \
-+ Register rb) { \
-+ spew(#op ".\t%3s,%3s,%3s", rd.name(), ra.name(), rb.name()); \
-+ return writeInst(InstReg(PPC_##op, ra, rd, rb).encode() | 0x1); \
-+ }
-+
-+// X-form shift immediate with swapped encoding.
-+// The 5-bit shift amount `sh` (asserted < 32) goes in bits 11-15.
-+#define DEF_XFORMS_I(op) \
-+ BufferOffset Assembler::as_##op(Register rd, Register ra, uint8_t sh) { \
-+ spew(#op "\t%3s,%3s,%d", rd.name(), ra.name(), sh); \
-+ MOZ_ASSERT(sh < 32); \
-+ return writeInst(PPC_##op | ra.code() << 21 | rd.code() << 16 | sh << 11); \
-+ }
-+
-+// 2-reg X-form: rd in bits 21-25, ra in 16-20, rb=r0.
-+#define DEF_XFORM2(op) \
-+ BufferOffset Assembler::as_##op(Register rd, Register ra) { \
-+ spew(#op "\t%3s,%3s", rd.name(), ra.name()); \
-+ return writeInst(InstReg(PPC_##op, rd, ra, r0).encode()); \
-+ }
-+
-+// Record form of DEF_XFORM2: defines as_<op>_rc and sets bit 0 of the word.
-+#define DEF_XFORM2_RC(op) \
-+ BufferOffset Assembler::as_##op##_rc(Register rd, Register ra) { \
-+ spew(#op ".\t%3s,%3s", rd.name(), ra.name()); \
-+ return writeInst(InstReg(PPC_##op, rd, ra, r0).encode() | 0x1); \
-+ }
-+
-+// 2-reg X-form swapped: ra in bits 21-25, rd in 16-20.
-+#define DEF_XFORM2S(op) \
-+ BufferOffset Assembler::as_##op(Register rd, Register ra) { \
-+ spew(#op "\t%3s,%3s", rd.name(), ra.name()); \
-+ return writeInst(InstReg(PPC_##op, ra, rd, r0).encode()); \
-+ }
-+
-+// Record form of DEF_XFORM2S: defines as_<op>_rc and sets bit 0 of the word.
-+#define DEF_XFORM2S_RC(op) \
-+ BufferOffset Assembler::as_##op##_rc(Register rd, Register ra) { \
-+ spew(#op ".\t%3s,%3s", rd.name(), ra.name()); \
-+ return writeInst(InstReg(PPC_##op, ra, rd, r0).encode() | 0x1); \
-+ }
-+
-+// D-form load/store: rd=RT, rb=RA (base register), off=displacement.
-+// r0 cannot be used as base register for D-form loads/stores.
-+// Defines as_<op>(rd, rb, off); the base-register rule is enforced with
-+// MOZ_ASSERT. No alignment check is made on `off` here.
-+#define DEF_DFORM(op) \
-+ BufferOffset Assembler::as_##op(Register rd, Register rb, int16_t off) { \
-+ spew(#op "\t%3s,%d(%3s)", rd.name(), off, rb.name()); \
-+ MOZ_ASSERT(rb != r0); \
-+ return writeInst(InstImm(PPC_##op, rd, rb, off).encode()); \
-+ }
-+
-+// D-form with swapped RS/RA encoding for logical immediates.
-+// Defines as_<op>(rd, ra, im) with an unsigned 16-bit immediate; rd and ra
-+// are exchanged when handed to InstImm.
-+#define DEF_DFORMS(op) \
-+ BufferOffset Assembler::as_##op(Register rd, Register ra, uint16_t im) { \
-+ spew(#op "\t%3s,%3s,%d", rd.name(), ra.name(), im); \
-+ return writeInst(InstImm(PPC_##op, ra, rd, im).encode()); \
-+ }
-+
-+// M-form: rotate with 3 registers + mb + me.
-+// rs goes in bits 21-25, rd in 16-20, rb in 11-15, mb in 6-10, me in 1-5.
-+// mb and me are asserted to be below 32.
-+#define DEF_MFORM(op) \
-+ BufferOffset Assembler::as_##op(Register rd, Register rs, Register rb, \
-+ uint8_t mb, uint8_t me) { \
-+ spew(#op "\t%3s,%3s,%3s,%d,%d", rd.name(), rs.name(), rb.name(), mb, me); \
-+ MOZ_ASSERT(mb < 32); \
-+ MOZ_ASSERT(me < 32); \
-+ return writeInst(PPC_##op | rs.code() << 21 | rd.code() << 16 | \
-+ rb.code() << 11 | mb << 6 | me << 1); \
-+ }
-+
-+// M-form with immediate shift.
-+// Same layout as DEF_MFORM, with the 5-bit shift amount `sh` in bits 11-15
-+// in place of rb.
-+#define DEF_MFORM_I(op) \
-+ BufferOffset Assembler::as_##op(Register rd, Register rs, uint8_t sh, \
-+ uint8_t mb, uint8_t me) { \
-+ spew(#op "\t%3s,%3s,%d,%d,%d", rd.name(), rs.name(), sh, mb, me); \
-+ MOZ_ASSERT(sh < 32); \
-+ MOZ_ASSERT(mb < 32); \
-+ MOZ_ASSERT(me < 32); \
-+ return writeInst(PPC_##op | rs.code() << 21 | rd.code() << 16 | sh << 11 | \
-+ mb << 6 | me << 1); \
-+ }
-+
-+// Record form of DEF_MFORM_I: defines as_<op>_rc and sets bit 0 of the word.
-+#define DEF_MFORM_I_RC(op) \
-+ BufferOffset Assembler::as_##op##_rc(Register rd, Register rs, uint8_t sh, \
-+ uint8_t mb, uint8_t me) { \
-+ spew(#op ".\t%3s,%3s,%d,%d,%d", rd.name(), rs.name(), sh, mb, me); \
-+ MOZ_ASSERT(sh < 32); \
-+ MOZ_ASSERT(mb < 32); \
-+ MOZ_ASSERT(me < 32); \
-+ return writeInst(PPC_##op | rs.code() << 21 | rd.code() << 16 | sh << 11 | \
-+ mb << 6 | me << 1 | 1); \
-+ }
-+
-+// MDS-form: rotate with register + mb (64-bit).
-+// mb is a 6-bit value: its low five bits go in bits 6-10 and its top bit
-+// (0x20) stays at bit 5 of the instruction word.
-+#define DEF_MDSFORM(op) \
-+ BufferOffset Assembler::as_##op(Register ra, Register rs, Register rb, \
-+ uint8_t mb) { \
-+ spew(#op "\t%3s,%3s,%3s,%d", ra.name(), rs.name(), rb.name(), mb); \
-+ MOZ_ASSERT(mb < 64); \
-+ return writeInst(PPC_##op | rs.code() << 21 | ra.code() << 16 | \
-+ rb.code() << 11 | ((mb & 0x1f) << 6) | (mb & 0x20)); \
-+ }
-+
-+// Record form of DEF_MDSFORM: defines as_<op>_rc and sets bit 0 of the word.
-+#define DEF_MDSFORM_RC(op) \
-+ BufferOffset Assembler::as_##op##_rc(Register ra, Register rs, Register rb, \
-+ uint8_t mb) { \
-+ spew(#op ".\t%3s,%3s,%3s,%d", ra.name(), rs.name(), rb.name(), mb); \
-+ MOZ_ASSERT(mb < 64); \
-+ return writeInst(PPC_##op | rs.code() << 21 | ra.code() << 16 | \
-+ rb.code() << 11 | ((mb & 0x1f) << 6) | (mb & 0x20) | 1); \
-+ }
-+
-+// MD-form: rotate/shift with immediate sh + mb (64-bit).
-+// sh and mb are 6-bit fields split across the instruction word.
-+// sh: low five bits in bits 11-15, top bit moved down to bit 1.
-+// mb: low five bits in bits 6-10, top bit kept at bit 5.
-+#define DEF_MDFORM(op) \
-+ BufferOffset Assembler::as_##op(Register ra, Register rs, uint8_t sh, \
-+ uint8_t mb) { \
-+ spew(#op "\t%3s,%3s,%d,%d", ra.name(), rs.name(), sh, mb); \
-+ MOZ_ASSERT(sh < 64); \
-+ MOZ_ASSERT(mb < 64); \
-+ return writeInst(PPC_##op | rs.code() << 21 | ra.code() << 16 | \
-+ ((sh & 0x1f) << 11) | ((mb & 0x1f) << 6) | (mb & 0x20) | \
-+ ((sh & 0x20) >> 4)); \
-+ }
-+
-+// Record form of DEF_MDFORM: defines as_<op>_rc and sets bit 0 of the word.
-+#define DEF_MDFORM_RC(op) \
-+ BufferOffset Assembler::as_##op##_rc(Register ra, Register rs, uint8_t sh, \
-+ uint8_t mb) { \
-+ spew(#op ".\t%3s,%3s,%d,%d", ra.name(), rs.name(), sh, mb); \
-+ MOZ_ASSERT(sh < 64); \
-+ MOZ_ASSERT(mb < 64); \
-+ return writeInst(PPC_##op | rs.code() << 21 | ra.code() << 16 | \
-+ ((sh & 0x1f) << 11) | ((mb & 0x1f) << 6) | (mb & 0x20) | \
-+ ((sh & 0x20) >> 4) | 0x01); \
-+ }
-+
-+// FP 2-reg X-form: frt in bits 21-25, fra=f0, frb in 11-15.
-+// Note the C++ parameter named `ra` is passed in the frb slot.
-+#define DEF_XFORM2_F(op) \
-+ BufferOffset Assembler::as_##op(FloatRegister rd, FloatRegister ra) { \
-+ spew(#op "\t%3s,%3s", rd.name(), ra.name()); \
-+ return writeInst(XForm(PPC_##op, rd, f0, ra, false)); \
-+ }
-+
-+// Record form of DEF_XFORM2_F: defines as_<op>_rc with rc=true.
-+#define DEF_XFORM2_F_RC(op) \
-+ BufferOffset Assembler::as_##op##_rc(FloatRegister rd, FloatRegister ra) { \
-+ spew(#op ".\t%3s,%3s", rd.name(), ra.name()); \
-+ return writeInst(XForm(PPC_##op, rd, f0, ra, true)); \
-+ }
-+
-+// FP A-form with frc (fmul-type): frt, fra, frc; frb=f0.
-+#define DEF_AFORM_C(op) \
-+ BufferOffset Assembler::as_##op(FloatRegister rd, FloatRegister ra, \
-+ FloatRegister rc) { \
-+ spew(#op "\t%3s,%3s,%3s", rd.name(), ra.name(), rc.name()); \
-+ return writeInst(AForm(PPC_##op, rd, ra, f0, rc, false)); \
-+ }
-+
-+// Record form of DEF_AFORM_C: defines as_<op>_rc with rc=true.
-+#define DEF_AFORM_C_RC(op) \
-+ BufferOffset Assembler::as_##op##_rc(FloatRegister rd, FloatRegister ra, \
-+ FloatRegister rc) { \
-+ spew(#op ".\t%3s,%3s,%3s", rd.name(), ra.name(), rc.name()); \
-+ return writeInst(AForm(PPC_##op, rd, ra, f0, rc, true)); \
-+ }
-+
-+// FP A-form with frb (fadd-type): frt, fra, frb; frc=f0.
-+#define DEF_AFORM_B(op) \
-+ BufferOffset Assembler::as_##op(FloatRegister rd, FloatRegister ra, \
-+ FloatRegister rb) { \
-+ spew(#op "\t%3s,%3s,%3s", rd.name(), ra.name(), rb.name()); \
-+ return writeInst(AForm(PPC_##op, rd, ra, rb, f0, false)); \
-+ }
-+
-+// Record form of DEF_AFORM_B: defines as_<op>_rc with rc=true.
-+#define DEF_AFORM_B_RC(op) \
-+ BufferOffset Assembler::as_##op##_rc(FloatRegister rd, FloatRegister ra, \
-+ FloatRegister rb) { \
-+ spew(#op ".\t%3s,%3s,%3s", rd.name(), ra.name(), rb.name()); \
-+ return writeInst(AForm(PPC_##op, rd, ra, rb, f0, true)); \
-+ }
-+
-+// Full FP A-form: frt, fra, frc, frb (fmadd-type).
-+// The C++ parameters are ordered (rd, ra, rc, rb), matching the assembly
-+// operand order, while AForm itself takes frb before frc.
-+#define DEF_AFORM(op) \
-+ BufferOffset Assembler::as_##op(FloatRegister rd, FloatRegister ra, \
-+ FloatRegister rc, FloatRegister rb) { \
-+ spew(#op "\t%3s,%3s,%3s,%3s", rd.name(), ra.name(), rc.name(), rb.name()); \
-+ return writeInst(AForm(PPC_##op, rd, ra, rb, rc, false)); \
-+ }
-+
-+// Record form of DEF_AFORM: defines as_<op>_rc with rc=true.
-+#define DEF_AFORM_RC(op) \
-+ BufferOffset Assembler::as_##op##_rc(FloatRegister rd, FloatRegister ra, \
-+ FloatRegister rc, FloatRegister rb) { \
-+ spew(#op ".\t%3s,%3s,%3s,%3s", rd.name(), ra.name(), rc.name(), \
-+ rb.name()); \
-+ return writeInst(AForm(PPC_##op, rd, ra, rb, rc, true)); \
-+ }
-+
-+// FP D-form load/store.
-+// Defines as_<op>(rd, rb, off); r0 is rejected as the base register.
-+#define DEF_DFORM_F(op) \
-+ BufferOffset Assembler::as_##op(FloatRegister rd, Register rb, \
-+ int16_t off) { \
-+ spew(#op "\t%3s,%d(%3s)", rd.name(), off, rb.name()); \
-+ MOZ_ASSERT(rb != r0); \
-+ return writeInst(DForm(PPC_##op, rd, rb, off)); \
-+ }
-+
-+// FP X-form indexed load/store.
-+// Defines as_<op>(rd, ra, rb) with an FP target and two GPR operands.
-+#define DEF_FMEMx(op) \
-+ BufferOffset Assembler::as_##op(FloatRegister rd, Register ra, \
-+ Register rb) { \
-+ spew(#op "\t%3s,%3s,%3s", rd.name(), ra.name(), rb.name()); \
-+ return writeInst(XForm(PPC_##op, rd, ra, rb, false)); \
-+ }
-+
-+// --- Rotate/shift instructions ---
-+
-+// 32-bit rotates (M-form). rlwinm also gets a record form, as_rlwinm_rc.
-+DEF_MFORM(rlwnm)
-+DEF_MFORM_I(rlwinm)
-+DEF_MFORM_I_RC(rlwinm)
-+DEF_MFORM_I(rlwimi)
-+DEF_XFORMS_I(srawi)
-+
-+// 64-bit rotates (MDS/MD-form). rldicl and rldicr also get record forms.
-+DEF_MDSFORM(rldcl)
-+DEF_MDFORM(rldicl)
-+DEF_MDFORM_RC(rldicl)
-+DEF_MDFORM(rldicr)
-+DEF_MDFORM_RC(rldicr)
-+DEF_MDFORM(rldimi)
-+
-+// Emit sradi. The 6-bit shift amount is split: its low five bits go in
-+// bits 11-15 and its top bit is moved down to bit 1 of the word.
-+BufferOffset Assembler::as_sradi(Register rd, Register rs, int sh) {
-+  spew("sradi\t%3s,%3s,%d", rd.name(), rs.name(), sh);
-+  MOZ_ASSERT(sh >= 0 && sh < 64);
-+  const uint32_t regs = (rd.code() << 16) | (rs.code() << 21);
-+  const uint32_t shLow = (sh & 0x1f) << 11;
-+  const uint32_t shHigh = (sh & 0x20) >> 4;
-+  return writeInst(PPC_sradi | regs | shLow | shHigh);
-+}
-+
-+// --- ALU three-register ---
-+
-+// DEF_ALU2 is a plain alias for DEF_XFORM: each line below defines
-+// as_<op>(rd, ra, rb).
-+#define DEF_ALU2(op) DEF_XFORM(op)
-+
-+DEF_ALU2(add)
-+DEF_ALU2(addc)
-+DEF_ALU2(adde)
-+DEF_ALU2(subf)
-+DEF_ALU2(subfc)
-+DEF_ALU2(subfe)
-+DEF_ALU2(divd)
-+DEF_ALU2(divdu)
-+DEF_ALU2(divw)
-+DEF_ALU2(divwu)
-+// POWER9 modulo (XO-form, same encoding pattern as div).
-+DEF_XFORM(modsd)
-+DEF_XFORM(modsw)
-+DEF_XFORM(modud)
-+DEF_XFORM(moduw)
-+DEF_ALU2(mulld)
-+DEF_ALU2(mulhd)
-+DEF_ALU2(mulhdu)
-+DEF_ALU2(mulldo)
-+DEF_ALU2(mullw)
-+DEF_ALU2(mulhwu)
-+#undef DEF_ALU2
-+
-+// --- ALU immediate ---
-+
-+// D-form ALU-immediate ops have no Rc bit at instruction LSB (that bit
-+// is part of the 16-bit immediate). The only valid record-form variant
-+// in this group is `addic.`, which is a separate primary opcode (13)
-+// hand-written below; subfic and mulli have no record form at all.
-+// DEF_ALUI(op) defines as_<op>(rd, ra, im) with a signed 16-bit immediate.
-+#define DEF_ALUI(op) \
-+ BufferOffset Assembler::as_##op(Register rd, Register ra, int16_t im) { \
-+ spew(#op "\t%3s,%3s,%d", rd.name(), ra.name(), im); \
-+ return writeInst(InstImm(PPC_##op, rd, ra, im).encode()); \
-+ }
-+
-+// Emit addi. `actually_li` only affects debug builds: it switches the spew
-+// text to the li alias and skips the check that forbids r0 as `ra`.
-+BufferOffset Assembler::as_addi(Register rd, Register ra, int16_t im,
-+                                bool actually_li) {
-+#ifdef DEBUG
-+  if (!actually_li) {
-+    MOZ_ASSERT(ra != r0);
-+    spew("addi\t%3s,%3s,%d", rd.name(), ra.name(), im);
-+  } else {
-+    spew("li\t%3s,%d", rd.name(), im);
-+  }
-+#endif
-+  const uint32_t word = InstImm(PPC_addi, rd, ra, im).encode();
-+  return writeInst(word);
-+}
-+
-+// Emit addis. `actually_lis` only affects debug builds: it switches the
-+// spew text to the lis alias and skips the check that forbids r0 as `ra`.
-+BufferOffset Assembler::as_addis(Register rd, Register ra, int16_t im,
-+                                 bool actually_lis) {
-+#ifdef DEBUG
-+  if (!actually_lis) {
-+    MOZ_ASSERT(ra != r0);
-+    spew("addis\t%3s,%3s,%d", rd.name(), ra.name(), im);
-+  } else {
-+    spew("lis\t%3s,%d", rd.name(), im);
-+  }
-+#endif
-+  const uint32_t word = InstImm(PPC_addis, rd, ra, im).encode();
-+  return writeInst(word);
-+}
-+
-+// as_mulli and as_subfic: (rd, ra, signed 16-bit immediate).
-+DEF_ALUI(mulli)
-+DEF_ALUI(subfic)
-+#undef DEF_ALUI
-+
-+// --- ALU unary/extended ---
-+
-+
-+// Two-register unary ops using the swapped X-form (source register in bits
-+// 21-25, destination in bits 16-20). Each line defines as_<op>(rd, ra).
-+#define DEF_ALUE_S(op) DEF_XFORM2S(op)
-+DEF_ALUE_S(cntlzw)
-+DEF_ALUE_S(cntlzd)
-+DEF_ALUE_S(cnttzd)
-+DEF_ALUE_S(cnttzw)
-+#undef DEF_ALUE_S
-+
-+DEF_XFORM2S(popcntd)
-+DEF_XFORM2S(popcntw)
-+DEF_XFORM2S(brd) // POWER10
-+DEF_XFORM2S(brh) // POWER10
-+DEF_XFORM2S(brw) // POWER10
-+
-+// --- Bitwise logical (three-register) ---
-+
-+// DEF_BITALU2 is an alias for DEF_XFORMS (swapped RS/RA encoding): each
-+// line defines as_<op>(rd, ra, rb).
-+#define DEF_BITALU2(op) DEF_XFORMS(op)
-+DEF_BITALU2(nor)
-+DEF_BITALU2(slw)
-+DEF_BITALU2(srw)
-+DEF_BITALU2(sraw)
-+DEF_BITALU2(sld)
-+DEF_BITALU2(srd)
-+DEF_BITALU2(srad)
-+#undef DEF_BITALU2
-+
-+// and_, or_, xor_ are manually defined (trailing underscore to avoid C++
-+// keyword conflicts). xs_mr delegates to as_or_ so we must not assert
-+// rd==rs==rb in as_or_ (which would be a valid mr).
-+// The source register occupies the first InstReg slot and the destination
-+// the second (swapped RS/RA encoding).
-+BufferOffset Assembler::as_or_(Register rd, Register rs, Register rb) {
-+  spew("or\t%3s,%3s,%3s", rd.name(), rs.name(), rb.name());
-+  const uint32_t word = InstReg(PPC_or_, rs, rd, rb).encode();
-+  return writeInst(word);
-+}
-+
-+// Emit xor rd, rs, rb (swapped RS/RA encoding).
-+BufferOffset Assembler::as_xor_(Register rd, Register rs, Register rb) {
-+  spew("xor\t%3s,%3s,%3s", rd.name(), rs.name(), rb.name());
-+  const uint32_t word = InstReg(PPC_xor_, rs, rd, rb).encode();
-+  return writeInst(word);
-+}
-+
-+// Emit and rd, rs, rb (swapped RS/RA encoding).
-+BufferOffset Assembler::as_and_(Register rd, Register rs, Register rb) {
-+  spew("and\t%3s,%3s,%3s", rd.name(), rs.name(), rb.name());
-+  const uint32_t word = InstReg(PPC_and_, rs, rd, rb).encode();
-+  return writeInst(word);
-+}
-+
-+// Emit and. (record form): same encoding as as_and_ with bit 0 set.
-+BufferOffset Assembler::as_and__rc(Register rd, Register rs, Register rb) {
-+  spew("and.\t%3s,%3s,%3s", rd.name(), rs.name(), rb.name());
-+  const uint32_t base = InstReg(PPC_and_, rs, rd, rb).encode();
-+  return writeInst(base | 0x1);
-+}
-+
-+// --- Bitwise logical (immediate) ---
-+
-+// Each line defines as_<op>(rd, ra, unsigned 16-bit immediate).
-+DEF_DFORMS(ori)
-+DEF_DFORMS(oris)
-+DEF_DFORMS(xori)
-+DEF_DFORMS(xoris)
-+
-+// Emit andi. with an unsigned 16-bit immediate. rd and ra are exchanged
-+// when handed to InstImm, as for the other logical-immediate emitters.
-+BufferOffset Assembler::as_andi_rc(Register rd, Register ra, uint16_t im) {
-+  spew("andi.\t%3s,%3s,%d", rd.name(), ra.name(), im);
-+  const uint32_t word = InstImm(PPC_andi_dot, ra, rd, im).encode();
-+  return writeInst(word);
-+}
-+
-+// --- Sign extension ---
-+
-+// DEF_ALUEXT expands to both the plain and the record form, so extsw gets
-+// as_extsw and as_extsw_rc; extsb and extsh only get the plain form.
-+#define DEF_ALUEXT(op) DEF_XFORM2S(op) DEF_XFORM2S_RC(op)
-+DEF_XFORM2S(extsb)
-+DEF_XFORM2S(extsh)
-+DEF_ALUEXT(extsw)
-+#undef DEF_ALUEXT
-+
-+// --- Integer loads (D-form) ---
-+
-+// Byte and halfword loads: as_<op>(rd, rb, off), base register must not be
-+// r0.
-+DEF_DFORM(lbz)
-+DEF_DFORM(lha)
-+DEF_DFORM(lhz)
-+
-+// Emit lwa. Hand-written rather than DEF_DFORM because the displacement
-+// must additionally be a multiple of 4.
-+BufferOffset Assembler::as_lwa(Register rd, Register rb, int16_t off) {
-+  spew("lwa\t%3s,%d(%3s)", rd.name(), off, rb.name());
-+  MOZ_ASSERT(rb != r0);
-+  MOZ_ASSERT(!(off & 0x03));
-+  const uint32_t word = InstImm(PPC_lwa, rd, rb, off).encode();
-+  return writeInst(word);
-+}
-+
-+// Word load (zero-extending): as_lwz(rd, rb, off).
-+DEF_DFORM(lwz)
-+
-+// Emit ld. Hand-written rather than DEF_DFORM because the displacement
-+// must additionally be a multiple of 4.
-+BufferOffset Assembler::as_ld(Register rd, Register rb, int16_t off) {
-+  spew("ld\t%3s,%d(%3s)", rd.name(), off, rb.name());
-+  MOZ_ASSERT(rb != r0);
-+  MOZ_ASSERT(!(off & 0x03));
-+  const uint32_t word = InstImm(PPC_ld, rd, rb, off).encode();
-+  return writeInst(word);
-+}
-+
-+// --- Integer stores (D-form) ---
-+
-+// Byte, halfword and word stores: as_<op>(rd, rb, off), base register must
-+// not be r0.
-+DEF_DFORM(stb)
-+DEF_DFORM(sth)
-+DEF_DFORM(stw)
-+
-+// Emit std. Hand-written rather than DEF_DFORM because the displacement
-+// must additionally be a multiple of 4.
-+BufferOffset Assembler::as_std(Register rd, Register rb, int16_t off) {
-+  spew("std\t%3s,%d(%3s)", rd.name(), off, rb.name());
-+  MOZ_ASSERT(rb != r0);
-+  MOZ_ASSERT(!(off & 0x03));
-+  const uint32_t word = InstImm(PPC_std, rd, rb, off).encode();
-+  return writeInst(word);
-+}
-+
-+// stdu is DS-form like ld/std: the two low bits of the displacement slot
-+// belong to the opcode, so a displacement that is not a multiple of 4 would
-+// corrupt the instruction. It therefore gets the same alignment check as
-+// as_ld/as_std instead of the generic DEF_DFORM expansion.
-+BufferOffset Assembler::as_stdu(Register rd, Register rb, int16_t off) {
-+  spew("stdu\t%3s,%d(%3s)", rd.name(), off, rb.name());
-+  MOZ_ASSERT(rb != r0);
-+  MOZ_ASSERT(!(off & 0x03));
-+  return writeInst(InstImm(PPC_stdu, rd, rb, off).encode());
-+}
-+
-+#undef DEF_DFORM
-+#undef DEF_DFORMS
-+
-+// --- Integer loads/stores (X-form, indexed) ---
-+
-+// DEF_MEMx is an alias for DEF_XFORM: each line defines as_<op>(rd, ra, rb).
-+// NOTE(review): the store-conditional forms (stbcx, sthcx, stwcx, stdcx)
-+// only exist architecturally with Rc=1; presumably the PPC_st*cx constants
-+// already carry that bit — confirm against their definitions.
-+#define DEF_MEMx(op) DEF_XFORM(op)
-+DEF_MEMx(lbzx)
-+DEF_MEMx(lhax)
-+DEF_MEMx(lhzx)
-+DEF_MEMx(lwax)
-+DEF_MEMx(lwzx)
-+DEF_MEMx(lwarx)
-+DEF_MEMx(lbarx)
-+DEF_MEMx(lharx)
-+DEF_MEMx(ldx)
-+DEF_MEMx(ldarx)
-+DEF_MEMx(stbx)
-+DEF_MEMx(stbcx)
-+DEF_MEMx(stwx)
-+DEF_MEMx(stwbrx)
-+DEF_MEMx(sthx)
-+DEF_MEMx(sthcx)
-+DEF_MEMx(stdx)
-+DEF_MEMx(stdcx)
-+DEF_MEMx(stwcx)
-+#undef DEF_MEMx
-+
-+// --- Integer select ---
-+
-+// Emit isel with a real register as `ra`. r0 is rejected here; callers
-+// that want r0 in that slot use as_isel0 directly.
-+BufferOffset Assembler::as_isel(Register rt, Register ra, Register rb,
-+                                uint16_t bc, CRegisterID cr) {
-+  MOZ_ASSERT(ra != r0);
-+  const BufferOffset offset = as_isel0(rt, ra, rb, bc, cr);
-+  return offset;
-+}
-+
-+// Emit isel without restricting `ra`. `bc` must be below 0x40 with low
-+// nibble 0x0c; its upper bits plus 4*cr form the CR bit index placed at
-+// bit 6 of the word.
-+BufferOffset Assembler::as_isel0(Register rt, Register ra, Register rb,
-+                                 uint16_t bc, CRegisterID cr) {
-+  spew("isel\t%3s,%3s,%3s,cr%d:0x%02x", rt.name(), ra.name(), rb.name(), cr,
-+       bc);
-+  MOZ_ASSERT((bc < 0x40) && ((bc & 0x0f) == 0x0c));
-+  const uint16_t crBitIndex = (bc >> 4) + (cr << 2);
-+  const uint32_t regs =
-+      (rt.code() << 21) | (ra.code() << 16) | (rb.code() << 11);
-+  return writeInst(PPC_isel | regs | (crBitIndex << 6));
-+}
-+
-+// Emit setbc. The CR bit index is derived from `bc` and `cr` exactly as in
-+// as_isel0 and placed at bit 16 of the word.
-+BufferOffset Assembler::as_setbc(Register rt, uint16_t bc, CRegisterID cr) {
-+  spew("setbc\t%3s,cr%d:0x%02x", rt.name(), cr, bc);
-+  MOZ_ASSERT((bc < 0x40) && ((bc & 0x0f) == 0x0c));
-+  const uint16_t crBitIndex = (bc >> 4) + (cr << 2);
-+  const uint32_t word = PPC_setbc | (rt.code() << 21) | (crBitIndex << 16);
-+  return writeInst(word);
-+}
-+
-+// Emit setbcr. Operand handling is identical to as_setbc; only the opcode
-+// constant differs.
-+BufferOffset Assembler::as_setbcr(Register rt, uint16_t bc, CRegisterID cr) {
-+  spew("setbcr\t%3s,cr%d:0x%02x", rt.name(), cr, bc);
-+  MOZ_ASSERT((bc < 0x40) && ((bc & 0x0f) == 0x0c));
-+  const uint16_t crBitIndex = (bc >> 4) + (cr << 2);
-+  const uint32_t word = PPC_setbcr | (rt.code() << 21) | (crBitIndex << 16);
-+  return writeInst(word);
-+}
-+
-+// --- FP compare ---
-+
-+// Emit fcmpu into CR field `cr`: field number at bits 23-25, FRA at bits
-+// 16-20, FRB at bits 11-15.
-+BufferOffset Assembler::as_fcmpu(CRegisterID cr, FloatRegister ra,
-+                                 FloatRegister rb) {
-+  spew("fcmpu\tcr%d,%3s,%3s", cr, ra.name(), rb.name());
-+  const uint32_t word = PPC_fcmpu | (cr << 23) | (ra.encoding() << 16) |
-+                        (rb.encoding() << 11);
-+  return writeInst(word);
-+}
-+
-+// Convenience overload: fcmpu into cr0.
-+BufferOffset Assembler::as_fcmpu(FloatRegister ra, FloatRegister rb) {
-+  const BufferOffset offset = as_fcmpu(cr0, ra, rb);
-+  return offset;
-+}
-+
-+// --- FP arithmetic ---
-+
-+// Multiplies use the A-form variant whose second source sits in the FRC
-+// slot: as_<op>(rd, ra, rc).
-+#define DEF_FPUAC(op) DEF_AFORM_C(op)
-+DEF_FPUAC(fmul)
-+DEF_FPUAC(fmuls)
-+#undef DEF_FPUAC
-+
-+// The remaining two-source ops use the FRB slot: as_<op>(rd, ra, rb).
-+#define DEF_FPUAB(op) DEF_AFORM_B(op)
-+DEF_FPUAB(fadd)
-+DEF_FPUAB(fdiv)
-+DEF_FPUAB(fsub)
-+DEF_FPUAB(fadds)
-+DEF_FPUAB(fdivs)
-+DEF_FPUAB(fsubs)
-+DEF_FPUAB(fcpsgn)
-+#undef DEF_FPUAB
-+
-+// --- FP unary/conversion/rounding ---
-+
-+// Single-source FP ops: as_<op>(rd, ra), with the source encoded in the
-+// FRB slot by DEF_XFORM2_F.
-+#define DEF_FPUDS(op) DEF_XFORM2_F(op)
-+DEF_FPUDS(fabs)
-+DEF_FPUDS(fneg)
-+DEF_FPUDS(fmr)
-+DEF_FPUDS(fcfid)
-+DEF_FPUDS(fcfids)
-+DEF_FPUDS(fcfidu)
-+DEF_FPUDS(fcfidus)
-+DEF_FPUDS(fctid)
-+DEF_FPUDS(fctidz)
-+DEF_FPUDS(fctiduz)
-+DEF_FPUDS(fctiwz)
-+DEF_FPUDS(frim)
-+DEF_FPUDS(frip)
-+DEF_FPUDS(friz)
-+DEF_FPUDS(frsp)
-+DEF_FPUDS(fsqrt)
-+DEF_FPUDS(fsqrts)
-+#undef DEF_FPUDS
-+
-+// --- FP loads/stores (D-form) ---
-+
-+// Each line defines as_<op>(rd, rb, off) with an FP register and a GPR
-+// base that must not be r0.
-+DEF_DFORM_F(lfd)
-+DEF_DFORM_F(lfs)
-+DEF_DFORM_F(stfd)
-+DEF_DFORM_F(stfs)
-+DEF_DFORM_F(stfdu)
-+DEF_DFORM_F(stfsu)
-+
-+// --- FP loads/stores (X-form, indexed) ---
-+
-+// Each line defines as_<op>(rd, ra, rb) with an FP register and two GPRs.
-+DEF_FMEMx(lfdx)
-+DEF_FMEMx(lfsx)
-+DEF_FMEMx(lfiwax)
-+DEF_FMEMx(stfdx)
-+DEF_FMEMx(stfsx)
-+
-+// Clean up macros.
-+#undef DEF_XFORM
-+#undef DEF_XFORM_RC
-+#undef DEF_XFORMS
-+#undef DEF_XFORMS_RC
-+#undef DEF_XFORMS_I
-+#undef DEF_XFORM2
-+#undef DEF_XFORM2_RC
-+#undef DEF_XFORM2S
-+#undef DEF_XFORM2S_RC
-+#undef DEF_XFORM2_F
-+#undef DEF_XFORM2_F_RC
-+#undef DEF_MFORM
-+#undef DEF_MFORM_I
-+#undef DEF_MFORM_I_RC
-+#undef DEF_MDSFORM
-+#undef DEF_MDSFORM_RC
-+#undef DEF_MDFORM
-+#undef DEF_MDFORM_RC
-+#undef DEF_DFORM_F
-+#undef DEF_FMEMx
-+#undef DEF_AFORM_C
-+#undef DEF_AFORM_C_RC
-+#undef DEF_AFORM_B
-+#undef DEF_AFORM_B_RC
-+#undef DEF_AFORM
-+#undef DEF_AFORM_RC
-+
-+// --- FPSCR operations ---
-+
-+// Emit mtfsb0 for bit `bt`. The bit number is a 5-bit field at bits 21-25;
-+// a larger value would spill into the primary opcode, so it is rejected in
-+// debug builds.
-+BufferOffset Assembler::as_mtfsb0(uint8_t bt) {
-+  spew("mtfsb0\t%d", bt);
-+  MOZ_ASSERT(bt < 32);
-+  return writeInst(PPC_mtfsb0 | (uint32_t)bt << 21);
-+}
-+
-+// Emit mcrfs: copy field `bfa` into CR field `bf`. `bfa` is a 3-bit field
-+// at bits 18-20; a larger value would spill into the neighbouring bits up
-+// to the BF field, so it is rejected in debug builds.
-+BufferOffset Assembler::as_mcrfs(CRegisterID bf, uint8_t bfa) {
-+  spew("mcrfs\tcr%d,%d", bf, bfa);
-+  MOZ_ASSERT(bfa < 8);
-+  return writeInst(PPC_mcrfs | (uint32_t)bf << 23 | (uint32_t)bfa << 18);
-+}
-+
-+// --- VSX (FPR-only subset) ---
-+
-+// Emit mfvsrd: move VSR `xs` into GPR `ra`.
-+BufferOffset Assembler::as_mfvsrd(Register ra, FloatRegister xs) {
-+  spew("mfvsrd\t%3s,%3s", ra.name(), xs.name());
-+  const uint32_t word = XX1FormMfvsr(PPC_mfvsrd, ra.code(), xs.encoding());
-+  return writeInst(word);
-+}
-+
-+// Emit mtvsrd: move GPR `ra` into VSR `xt`. The RB slot is left at 0.
-+BufferOffset Assembler::as_mtvsrd(FloatRegister xt, Register ra) {
-+  spew("mtvsrd\t%3s,%3s", xt.name(), ra.name());
-+  const uint32_t word = XX1Form(PPC_mtvsrd, xt.encoding(), ra.code(), 0);
-+  return writeInst(word);
-+}
-+
-+// Emit mtvsrwa from GPR `ra` into VSR `xt`. The RB slot is left at 0.
-+BufferOffset Assembler::as_mtvsrwa(FloatRegister xt, Register ra) {
-+  spew("mtvsrwa\t%3s,%3s", xt.name(), ra.name());
-+  const uint32_t word = XX1Form(PPC_mtvsrwa, xt.encoding(), ra.code(), 0);
-+  return writeInst(word);
-+}
-+
-+// Emit mtvsrws from GPR `ra` into VSR `xt`. The RB slot is left at 0.
-+BufferOffset Assembler::as_mtvsrws(FloatRegister xt, Register ra) {
-+  spew("mtvsrws\t%3s,%3s", xt.name(), ra.name());
-+  const uint32_t word = XX1Form(PPC_mtvsrws, xt.encoding(), ra.code(), 0);
-+  return writeInst(word);
-+}
-+
-+// Emit mtvsrwz from GPR `ra` into VSR `xt`. The RB slot is left at 0.
-+BufferOffset Assembler::as_mtvsrwz(FloatRegister xt, Register ra) {
-+  spew("mtvsrwz\t%3s,%3s", xt.name(), ra.name());
-+  const uint32_t word = XX1Form(PPC_mtvsrwz, xt.encoding(), ra.code(), 0);
-+  return writeInst(word);
-+}
-+
-+// Emit xxbrd xt, xb (XX2-form; bits 16-20 left at the helper's default 0).
-+BufferOffset Assembler::as_xxbrd(FloatRegister xt, FloatRegister xb) {
-+  spew("xxbrd\t%3s,%3s", xt.name(), xb.name());
-+  const uint32_t word = XX2Form(PPC_xxbrd, xt.encoding(), xb.encoding());
-+  return writeInst(word);
-+}
-+
-+// Emit xscvdpspn xt, xb (XX2-form).
-+BufferOffset Assembler::as_xscvdpspn(FloatRegister xt, FloatRegister xb) {
-+  spew("xscvdpspn\t%3s,%3s", xt.name(), xb.name());
-+  const uint32_t word = XX2Form(PPC_xscvdpspn, xt.encoding(), xb.encoding());
-+  return writeInst(word);
-+}
-+
-+// Emit xscvspdpn xt, xb (XX2-form).
-+BufferOffset Assembler::as_xscvspdpn(FloatRegister xt, FloatRegister xb) {
-+  spew("xscvspdpn\t%3s,%3s", xt.name(), xb.name());
-+  const uint32_t word = XX2Form(PPC_xscvspdpn, xt.encoding(), xb.encoding());
-+  return writeInst(word);
-+}
-+
-+// POWER9 (ISA 3.0) scalar FP16 conversions. The UIM disambiguator is
-+// already in PPC_xscvdphp / PPC_xscvhpdp; XX2Form's bits16to20 default
-+// of 0 leaves it intact.
-+BufferOffset Assembler::as_xscvdphp(FloatRegister xt, FloatRegister xb) {
-+  spew("xscvdphp\t%3s,%3s", xt.name(), xb.name());
-+  const uint32_t word = XX2Form(PPC_xscvdphp, xt.encoding(), xb.encoding());
-+  return writeInst(word);
-+}
-+
-+// Emit xscvhpdp xt, xb (XX2-form; the opcode constant already carries the
-+// bits 16-20 selector, so the helper's default of 0 is used).
-+BufferOffset Assembler::as_xscvhpdp(FloatRegister xt, FloatRegister xb) {
-+  spew("xscvhpdp\t%3s,%3s", xt.name(), xb.name());
-+  const uint32_t word = XX2Form(PPC_xscvhpdp, xt.encoding(), xb.encoding());
-+  return writeInst(word);
-+}
-+
-+// Emit xsxexpdp using the XX2-form helper: `xt` is encoded in bits 21-25
-+// (plus bit 0 for encodings >= 32) and `xb` in bits 11-15 (plus bit 1).
-+// NOTE(review): in Power ISA 3.0 the target of xsxexpdp is a GPR (RT), not
-+// a VSR, and instruction bit 0 is reserved. As written, the number of the
-+// FloatRegister passed as `xt` would select the GPR with the same number —
-+// confirm callers account for this.
-+BufferOffset Assembler::as_xsxexpdp(FloatRegister xt, FloatRegister xb) {
-+ spew("xsxexpdp\t%3s,%3s", xt.name(), xb.name());
-+ return writeInst(XX2Form(PPC_xsxexpdp, xt.encoding(), xb.encoding()));
-+}
-+
-+// POWER9 (ISA 3.0) FP16 load/store, X-form indexed. lxsihzx loads
-+// 16 bits into VSR dword 0 word 1's low halfword (zeroing the rest);
-+// stxsihx stores from there. The XT[5]/XS[5] bit travels via the
-+// X-form's TX/SX bit at instruction bit 0.
-+// Encoded with the shared XX1Form helper (T at 21-25, RA at 16-20, RB at
-+// 11-15, TX at bit 0), the same way as_lxvx / as_lxvd2x are.
-+BufferOffset Assembler::as_lxsihzx(FloatRegister xt, Register ra, Register rb) {
-+  spew("lxsihzx\t%3s,%3s,%3s", xt.name(), ra.name(), rb.name());
-+  return writeInst(XX1Form(PPC_lxsihzx, xt.encoding(), ra.code(), rb.code()));
-+}
-+
-+// Emit stxsihx (POWER9 halfword store from a VSR, X-form indexed).
-+// Encoded with the shared XX1Form helper (S at 21-25, RA at 16-20, RB at
-+// 11-15, SX at bit 0), the same way as_stxvx / as_stxvd2x are.
-+BufferOffset Assembler::as_stxsihx(FloatRegister xs, Register ra, Register rb) {
-+  spew("stxsihx\t%3s,%3s,%3s", xs.name(), ra.name(), rb.name());
-+  return writeInst(XX1Form(PPC_stxsihx, xs.encoding(), ra.code(), rb.code()));
-+}
-+
-+// XX3-form, FPR-space only (encoding 0..31 → VSR0..31, all AX/BX/TX = 0).
-+// Java/JavaScript-style scalar max/min — semantics verified to match
-+// ECMA-262 Math.max/Math.min including ±0 and NaN propagation. POWER9-only.
-+BufferOffset Assembler::as_xsmaxjdp(FloatRegister xt, FloatRegister xa,
-+                                    FloatRegister xb) {
-+  spew("xsmaxjdp\t%3s,%3s,%3s", xt.name(), xa.name(), xb.name());
-+  const uint32_t word = XX3Form(PPC_xsmaxjdp, xt, xa, xb);
-+  return writeInst(word);
-+}
-+
-+// Emit xsminjdp xt, xa, xb (XX3-form), the min counterpart of as_xsmaxjdp.
-+BufferOffset Assembler::as_xsminjdp(FloatRegister xt, FloatRegister xa,
-+                                    FloatRegister xb) {
-+  spew("xsminjdp\t%3s,%3s,%3s", xt.name(), xa.name(), xb.name());
-+  const uint32_t word = XX3Form(PPC_xsminjdp, xt, xa, xb);
-+  return writeInst(word);
-+}
-+
-+// --- VSX SIMD load/store ---
-+
-+// For VSR0-31 (FPR), the 6th register bit (TX/SX/BX) is 0.
-+// X-form: opcode | T << 21 | A << 16 | B << 11 | xo | TX
-+// lxvx/stxvx are POWER9 (ISA 3.0). lxvd2x/stxvd2x are POWER8 (ISA 2.07).
-+
-+// Emit lxvx xt, ra, rb (XX1-form indexed vector load).
-+BufferOffset Assembler::as_lxvx(FloatRegister xt, Register ra, Register rb) {
-+  spew("lxvx\t%3s,%3s,%3s", xt.name(), ra.name(), rb.name());
-+  const uint32_t word = XX1Form(PPC_lxvx, xt.encoding(), ra.code(), rb.code());
-+  return writeInst(word);
-+}
-+
-+BufferOffset Assembler::as_stxvx(FloatRegister xs, Register ra, Register rb) {
-+  // Emit stxvx xs,ra,rb (indexed VSX vector store) via the XX1Form helper.
-+  spew("stxvx\t%3s,%3s,%3s", xs.name(), ra.name(), rb.name());
-+  const auto inst = XX1Form(PPC_stxvx, xs.encoding(), ra.code(), rb.code());
-+  return writeInst(inst);
-+}
-+
-+BufferOffset Assembler::as_lxvd2x(FloatRegister xt, Register ra, Register rb) {
-+  // Emit lxvd2x xt,ra,rb via the XX1Form helper.
-+  spew("lxvd2x\t%3s,%3s,%3s", xt.name(), ra.name(), rb.name());
-+  const auto inst = XX1Form(PPC_lxvd2x, xt.encoding(), ra.code(), rb.code());
-+  return writeInst(inst);
-+}
-+
-+BufferOffset Assembler::as_stxvd2x(FloatRegister xs, Register ra, Register rb) {
-+  // Emit stxvd2x xs,ra,rb via the XX1Form helper.
-+  spew("stxvd2x\t%3s,%3s,%3s", xs.name(), ra.name(), rb.name());
-+  const auto inst = XX1Form(PPC_stxvd2x, xs.encoding(), ra.code(), rb.code());
-+  return writeInst(inst);
-+}
-+
-+// VMX register load/store. See PPC_lvx/PPC_stvx in Assembler-ppc64.h for
-+// the encoding rationale.
-+BufferOffset Assembler::as_lvx(uint8_t vrt, Register ra, Register rb) {
-+  // vrt is a raw VR number and must fit the 5-bit VRT field.
-+  MOZ_ASSERT(vrt < 32);
-+  spew("lvx\tvr%d,%3s,%3s", vrt, ra.name(), rb.name());
-+  const auto inst =
-+      PPC_lvx | uint32_t(vrt) << 21 | ra.code() << 16 | rb.code() << 11;
-+  return writeInst(inst);
-+}
-+
-+BufferOffset Assembler::as_stvx(uint8_t vrs, Register ra, Register rb) {
-+  // vrs is a raw VR number and must fit the 5-bit VRS field.
-+  MOZ_ASSERT(vrs < 32);
-+  spew("stvx\tvr%d,%3s,%3s", vrs, ra.name(), rb.name());
-+  const auto inst =
-+      PPC_stvx | uint32_t(vrs) << 21 | ra.code() << 16 | rb.code() << 11;
-+  return writeInst(inst);
-+}
-+
-+// --- VSX SIMD register operations ---
-+
-+// XX3-form: opcode | T[0:4]<<21 | A[0:4]<<16 | B[0:4]<<11 | xo | AX | BX | TX
-+// where AX/BX/TX (bits 2/1/0) carry bit 5 of each 6-bit VSR index.
-+// Encoded by the XX3Form helper above for both VSR0-31 (Single/Double) and
-+// VSR32-63 (Simd128) operands.
-+BufferOffset Assembler::as_xxlor(FloatRegister xt, FloatRegister xa,
-+                                 FloatRegister xb) {
-+  // Emit xxlor xt,xa,xb (VSX logical OR).
-+  spew("xxlor\t%3s,%3s,%3s", xt.name(), xa.name(), xb.name());
-+  const auto inst = XX3Form(PPC_xxlor, xt, xa, xb);
-+  return writeInst(inst);
-+}
-+
-+BufferOffset Assembler::as_xxland(FloatRegister xt, FloatRegister xa,
-+                                  FloatRegister xb) {
-+  // Emit xxland xt,xa,xb (VSX logical AND).
-+  spew("xxland\t%3s,%3s,%3s", xt.name(), xa.name(), xb.name());
-+  const auto inst = XX3Form(PPC_xxland, xt, xa, xb);
-+  return writeInst(inst);
-+}
-+
-+BufferOffset Assembler::as_xxlxor(FloatRegister xt, FloatRegister xa,
-+                                  FloatRegister xb) {
-+  // Emit xxlxor xt,xa,xb (VSX logical XOR).
-+  spew("xxlxor\t%3s,%3s,%3s", xt.name(), xa.name(), xb.name());
-+  const auto inst = XX3Form(PPC_xxlxor, xt, xa, xb);
-+  return writeInst(inst);
-+}
-+
-+BufferOffset Assembler::as_xxlnor(FloatRegister xt, FloatRegister xa,
-+                                  FloatRegister xb) {
-+  // Emit xxlnor xt,xa,xb (VSX logical NOR).
-+  spew("xxlnor\t%3s,%3s,%3s", xt.name(), xa.name(), xb.name());
-+  const auto inst = XX3Form(PPC_xxlnor, xt, xa, xb);
-+  return writeInst(inst);
-+}
-+
-+BufferOffset Assembler::as_xxlandc(FloatRegister xt, FloatRegister xa,
-+                                   FloatRegister xb) {
-+  // Emit xxlandc xt,xa,xb (VSX logical AND with complement).
-+  spew("xxlandc\t%3s,%3s,%3s", xt.name(), xa.name(), xb.name());
-+  const auto inst = XX3Form(PPC_xxlandc, xt, xa, xb);
-+  return writeInst(inst);
-+}
-+
-+BufferOffset Assembler::as_xxsel(FloatRegister xt, FloatRegister xa,
-+                                 FloatRegister xb, FloatRegister xc) {
-+  // Emit xxsel xt,xa,xb,xc; the four-register layout is handled by XX4Form.
-+  spew("xxsel\t%3s,%3s,%3s,%3s", xt.name(), xa.name(), xb.name(), xc.name());
-+  const auto inst = XX4Form(PPC_xxsel, xt.encoding(), xa.encoding(),
-+                            xb.encoding(), xc.encoding());
-+  return writeInst(inst);
-+}
-+
-+BufferOffset Assembler::as_xxpermdi(FloatRegister xt, FloatRegister xa,
-+                                    FloatRegister xb, uint8_t dm) {
-+  // DM is a 2-bit selector that is merged into the opcode word at bit 8.
-+  MOZ_ASSERT(dm < 4);
-+  spew("xxpermdi\t%3s,%3s,%3s,%d", xt.name(), xa.name(), xb.name(), dm);
-+  const auto opWithDm = PPC_xxpermdi | (uint32_t(dm) << 8);
-+  return writeInst(XX3Form(opWithDm, xt, xa, xb));
-+}
-+
-+// POWER9 (ISA 3.0). XX1-form with two GPR sources.
-+BufferOffset Assembler::as_mtvsrdd(FloatRegister xt, Register ra, Register rb) {
-+  // Emit mtvsrdd xt,ra,rb: both sources are GPRs, the target is a VSR.
-+  spew("mtvsrdd\t%3s,%3s,%3s", xt.name(), ra.name(), rb.name());
-+  const auto inst = XX1Form(PPC_mtvsrdd, xt.encoding(), ra.code(), rb.code());
-+  return writeInst(inst);
-+}
-+
-+// POWER9 (ISA 3.0). XX1-form: move lower doubleword of VSR to GPR.
-+BufferOffset Assembler::as_mfvsrld(Register rt, FloatRegister xs) {
-+  // Emit mfvsrld rt,xs; operand placement is delegated to XX1FormMfvsr.
-+  spew("mfvsrld\t%3s,%3s", rt.name(), xs.name());
-+  const auto inst = XX1FormMfvsr(PPC_mfvsrld, rt.code(), xs.encoding());
-+  return writeInst(inst);
-+}
-+
-+// --- XX2-form VSX instructions ---
-+
-+// XX2-form: opcode | T<<21 | UIM<<16_area | B<<11_area | XO<<2 | BX | TX
-+// For VSR0-31, BX=TX=0.
-+
-+BufferOffset Assembler::as_xxspltw(FloatRegister xt, FloatRegister xb,
-+                                   uint8_t uim) {
-+  // uim selects one of the four words of xb.
-+  MOZ_ASSERT(uim < 4);
-+  spew("xxspltw\t%3s,%3s,%d", xt.name(), xb.name(), uim);
-+  const auto inst = XX2Form(PPC_xxspltw, xt.encoding(), xb.encoding(), uim);
-+  return writeInst(inst);
-+}
-+
-+BufferOffset Assembler::as_xxinsertw(FloatRegister xt, FloatRegister xb,
-+                                     uint8_t uim) {
-+  // Only word-aligned byte offsets 0, 4, 8 and 12 are accepted.
-+  MOZ_ASSERT(uim <= 12 && (uim & 3) == 0);
-+  spew("xxinsertw\t%3s,%3s,%d", xt.name(), xb.name(), uim);
-+  const auto inst = XX2Form(PPC_xxinsertw, xt.encoding(), xb.encoding(), uim);
-+  return writeInst(inst);
-+}
-+
-+BufferOffset Assembler::as_xxextractuw(FloatRegister xt, FloatRegister xb,
-+                                       uint8_t uim) {
-+  // Only word-aligned byte offsets 0, 4, 8 and 12 are accepted.
-+  MOZ_ASSERT(uim <= 12 && (uim & 3) == 0);
-+  spew("xxextractuw\t%3s,%3s,%d", xt.name(), xb.name(), uim);
-+  const auto inst = XX2Form(PPC_xxextractuw, xt.encoding(), xb.encoding(), uim);
-+  return writeInst(inst);
-+}
-+
-+// POWER9 (ISA 3.0). XX1-form-ish: T(5) + UIM8(8) + XO + TX. UIM8 occupies
-+// bits 18..11 (a non-standard slot that XX1Form doesn't fit), so encode
-+// inline. TX bit at instruction bit 0 selects the upper half of VSR
-+// space when xt.encoding() is in 32-63 (Simd128).
-+BufferOffset Assembler::as_xxspltib(FloatRegister xt, uint8_t imm8) {
-+  spew("xxspltib\t%3s,%u", xt.name(), imm8);
-+  // Break the 6-bit VSR number into the 5-bit T field and the TX bit.
-+  const uint32_t vsr = uint32_t(xt.encoding());
-+  const uint32_t t = vsr & 31;
-+  const uint32_t tx = (vsr >> 5) & 1;
-+  const uint32_t imm = (uint32_t)imm8;
-+  return writeInst(PPC_xxspltib | t << 21 | imm << 11 | tx);
-+}
-+
-+// --- VMX instructions ---
-+
-+// VX-form: (4<<26) | VRT<<21 | UIMM<<16 | VRB<<11 | XO
-+// VRT/VRB are 5-bit raw VR numbers (0-31). Simd128 FloatRegister.encoding()
-+// returns 32-63; masking with & 31 maps it back to the VR offset 0-31.
-+BufferOffset Assembler::as_vspltb(FloatRegister vrt, FloatRegister vrb,
-+                                  uint8_t uim) {
-+  // uim picks one of the 16 byte lanes of vrb.
-+  MOZ_ASSERT(uim < 16);
-+  spew("vspltb\t%3s,%3s,%d", vrt.name(), vrb.name(), uim);
-+  // Reduce the register encodings to their 5-bit VR numbers.
-+  const auto vrtField = (vrt.encoding() & 31) << 21;
-+  const auto vrbField = (vrb.encoding() & 31) << 11;
-+  return writeInst(PPC_vspltb | vrtField | (uint32_t)uim << 16 | vrbField);
-+}
-+
-+BufferOffset Assembler::as_vsplth(FloatRegister vrt, FloatRegister vrb,
-+                                  uint8_t uim) {
-+  // uim picks one of the 8 halfword lanes of vrb.
-+  MOZ_ASSERT(uim < 8);
-+  spew("vsplth\t%3s,%3s,%d", vrt.name(), vrb.name(), uim);
-+  // Reduce the register encodings to their 5-bit VR numbers.
-+  const auto vrtField = (vrt.encoding() & 31) << 21;
-+  const auto vrbField = (vrb.encoding() & 31) << 11;
-+  return writeInst(PPC_vsplth | vrtField | (uint32_t)uim << 16 | vrbField);
-+}
-+
-+// VA-form: (4<<26) | VRT<<21 | VRA<<16 | VRB<<11 | SHB<<6 | XO(6-bit)
-+BufferOffset Assembler::as_vsldoi(FloatRegister vrt, FloatRegister vra,
-+                                  FloatRegister vrb, uint8_t shb) {
-+  // shb is the 4-bit shift count placed at bit 6.
-+  MOZ_ASSERT(shb < 16);
-+  spew("vsldoi\t%3s,%3s,%3s,%d", vrt.name(), vra.name(), vrb.name(), shb);
-+  // Reduce the register encodings to their 5-bit VR numbers.
-+  const auto vrtField = (vrt.encoding() & 31) << 21;
-+  const auto vraField = (vra.encoding() & 31) << 16;
-+  const auto vrbField = (vrb.encoding() & 31) << 11;
-+  return writeInst(PPC_vsldoi | vrtField | vraField | vrbField |
-+                   (uint32_t)shb << 6);
-+}
-+
-+// --- VMX integer arithmetic (VR registers only) ---
-+
-+// VX-form: (4<<26) | VRT<<21 | VRA<<16 | VRB<<11 | XO
-+// The macro takes raw VR numbers (0-31).
-+// Generates Assembler::as_<op>(vrt, vra, vrb) for a three-register VX-form
-+// instruction whose opcode word is PPC_<op>.
-+#define DEF_VMX_VVV(op) \
-+  BufferOffset Assembler::as_##op(uint8_t vrt, uint8_t vra, uint8_t vrb) { \
-+    MOZ_ASSERT(vrt < 32); \
-+    MOZ_ASSERT(vra < 32); \
-+    MOZ_ASSERT(vrb < 32); \
-+    spew(#op "\tvr%d,vr%d,vr%d", vrt, vra, vrb); \
-+    const auto inst = PPC_##op | vrt << 21 | vra << 16 | vrb << 11; \
-+    return writeInst(inst); \
-+  }
-+
-+DEF_VMX_VVV(vaddubm)
-+DEF_VMX_VVV(vadduhm)
-+DEF_VMX_VVV(vadduwm)
-+DEF_VMX_VVV(vaddudm)
-+DEF_VMX_VVV(vsububm)
-+DEF_VMX_VVV(vsubuhm)
-+DEF_VMX_VVV(vsubuwm)
-+DEF_VMX_VVV(vsubudm)
-+DEF_VMX_VVV(vaddsbs)
-+DEF_VMX_VVV(vaddshs)
-+DEF_VMX_VVV(vaddubs)
-+DEF_VMX_VVV(vadduhs)
-+DEF_VMX_VVV(vsubsbs)
-+DEF_VMX_VVV(vsubshs)
-+DEF_VMX_VVV(vsububs)
-+DEF_VMX_VVV(vsubuhs)
-+DEF_VMX_VVV(vminsb)
-+DEF_VMX_VVV(vminsh)
-+DEF_VMX_VVV(vminsw)
-+DEF_VMX_VVV(vmaxsb)
-+DEF_VMX_VVV(vmaxsh)
-+DEF_VMX_VVV(vmaxsw)
-+DEF_VMX_VVV(vmaxsd)
-+DEF_VMX_VVV(vminub)
-+DEF_VMX_VVV(vminuh)
-+DEF_VMX_VVV(vminuw)
-+DEF_VMX_VVV(vmaxub)
-+DEF_VMX_VVV(vmaxuh)
-+DEF_VMX_VVV(vmaxuw)
-+DEF_VMX_VVV(vavgub)
-+DEF_VMX_VVV(vavguh)
-+DEF_VMX_VVV(vmuluwm)
-+DEF_VMX_VVV(vmulld)
-+
-+DEF_VMX_VVV(vslb)
-+DEF_VMX_VVV(vslh)
-+DEF_VMX_VVV(vslw)
-+DEF_VMX_VVV(vsld)
-+DEF_VMX_VVV(vsrb)
-+DEF_VMX_VVV(vsrh)
-+DEF_VMX_VVV(vsrw)
-+DEF_VMX_VVV(vsrd)
-+DEF_VMX_VVV(vsrab)
-+DEF_VMX_VVV(vsrah)
-+DEF_VMX_VVV(vsraw)
-+DEF_VMX_VVV(vsrad)
-+DEF_VMX_VVV(vslo)
-+DEF_VMX_VVV(vsro)
-+DEF_VMX_VVV(vcmpequb)
-+DEF_VMX_VVV(vcmpequh)
-+DEF_VMX_VVV(vcmpequw)
-+DEF_VMX_VVV(vcmpequd)
-+DEF_VMX_VVV(vcmpgtsb)
-+DEF_VMX_VVV(vcmpgtsh)
-+DEF_VMX_VVV(vcmpgtsw)
-+DEF_VMX_VVV(vcmpgtsd)
-+DEF_VMX_VVV(vcmpgtub)
-+DEF_VMX_VVV(vcmpgtuh)
-+DEF_VMX_VVV(vcmpgtuw)
-+DEF_VMX_VVV(vcmpgtud)
-+// POWER9 (ISA 3.0). NotEqual compare; saves the xxlnor that vcmpequX needs.
-+DEF_VMX_VVV(vcmpneb)
-+DEF_VMX_VVV(vcmpneh)
-+DEF_VMX_VVV(vcmpnew)
-+
-+// POWER8+ (ISA 2.07). vbpermq RT,RA,RB: bit-permute quadword.
-+DEF_VMX_VVV(vbpermq)
-+
-+#undef DEF_VMX_VVV
-+
-+// VC-form record forms: same as VX-form above with Rc bit (bit 10 LSB) set.
-+// vcmpXXX. sets CR6: LT = all-true, EQ = none-true.
-+// Generates Assembler::as_<op>_rc(vrt, vra, vrb): the record form of a
-+// vector compare, i.e. PPC_<op> with the 0x400 bit set.
-+#define DEF_VMX_VVV_RC(op) \
-+  BufferOffset Assembler::as_##op##_rc(uint8_t vrt, uint8_t vra, \
-+                                       uint8_t vrb) { \
-+    MOZ_ASSERT(vrt < 32); \
-+    MOZ_ASSERT(vra < 32); \
-+    MOZ_ASSERT(vrb < 32); \
-+    spew(#op ".\tvr%d,vr%d,vr%d", vrt, vra, vrb); \
-+    const auto inst = PPC_##op | vrt << 21 | vra << 16 | vrb << 11 | 0x400; \
-+    return writeInst(inst); \
-+  }
-+
-+DEF_VMX_VVV_RC(vcmpequb)
-+DEF_VMX_VVV_RC(vcmpequh)
-+DEF_VMX_VVV_RC(vcmpequw)
-+DEF_VMX_VVV_RC(vcmpequd)
-+
-+#undef DEF_VMX_VVV_RC
-+
-+// VSX float compare (XX3-form).
-+// Generates Assembler::as_<op>(xt, xa, xb) for an XX3-form VSX compare.
-+#define DEF_VSX_CMP(op) \
-+  BufferOffset Assembler::as_##op(FloatRegister xt, FloatRegister xa, \
-+                                  FloatRegister xb) { \
-+    spew(#op "\t%3s,%3s,%3s", xt.name(), xa.name(), xb.name()); \
-+    const auto inst = XX3Form(PPC_##op, xt, xa, xb); \
-+    return writeInst(inst); \
-+  }
-+
-+DEF_VSX_CMP(xvcmpeqsp)
-+DEF_VSX_CMP(xvcmpgtsp)
-+DEF_VSX_CMP(xvcmpgesp)
-+DEF_VSX_CMP(xvcmpeqdp)
-+DEF_VSX_CMP(xvcmpgtdp)
-+DEF_VSX_CMP(xvcmpgedp)
-+
-+#undef DEF_VSX_CMP
-+
-+// VSX float arithmetic (XX3-form binary).
-+// Generates Assembler::as_<op>(xt, xa, xb) for an XX3-form VSX binary op.
-+#define DEF_VSX_BIN(op) \
-+  BufferOffset Assembler::as_##op(FloatRegister xt, FloatRegister xa, \
-+                                  FloatRegister xb) { \
-+    spew(#op "\t%3s,%3s,%3s", xt.name(), xa.name(), xb.name()); \
-+    const auto inst = XX3Form(PPC_##op, xt, xa, xb); \
-+    return writeInst(inst); \
-+  }
-+
-+DEF_VSX_BIN(xvaddsp)
-+DEF_VSX_BIN(xvadddp)
-+DEF_VSX_BIN(xvsubsp)
-+DEF_VSX_BIN(xvsubdp)
-+DEF_VSX_BIN(xvmulsp)
-+DEF_VSX_BIN(xvmuldp)
-+DEF_VSX_BIN(xvdivsp)
-+DEF_VSX_BIN(xvdivdp)
-+DEF_VSX_BIN(xvminsp)
-+DEF_VSX_BIN(xvmindp)
-+DEF_VSX_BIN(xvmaxsp)
-+DEF_VSX_BIN(xvmaxdp)
-+DEF_VSX_BIN(xvmaddasp)
-+DEF_VSX_BIN(xvmaddadp)
-+DEF_VSX_BIN(xvnmsubasp)
-+DEF_VSX_BIN(xvnmsubadp)
-+
-+#undef DEF_VSX_BIN
-+
-+// VSX unary (XX2-form): op | xt<<21 | xb<<11 | XO<<2
-+// XX2-form unary VSX op: T + B, no UIM. Uses XX2Form helper for TX/BX bits.
-+// Generates Assembler::as_<op>(xt, xb) for an XX2-form VSX unary op.
-+#define DEF_VSX_UN(op) \
-+  BufferOffset Assembler::as_##op(FloatRegister xt, FloatRegister xb) { \
-+    spew(#op "\t%3s,%3s", xt.name(), xb.name()); \
-+    const auto inst = XX2Form(PPC_##op, xt.encoding(), xb.encoding()); \
-+    return writeInst(inst); \
-+  }
-+
-+DEF_VSX_UN(xvabssp)
-+DEF_VSX_UN(xvabsdp)
-+DEF_VSX_UN(xvnegsp)
-+DEF_VSX_UN(xvnegdp)
-+DEF_VSX_UN(xvsqrtsp)
-+DEF_VSX_UN(xvsqrtdp)
-+DEF_VSX_UN(xvrspip)
-+DEF_VSX_UN(xvrdpip)
-+DEF_VSX_UN(xvrspim)
-+DEF_VSX_UN(xvrdpim)
-+DEF_VSX_UN(xvrspiz)
-+DEF_VSX_UN(xvrdpiz)
-+DEF_VSX_UN(xvrspic)
-+DEF_VSX_UN(xvrdpic)
-+DEF_VSX_UN(xvcvsxwsp)
-+DEF_VSX_UN(xvcvuxwsp)
-+DEF_VSX_UN(xvcvsxwdp)
-+DEF_VSX_UN(xvcvuxwdp)
-+DEF_VSX_UN(xvcvspsxws)
-+DEF_VSX_UN(xvcvspuxws)
-+DEF_VSX_UN(xvcvdpsxws)
-+DEF_VSX_UN(xvcvdpuxws)
-+DEF_VSX_UN(xvcvdpsp)
-+DEF_VSX_UN(xvcvspdp)
-+
-+#undef DEF_VSX_UN
-+
-+// VMX unary VX-form: (4<<26) | VRT<<21 | 0<<16 | VRB<<11 | XO
-+// Generates Assembler::as_<op>(vrt, vrb) for a two-register VX-form op;
-+// the field at bit 16 is left to whatever PPC_<op> already contains.
-+#define DEF_VMX_UNARY(op) \
-+  BufferOffset Assembler::as_##op(uint8_t vrt, uint8_t vrb) { \
-+    MOZ_ASSERT(vrt < 32); \
-+    MOZ_ASSERT(vrb < 32); \
-+    spew(#op "\tvr%d,vr%d", vrt, vrb); \
-+    const auto inst = PPC_##op | vrt << 21 | vrb << 11; \
-+    return writeInst(inst); \
-+  }
-+
-+DEF_VMX_UNARY(vupkhsb)
-+DEF_VMX_UNARY(vupklsb)
-+DEF_VMX_UNARY(vupkhsh)
-+DEF_VMX_UNARY(vupklsh)
-+DEF_VMX_UNARY(vupkhsw)
-+DEF_VMX_UNARY(vupklsw)
-+// POWER9 per-lane integer negate. The VRA field holds the subop code
-+// (6 for vnegw, 7 for vnegd) which is already baked into PPC_vneg{w,d}.
-+DEF_VMX_UNARY(vnegw)
-+DEF_VMX_UNARY(vnegd)
-+DEF_VMX_UNARY(vpopcntb)
-+
-+#undef DEF_VMX_UNARY
-+
-+ // POWER9 addpcis (DX-form). Computes rT = (CIA + 4) + (D << 16).
-+ // D is a 16-bit signed immediate, split across three instruction fields:
-+ // d0 = bits 16..25 (10 bits, D[15:6])
-+ // d1 = bits 11..15 (5 bits, D[5:1])
-+ // d2 = bit 31 (1 bit, D[0])
-+ // Primary opcode 19, DX subop 2.
-+BufferOffset Assembler::as_addpcis(Register rt, int16_t d) {
-+  spew("addpcis\t%s,%d", rt.name(), (int)d);
-+  // Reinterpret the signed displacement as 16 raw bits, then carve it into
-+  // the three DX-form pieces.
-+  const uint32_t raw = uint16_t(d);
-+  const uint32_t d0 = (raw >> 6) & 0x3FF;  // D[15:6], placed at bit 6
-+  const uint32_t d1 = (raw >> 1) & 0x1F;   // D[5:1], placed at bit 16
-+  const uint32_t d2 = raw & 1u;            // D[0], placed at bit 0
-+  uint32_t inst = 19u << 26;               // primary opcode 19
-+  inst |= uint32_t(rt.code()) << 21;
-+  inst |= d1 << 16;
-+  inst |= d0 << 6;
-+  inst |= 2u << 1;                         // DX subop 2
-+  inst |= d2;
-+  return writeInst(inst);
-+}
-+
-+// -----------------------------------------------------------------------------
-+// Power ISA v3.1 (POWER10) prefixed instructions.
-+//
-+// Layout:
-+//
-+// Prefix word (BE bit numbering from the manual; LE bits in parentheses):
-+// [0..5] primary opcode = 1 (LE 31..26)
-+// [6..7] Type: 00 = 8LS, 10 = MLS (LE 25..24)
-+// [8..10] reserved = 0 (LE 23..21)
-+// [11] R: 1 = PC-relative (RA must be r0) (LE 20)
-+// [12..13] reserved = 0 (LE 19..18)
-+// [14..31] d0: high 18 bits of 34-bit signed immediate (LE 17..0)
-+//
-+// Suffix (paddi/pld, GPR target):
-+// [0..5] suffix opcode (paddi=14, pld=57) (LE 31..26)
-+// [6..10] RT (LE 25..21)
-+// [11..15] RA (LE 20..16)
-+// [16..31] d1: low 16 bits of immediate (LE 15..0)
-+//
-+// Suffix (plxv, VSR target — has the TX bit at suffix bit 5/LE bit 26):
-+// [0..4] plxv 5-bit opcode = 11001 (=25) (LE 31..27)
-+// [5] TX (high bit of 6-bit XT) (LE 26)
-+// [6..10] T (low 5 bits of XT) (LE 25..21)
-+// [11..15] RA (LE 20..16)
-+// [16..31] d1 (LE 15..0)
-+//
-+// The prefix and suffix of a prefixed instruction must lie in the same
-+// 64-byte aligned block at **runtime**. The JitCode allocator only
-+// guarantees 16-byte alignment, so the buffer-relative offset and the
-+// runtime address can differ by 0/16/32/48 mod 64. A buffer-only check
-+// `(currentOffset() & 63) == 60` is correct when the allocator base is
-+// 64-aligned but misses three of the four 16-aligned base classes — pad
-+// whenever `(currentOffset() & 15) == 12`, which catches all four. The
-+// enterNoPool guard prevents the constant-pool flusher from inserting
-+// bodies between the (optional) nop, prefix, and suffix.
-+
-+// Builds the prefix word of a prefixed instruction: primary opcode 1, the
-+// 2-bit type at bit 24, the R flag at bit 20 and the 18-bit d0 in the low
-+// bits.
-+static uint32_t EncodePower10Prefix(uint32_t type, bool R, uint32_t d0) {
-+  MOZ_ASSERT(type == 0 || type == 2);  // 8LS=0, MLS=2
-+  MOZ_ASSERT(d0 < (1u << 18));
-+  const uint32_t rBit = R ? 1u : 0u;
-+  uint32_t word = 1u << 26;
-+  word |= type << 24;
-+  word |= rBit << 20;
-+  word |= d0 & 0x3FFFFu;
-+  return word;
-+}
-+
-+// Splits a signed 34-bit immediate into its high 18 bits (*d0) and low
-+// 16 bits (*d1). The value must lie in [-2^33, 2^33).
-+static void SplitImm34(int64_t imm34, uint32_t* d0, uint32_t* d1) {
-+  MOZ_ASSERT(imm34 >= -(int64_t(1) << 33));
-+  MOZ_ASSERT(imm34 < (int64_t(1) << 33));
-+  // Keep only the low 34 bits of the two's-complement representation.
-+  const uint64_t bits = uint64_t(imm34) & 0x3FFFFFFFFull;
-+  const uint32_t high18 = uint32_t(bits >> 16) & 0x3FFFFu;
-+  const uint32_t low16 = uint32_t(bits) & 0xFFFFu;
-+  *d0 = high18;
-+  *d1 = low16;
-+}
-+
-+// Emits one nop when the current buffer offset is 12 mod 16, so the prefix
-+// word that follows starts on a 16-byte boundary instead.
-+void Assembler::ensurePrefixedAlignment() {
-+  const bool atLastWordOf16 = (currentOffset() & 15) == 12;
-+  if (!atLastWordOf16) {
-+    return;
-+  }
-+  as_nop();
-+}
-+
-+// paddi RT, RA, SI, R (MLS, suffix opcode 14 = addi)
-+// R=0: RT = (RA==0 ? 0 : RA) + sign_extend(SI, 34)
-+// R=1: RT = CIA(prefix) + sign_extend(SI, 34) (RA must be r0)
-+BufferOffset Assembler::as_paddi(Register rt, Register ra, int64_t imm34,
-+                                 bool R) {
-+  MOZ_ASSERT_IF(R, ra == r0);
-+  spew("paddi\t%s,%s,%lld,%d", rt.name(), ra.name(), (long long)imm34,
-+       R ? 1 : 0);
-+  // High 18 immediate bits ride in the prefix, low 16 in the suffix.
-+  uint32_t immHi, immLo;
-+  SplitImm34(imm34, &immHi, &immLo);
-+  const uint32_t prefixWord = EncodePower10Prefix(/*type=MLS*/ 2, R, immHi);
-+  uint32_t suffixWord = 14u << 26;
-+  suffixWord |= uint32_t(rt.code()) << 21;
-+  suffixWord |= uint32_t(ra.code()) << 16;
-+  suffixWord |= immLo;
-+  // Reserve room for the worst case: alignment nop + prefix + suffix.
-+  m_buffer.enterNoPool(3);
-+  ensurePrefixedAlignment();
-+  BufferOffset prefixOffset = writeInst(prefixWord);
-+  writeInst(suffixWord);
-+  m_buffer.leaveNoPool();
-+  return prefixOffset;
-+}
-+
-+// pld RT, D(RA), R (8LS, suffix opcode 57)
-+BufferOffset Assembler::as_pld(Register rt, Register ra, int64_t imm34,
-+                               bool R) {
-+  MOZ_ASSERT_IF(R, ra == r0);
-+  spew("pld\t%s,%lld(%s),%d", rt.name(), (long long)imm34, ra.name(),
-+       R ? 1 : 0);
-+  // High 18 immediate bits ride in the prefix, low 16 in the suffix.
-+  uint32_t immHi, immLo;
-+  SplitImm34(imm34, &immHi, &immLo);
-+  const uint32_t prefixWord = EncodePower10Prefix(/*type=8LS*/ 0, R, immHi);
-+  uint32_t suffixWord = 57u << 26;
-+  suffixWord |= uint32_t(rt.code()) << 21;
-+  suffixWord |= uint32_t(ra.code()) << 16;
-+  suffixWord |= immLo;
-+  // Reserve room for the worst case: alignment nop + prefix + suffix.
-+  m_buffer.enterNoPool(3);
-+  ensurePrefixedAlignment();
-+  BufferOffset prefixOffset = writeInst(prefixWord);
-+  writeInst(suffixWord);
-+  m_buffer.leaveNoPool();
-+  return prefixOffset;
-+}
-+
-+// plxv XT, D(RA), R (8LS, 5-bit suffix opcode 25, TX in suffix bit 26)
-+// XT is 6-bit: TX (high) || T (low 5) — matches lxvx convention.
-+BufferOffset Assembler::as_plxv(uint8_t xt, Register ra, int64_t imm34,
-+                                bool R) {
-+  MOZ_ASSERT(xt < 64);
-+  MOZ_ASSERT_IF(R, ra == r0);
-+  spew("plxv\tvs%u,%lld(%s),%d", xt, (long long)imm34, ra.name(),
-+       R ? 1 : 0);
-+  // High 18 immediate bits ride in the prefix, low 16 in the suffix.
-+  uint32_t immHi, immLo;
-+  SplitImm34(imm34, &immHi, &immLo);
-+  const uint32_t prefixWord = EncodePower10Prefix(/*type=8LS*/ 0, R, immHi);
-+  // 6-bit VSR number: bit 5 -> TX (suffix bit 26), low five bits -> T.
-+  const uint32_t txBit = (xt >> 5) & 1u;
-+  const uint32_t tField = xt & 0x1Fu;
-+  uint32_t suffixWord = 25u << 27;
-+  suffixWord |= txBit << 26;
-+  suffixWord |= tField << 21;
-+  suffixWord |= uint32_t(ra.code()) << 16;
-+  suffixWord |= immLo;
-+  // Reserve room for the worst case: alignment nop + prefix + suffix.
-+  m_buffer.enterNoPool(3);
-+  ensurePrefixedAlignment();
-+  BufferOffset prefixOffset = writeInst(prefixWord);
-+  writeInst(suffixWord);
-+  m_buffer.leaveNoPool();
-+  return prefixOffset;
-+}
-+
-+// plfd FRT, D(RA), R (MLS, suffix opcode 50; D-form-like FPR load)
-+BufferOffset Assembler::as_plfd(FloatRegister frt, Register ra, int64_t imm34,
-+                                bool R) {
-+  MOZ_ASSERT_IF(R, ra == r0);
-+  // FRT is a 5-bit field and the encoding is shifted in unmasked, so a
-+  // register numbered 32 or above would spill into the suffix opcode bits.
-+  MOZ_ASSERT(frt.encoding() < 32);
-+  spew("plfd\tf%u,%lld(%s),%d", uint32_t(frt.encoding()),
-+       (long long)imm34, ra.name(), R ? 1 : 0);
-+  // High 18 immediate bits go in the prefix, low 16 in the suffix.
-+  uint32_t d0, d1;
-+  SplitImm34(imm34, &d0, &d1);
-+  uint32_t prefix = EncodePower10Prefix(/*type=MLS*/ 2, R, d0);
-+  uint32_t suffix = (50u << 26) | (uint32_t(frt.encoding()) << 21) |
-+                    (uint32_t(ra.code()) << 16) | d1;
-+  // Reserve room for the worst case: alignment nop + prefix + suffix.
-+  m_buffer.enterNoPool(3);
-+  ensurePrefixedAlignment();
-+  BufferOffset bo = writeInst(prefix);
-+  writeInst(suffix);
-+  m_buffer.leaveNoPool();
-+  return bo;
-+}
-+
-+// plfs FRT, D(RA), R (MLS, suffix opcode 48; widens single → double in FPR)
-+BufferOffset Assembler::as_plfs(FloatRegister frt, Register ra, int64_t imm34,
-+                                bool R) {
-+  MOZ_ASSERT_IF(R, ra == r0);
-+  // FRT is a 5-bit field and the encoding is shifted in unmasked, so a
-+  // register numbered 32 or above would spill into the suffix opcode bits.
-+  MOZ_ASSERT(frt.encoding() < 32);
-+  spew("plfs\tf%u,%lld(%s),%d", uint32_t(frt.encoding()),
-+       (long long)imm34, ra.name(), R ? 1 : 0);
-+  // High 18 immediate bits go in the prefix, low 16 in the suffix.
-+  uint32_t d0, d1;
-+  SplitImm34(imm34, &d0, &d1);
-+  uint32_t prefix = EncodePower10Prefix(/*type=MLS*/ 2, R, d0);
-+  uint32_t suffix = (48u << 26) | (uint32_t(frt.encoding()) << 21) |
-+                    (uint32_t(ra.code()) << 16) | d1;
-+  // Reserve room for the worst case: alignment nop + prefix + suffix.
-+  m_buffer.enterNoPool(3);
-+  ensurePrefixedAlignment();
-+  BufferOffset bo = writeInst(prefix);
-+  writeInst(suffix);
-+  m_buffer.leaveNoPool();
-+  return bo;
-+}
-+
-+// pstd RS, D(RA), R (8LS, suffix opcode 61 = std D-form)
-+BufferOffset Assembler::as_pstd(Register rs, Register ra, int64_t imm34,
-+                                bool R) {
-+  MOZ_ASSERT_IF(R, ra == r0);
-+  spew("pstd\t%s,%lld(%s),%d", rs.name(), (long long)imm34, ra.name(),
-+       R ? 1 : 0);
-+  // High 18 immediate bits ride in the prefix, low 16 in the suffix.
-+  uint32_t immHi, immLo;
-+  SplitImm34(imm34, &immHi, &immLo);
-+  const uint32_t prefixWord = EncodePower10Prefix(/*type=8LS*/ 0, R, immHi);
-+  uint32_t suffixWord = 61u << 26;
-+  suffixWord |= uint32_t(rs.code()) << 21;
-+  suffixWord |= uint32_t(ra.code()) << 16;
-+  suffixWord |= immLo;
-+  // Reserve room for the worst case: alignment nop + prefix + suffix.
-+  m_buffer.enterNoPool(3);
-+  ensurePrefixedAlignment();
-+  BufferOffset prefixOffset = writeInst(prefixWord);
-+  writeInst(suffixWord);
-+  m_buffer.leaveNoPool();
-+  return prefixOffset;
-+}
-+
-+// pstxv XS, D(RA), R (8LS, 5-bit suffix opcode 27, SX in suffix bit 26)
-+// XS is 6-bit: SX (high) || S (low 5) — matches stxvx convention.
-+BufferOffset Assembler::as_pstxv(uint8_t xs, Register ra, int64_t imm34,
-+                                 bool R) {
-+  MOZ_ASSERT(xs < 64);
-+  MOZ_ASSERT_IF(R, ra == r0);
-+  spew("pstxv\tvs%u,%lld(%s),%d", xs, (long long)imm34, ra.name(),
-+       R ? 1 : 0);
-+  // High 18 immediate bits ride in the prefix, low 16 in the suffix.
-+  uint32_t immHi, immLo;
-+  SplitImm34(imm34, &immHi, &immLo);
-+  const uint32_t prefixWord = EncodePower10Prefix(/*type=8LS*/ 0, R, immHi);
-+  // 6-bit VSR number: bit 5 -> SX (suffix bit 26), low five bits -> S.
-+  const uint32_t sxBit = (xs >> 5) & 1;
-+  const uint32_t sField = xs & 0x1F;
-+  uint32_t suffixWord = 27u << 27;
-+  suffixWord |= sxBit << 26;
-+  suffixWord |= sField << 21;
-+  suffixWord |= uint32_t(ra.code()) << 16;
-+  suffixWord |= immLo;
-+  // Reserve room for the worst case: alignment nop + prefix + suffix.
-+  m_buffer.enterNoPool(3);
-+  ensurePrefixedAlignment();
-+  BufferOffset prefixOffset = writeInst(prefixWord);
-+  writeInst(suffixWord);
-+  m_buffer.leaveNoPool();
-+  return prefixOffset;
-+}
-+
-+// pstfd FRS, D(RA), R (MLS, suffix opcode 54 = stfd)
-+BufferOffset Assembler::as_pstfd(FloatRegister frs, Register ra, int64_t imm34,
-+                                 bool R) {
-+  MOZ_ASSERT_IF(R, ra == r0);
-+  // FRS is a 5-bit field and the encoding is shifted in unmasked, so a
-+  // register numbered 32 or above would spill into the suffix opcode bits.
-+  MOZ_ASSERT(frs.encoding() < 32);
-+  spew("pstfd\tf%u,%lld(%s),%d", uint32_t(frs.encoding()),
-+       (long long)imm34, ra.name(), R ? 1 : 0);
-+  // High 18 immediate bits go in the prefix, low 16 in the suffix.
-+  uint32_t d0, d1;
-+  SplitImm34(imm34, &d0, &d1);
-+  uint32_t prefix = EncodePower10Prefix(/*type=MLS*/ 2, R, d0);
-+  uint32_t suffix = (54u << 26) | (uint32_t(frs.encoding()) << 21) |
-+                    (uint32_t(ra.code()) << 16) | d1;
-+  // Reserve room for the worst case: alignment nop + prefix + suffix.
-+  m_buffer.enterNoPool(3);
-+  ensurePrefixedAlignment();
-+  BufferOffset bo = writeInst(prefix);
-+  writeInst(suffix);
-+  m_buffer.leaveNoPool();
-+  return bo;
-+}
-+
-+// pstfs FRS, D(RA), R (MLS, suffix opcode 52 = stfs)
-+BufferOffset Assembler::as_pstfs(FloatRegister frs, Register ra, int64_t imm34,
-+                                 bool R) {
-+  MOZ_ASSERT_IF(R, ra == r0);
-+  // FRS is a 5-bit field and the encoding is shifted in unmasked, so a
-+  // register numbered 32 or above would spill into the suffix opcode bits.
-+  MOZ_ASSERT(frs.encoding() < 32);
-+  spew("pstfs\tf%u,%lld(%s),%d", uint32_t(frs.encoding()),
-+       (long long)imm34, ra.name(), R ? 1 : 0);
-+  // High 18 immediate bits go in the prefix, low 16 in the suffix.
-+  uint32_t d0, d1;
-+  SplitImm34(imm34, &d0, &d1);
-+  uint32_t prefix = EncodePower10Prefix(/*type=MLS*/ 2, R, d0);
-+  uint32_t suffix = (52u << 26) | (uint32_t(frs.encoding()) << 21) |
-+                    (uint32_t(ra.code()) << 16) | d1;
-+  // Reserve room for the worst case: alignment nop + prefix + suffix.
-+  m_buffer.enterNoPool(3);
-+  ensurePrefixedAlignment();
-+  BufferOffset bo = writeInst(prefix);
-+  writeInst(suffix);
-+  m_buffer.leaveNoPool();
-+  return bo;
-+}
-+
-+// POWER10 (ISA 3.1) Vector Extract Mask. RT (GPR) gets the wasm-spec
-+// bitmask (one bit per lane MSB) directly in low 16/8/4/2 bits. UIM
-+// is baked into PPC_vextract{b,h,w,d}m (8/9/10/11). Caller must have
-+// verified HasPOWER10().
-+// Generates Assembler::as_<op>(rt, vrb): GPR target at bit 21, source VR
-+// (register encoding reduced to 5 bits) at bit 11.
-+#define DEF_VEXTRACT_M(op) \
-+  BufferOffset Assembler::as_##op(Register rt, FloatRegister vrb) { \
-+    spew(#op "\t%s,vr%u", rt.name(), uint32_t(vrb.encoding() & 31)); \
-+    const uint32_t rtField = uint32_t(rt.code()) << 21; \
-+    const uint32_t vrbField = (uint32_t(vrb.encoding()) & 31) << 11; \
-+    return writeInst(PPC_##op | rtField | vrbField); \
-+  }
-+DEF_VEXTRACT_M(vextractbm)
-+DEF_VEXTRACT_M(vextracthm)
-+DEF_VEXTRACT_M(vextractwm)
-+DEF_VEXTRACT_M(vextractdm)
-+#undef DEF_VEXTRACT_M
-+
-+// POWER10 (ISA 3.1) Vector Insert Word/Doubleword from GPR. VX-form:
-+// VRT at bits 21..25, UIM at bits 16..20, RB at bits 11..15.
-+// Generates Assembler::as_<op>(vrt, rb, uim); max_uim is the largest
-+// accepted immediate for that instruction.
-+#define DEF_VINS(op, max_uim) \
-+  BufferOffset Assembler::as_##op(FloatRegister vrt, Register rb, \
-+                                  uint8_t uim) { \
-+    MOZ_ASSERT(uim <= (max_uim)); \
-+    spew(#op "\tvr%u,%s,%u", uint32_t(vrt.encoding() & 31), rb.name(), \
-+         uint32_t(uim)); \
-+    const uint32_t vrtField = (uint32_t(vrt.encoding()) & 31) << 21; \
-+    const uint32_t uimField = uint32_t(uim) << 16; \
-+    const uint32_t rbField = uint32_t(rb.code()) << 11; \
-+    return writeInst(PPC_##op | vrtField | uimField | rbField); \
-+  }
-+DEF_VINS(vinsw, 12)
-+DEF_VINS(vinsd, 8)
-+#undef DEF_VINS
-+
-+// POWER10 (ISA 3.1) Vector Insert byte/halfword from GPR with
-+// register-supplied byte position. VX-form: VRT at bits 21..25,
-+// RA at bits 16..20, RB at bits 11..15. "rx" is right-indexed
-+// (LE-natural — index 0 = LSB byte).
-+// Generates Assembler::as_<op>(vrt, ra, rb) with both sources in GPRs.
-+#define DEF_VINS_RX(op) \
-+  BufferOffset Assembler::as_##op(FloatRegister vrt, Register ra, \
-+                                  Register rb) { \
-+    spew(#op "\tvr%u,%s,%s", uint32_t(vrt.encoding() & 31), ra.name(), \
-+         rb.name()); \
-+    const uint32_t vrtField = (uint32_t(vrt.encoding()) & 31) << 21; \
-+    const uint32_t raField = uint32_t(ra.code()) << 16; \
-+    const uint32_t rbField = uint32_t(rb.code()) << 11; \
-+    return writeInst(PPC_##op | vrtField | raField | rbField); \
-+  }
-+DEF_VINS_RX(vinsbrx)
-+DEF_VINS_RX(vinshrx)
-+#undef DEF_VINS_RX
-+
-+// POWER9 (ISA 3.0) V-form 3-operand instructions with VRT, UIM, VRB at
-+// bits 21..25, 16..20, 11..15 respectively (vinsert{b,h}, vextract{ub,uh}).
-+// Simd128 lives in VSR32-63 (= VR0-31), so we mask VRT and VRB to the
-+// 5-bit VR field via `encoding() & 31`.
-+// Generates Assembler::as_<op>(vrt, vrb, uim). uim must not exceed max_uim
-+// and must be a multiple of uim_step.
-+#define DEF_VRT_UIM_VRB(op, max_uim, uim_step) \
-+  BufferOffset Assembler::as_##op(FloatRegister vrt, FloatRegister vrb, \
-+                                  uint8_t uim) { \
-+    MOZ_ASSERT(uim <= (max_uim)); \
-+    MOZ_ASSERT((uim) % (uim_step) == 0); \
-+    spew(#op "\tvr%u,vr%u,%u", uint32_t(vrt.encoding() & 31), \
-+         uint32_t(vrb.encoding() & 31), uint32_t(uim)); \
-+    const uint32_t vrtField = (uint32_t(vrt.encoding()) & 31) << 21; \
-+    const uint32_t uimField = uint32_t(uim) << 16; \
-+    const uint32_t vrbField = (uint32_t(vrb.encoding()) & 31) << 11; \
-+    return writeInst(PPC_##op | vrtField | uimField | vrbField); \
-+  }
-+DEF_VRT_UIM_VRB(vinsertb, 15, 1)
-+DEF_VRT_UIM_VRB(vinserth, 14, 2)
-+DEF_VRT_UIM_VRB(vextractub, 15, 1)
-+DEF_VRT_UIM_VRB(vextractuh, 14, 2)
-+#undef DEF_VRT_UIM_VRB
-+
-+// VMX binary VX-form pack/merge (re-use DEF_VMX_VVV pattern).
-+// Generates Assembler::as_<op>(vrt, vra, vrb) for three-register VX-form
-+// pack, merge and multiply instructions.
-+#define DEF_VMX_VVV(op) \
-+  BufferOffset Assembler::as_##op(uint8_t vrt, uint8_t vra, uint8_t vrb) { \
-+    MOZ_ASSERT(vrt < 32); \
-+    MOZ_ASSERT(vra < 32); \
-+    MOZ_ASSERT(vrb < 32); \
-+    spew(#op "\tvr%d,vr%d,vr%d", vrt, vra, vrb); \
-+    const auto inst = PPC_##op | vrt << 21 | vra << 16 | vrb << 11; \
-+    return writeInst(inst); \
-+  }
-+
-+DEF_VMX_VVV(vpkshss)
-+DEF_VMX_VVV(vpkswss)
-+DEF_VMX_VVV(vpkshus)
-+DEF_VMX_VVV(vpkswus)
-+DEF_VMX_VVV(vmrghb)
-+DEF_VMX_VVV(vmrghh)
-+DEF_VMX_VVV(vmrghw)
-+DEF_VMX_VVV(vmrglb)
-+DEF_VMX_VVV(vmrglh)
-+DEF_VMX_VVV(vmrglw)
-+DEF_VMX_VVV(vmulesb)
-+DEF_VMX_VVV(vmulosb)
-+DEF_VMX_VVV(vmuleub)
-+DEF_VMX_VVV(vmuloub)
-+DEF_VMX_VVV(vmulesh)
-+DEF_VMX_VVV(vmulosh)
-+DEF_VMX_VVV(vmuleuh)
-+DEF_VMX_VVV(vmulouh)
-+DEF_VMX_VVV(vmulesw)
-+DEF_VMX_VVV(vmulosw)
-+DEF_VMX_VVV(vmuleuw)
-+DEF_VMX_VVV(vmulouw)
-+
-+#undef DEF_VMX_VVV
-+
-+ // vperm VA-form: (4<<26) | VRT<<21 | VRA<<16 | VRB<<11 | VRC<<6 | XO
-+BufferOffset Assembler::as_vperm(uint8_t vrt, uint8_t vra, uint8_t vrb,
-+                                 uint8_t vrc) {
-+  // All four operands are raw VR numbers and must fit 5-bit fields.
-+  MOZ_ASSERT(vrt < 32);
-+  MOZ_ASSERT(vra < 32);
-+  MOZ_ASSERT(vrb < 32);
-+  MOZ_ASSERT(vrc < 32);
-+  spew("vperm\tvr%d,vr%d,vr%d,vr%d", vrt, vra, vrb, vrc);
-+  const auto inst = PPC_vperm | vrt << 21 | vra << 16 | vrb << 11 | vrc << 6;
-+  return writeInst(inst);
-+}
-+
-+// VA-form ternary VMX: (4<<26) | VRT<<21 | VRA<<16 | VRB<<11 | VRC<<6 |
-+// XO(6-bit)
-+BufferOffset Assembler::as_vmladduhm(uint8_t vrt, uint8_t vra, uint8_t vrb,
-+                                     uint8_t vrc) {
-+  // All four operands are raw VR numbers and must fit 5-bit fields.
-+  MOZ_ASSERT(vrt < 32);
-+  MOZ_ASSERT(vra < 32);
-+  MOZ_ASSERT(vrb < 32);
-+  MOZ_ASSERT(vrc < 32);
-+  spew("vmladduhm\tvr%d,vr%d,vr%d,vr%d", vrt, vra, vrb, vrc);
-+  const auto inst =
-+      PPC_vmladduhm | vrt << 21 | vra << 16 | vrb << 11 | vrc << 6;
-+  return writeInst(inst);
-+}
-+
-+BufferOffset Assembler::as_vmhraddshs(uint8_t vrt, uint8_t vra, uint8_t vrb,
-+                                      uint8_t vrc) {
-+  // All four operands are raw VR numbers and must fit 5-bit fields.
-+  MOZ_ASSERT(vrt < 32);
-+  MOZ_ASSERT(vra < 32);
-+  MOZ_ASSERT(vrb < 32);
-+  MOZ_ASSERT(vrc < 32);
-+  spew("vmhraddshs\tvr%d,vr%d,vr%d,vr%d", vrt, vra, vrb, vrc);
-+  const auto inst =
-+      PPC_vmhraddshs | vrt << 21 | vra << 16 | vrb << 11 | vrc << 6;
-+  return writeInst(inst);
-+}
-+
-+BufferOffset Assembler::as_vmsumshm(uint8_t vrt, uint8_t vra, uint8_t vrb,
-+                                    uint8_t vrc) {
-+  // All four operands are raw VR numbers and must fit 5-bit fields.
-+  MOZ_ASSERT(vrt < 32);
-+  MOZ_ASSERT(vra < 32);
-+  MOZ_ASSERT(vrb < 32);
-+  MOZ_ASSERT(vrc < 32);
-+  spew("vmsumshm\tvr%d,vr%d,vr%d,vr%d", vrt, vra, vrb, vrc);
-+  const auto inst =
-+      PPC_vmsumshm | vrt << 21 | vra << 16 | vrb << 11 | vrc << 6;
-+  return writeInst(inst);
-+}
-+
-+BufferOffset Assembler::as_vmsumuhm(uint8_t vrt, uint8_t vra, uint8_t vrb,
-+                                    uint8_t vrc) {
-+  // All four operands are raw VR numbers and must fit 5-bit fields.
-+  MOZ_ASSERT(vrt < 32);
-+  MOZ_ASSERT(vra < 32);
-+  MOZ_ASSERT(vrb < 32);
-+  MOZ_ASSERT(vrc < 32);
-+  spew("vmsumuhm\tvr%d,vr%d,vr%d,vr%d", vrt, vra, vrb, vrc);
-+  const auto inst =
-+      PPC_vmsumuhm | vrt << 21 | vra << 16 | vrb << 11 | vrc << 6;
-+  return writeInst(inst);
-+}
-+
-+BufferOffset Assembler::as_vspltisb(uint8_t vrt, int8_t simm5) {
-+  MOZ_ASSERT(vrt < 32);
-+  // The immediate is a signed 5-bit value.
-+  MOZ_ASSERT(simm5 >= -16 && simm5 <= 15);
-+  spew("vspltisb\tvr%d,%d", vrt, simm5);
-+  // Keep only the low five two's-complement bits of the immediate.
-+  const uint32_t immField = (uint32_t(simm5) & 0x1F) << 16;
-+  return writeInst(PPC_vspltisb | uint32_t(vrt) << 21 | immField);
-+}
-+
-+BufferOffset Assembler::as_vspltish(uint8_t vrt, int8_t simm5) {
-+  MOZ_ASSERT(vrt < 32);
-+  // The immediate is a signed 5-bit value.
-+  MOZ_ASSERT(simm5 >= -16 && simm5 <= 15);
-+  spew("vspltish\tvr%d,%d", vrt, simm5);
-+  // Keep only the low five two's-complement bits of the immediate.
-+  const uint32_t immField = (uint32_t(simm5) & 0x1F) << 16;
-+  return writeInst(PPC_vspltish | uint32_t(vrt) << 21 | immField);
-+}
-+
-+BufferOffset Assembler::as_vspltisw(uint8_t vrt, int8_t simm5) {
-+  MOZ_ASSERT(vrt < 32);
-+  // The immediate is a signed 5-bit value.
-+  MOZ_ASSERT(simm5 >= -16 && simm5 <= 15);
-+  spew("vspltisw\tvr%d,%d", vrt, simm5);
-+  // Keep only the low five two's-complement bits of the immediate.
-+  const uint32_t immField = (uint32_t(simm5) & 0x1F) << 16;
-+  return writeInst(PPC_vspltisw | uint32_t(vrt) << 21 | immField);
-+}
-+
-+// --- Convenience pseudo-instructions ---
-+
-+// Emits the bare PPC_trap word and logs the buffer offset it lands at.
-+BufferOffset Assembler::xs_trap() {
-+ spew("trap @ %08x", currentOffset());
-+ return writeInst(PPC_trap);
-+}
-+
-+BufferOffset Assembler::xs_trap_tagged(TrapTag tag) {
-+  // The tag is written twice into the trap word, at bit 16 and at bit 11.
-+  const uint8_t tagBits = (uint8_t)tag;
-+  uint32_t tv = PPC_trap | (tagBits << 16) | (tagBits << 11);
-+  spew("trap @ %08x ; MARK %d %08x", currentOffset(), tagBits, tv);
-+  return writeInst(tv);
-+}
-+
-+// Register move: emitted as `or rd,ra,ra`.
-+BufferOffset Assembler::xs_mr(Register rd, Register ra) {
-+ return as_or_(rd, ra, ra);
-+}
-+
-+// Move ra into the count register (mtspr with spr_ctr).
-+BufferOffset Assembler::xs_mtctr(Register ra) {
-+  const SPRegisterID ctr = (SPRegisterID)spr_ctr;
-+  return as_mtspr(ctr, ra);
-+}
-+
-+// Move ra into the link register (mtspr with spr_lr).
-+BufferOffset Assembler::xs_mtlr(Register ra) {
-+  const SPRegisterID lr = (SPRegisterID)spr_lr;
-+  return as_mtspr(lr, ra);
-+}
-+
-+// Copy the link register into rd (mfspr with spr_lr).
-+BufferOffset Assembler::xs_mflr(Register rd) {
-+  const SPRegisterID lr = (SPRegisterID)spr_lr;
-+  return as_mfspr(rd, lr);
-+}
-+
-+// mtcrf with a field mask of 0xff.
-+BufferOffset Assembler::xs_mtcr(Register rs) {
-+  const int fieldMask = 0xff;
-+  return as_mtcrf(fieldMask, rs);
-+}
-+
-+// Copy XER into ra (mfspr with spr_xer).
-+BufferOffset Assembler::xs_mfxer(Register ra) {
-+  const SPRegisterID xer = (SPRegisterID)spr_xer;
-+  return as_mfspr(ra, xer);
-+}
-+
-+// Move ra into XER (mtspr with spr_xer).
-+BufferOffset Assembler::xs_mtxer(Register ra) {
-+  const SPRegisterID xer = (SPRegisterID)spr_xer;
-+  return as_mtspr(xer, ra);
-+}
-+
-+// Load immediate: emitted as addi with r0 in the RA slot. The meaning of the
-+// trailing `true` argument is defined by as_addi, which is not shown here.
-+BufferOffset Assembler::xs_li(Register rd, int16_t im) {
-+ return as_addi(rd, r0, im, true);
-+}
-+
-+// Load immediate shifted: emitted as addis with r0 in the RA slot. The
-+// meaning of the trailing `true` argument is defined by as_addis, which is
-+// not shown here.
-+BufferOffset Assembler::xs_lis(Register rd, int16_t im) {
-+ return as_addis(rd, r0, im, true);
-+}
-+
-+// Subtract immediate: emitted as addi with the negated immediate.
-+BufferOffset Assembler::x_subi(Register rd, Register ra, int16_t im) {
-+  // -(-32768) is +32768, which a signed 16-bit immediate cannot hold; the
-+  // negated value would wrap back to -32768 and add instead of subtract.
-+  // NOTE(review): assumes as_addi takes a 16-bit signed immediate, as the
-+  // sibling xs_li wrapper suggests - confirm against its declaration.
-+  MOZ_ASSERT(im > -32768);
-+  return as_addi(rd, ra, -im);
-+}
-+
-+// Bitwise complement: emitted as `nor rd,ra,ra`.
-+BufferOffset Assembler::x_not(Register rd, Register ra) {
-+ return as_nor(rd, ra, ra);
-+}
-+
-+// 32-bit shift left by n: rlwinm rotating left by n and keeping mask bits
-+// 0 through 31-n.
-+BufferOffset Assembler::x_slwi(Register rd, Register rs, int n) {
-+  MOZ_ASSERT(n >= 0 && n < 32);
-+  const int maskEnd = 31 - n;
-+  return as_rlwinm(rd, rs, n, 0, maskEnd);
-+}
-+
-+// 64-bit shift left by n: rldicr rotating left by n and clearing everything
-+// right of bit 63-n.
-+BufferOffset Assembler::x_sldi(Register rd, Register rs, int n) {
-+  // Same range check as the sibling x_srdi; outside it, 63 - n is no longer
-+  // a valid mask position.
-+  MOZ_ASSERT(n >= 0 && n < 64);
-+  return as_rldicr(rd, rs, n, 63 - n);
-+}
-+
-+// 32-bit logical shift right by n, built from rlwinm.
-+BufferOffset Assembler::x_srwi(Register rd, Register rs, int n) {
-+  MOZ_ASSERT(n >= 0 && n < 32);
-+  if (n != 0) {
-+    const int rotate = 32 - n;
-+    return as_rlwinm(rd, rs, rotate, n, 31);
-+  }
-+  // A zero shift would need a rotate of 32, so emit rotate 0 with a full
-+  // mask instead.
-+  return as_rlwinm(rd, rs, 0, 0, 31);
-+}
-+
-+// 64-bit logical shift right by n, built from rldicl.
-+BufferOffset Assembler::x_srdi(Register rd, Register rs, int n) {
-+  MOZ_ASSERT(n >= 0 && n < 64);
-+  if (n != 0) {
-+    const int rotate = 64 - n;
-+    return as_rldicl(rd, rs, rotate, n);
-+  }
-+  // A zero shift is just a register copy (or rd,rs,rs).
-+  return as_or_(rd, rs, rs);
-+}
-+
-+// Extracts one bit of rs into the least-significant bit of rd by rotating
-+// left by bit+1 and masking with MB=ME=31.
-+BufferOffset Assembler::x_bit_value(Register rd, Register rs, unsigned bit) {
-+  MOZ_ASSERT(bit < 32);
-+  // The rotate count is a 5-bit field: for bit 31 the count bit+1 is 32,
-+  // which is the same rotation as 0, so reduce it modulo 32.
-+  return as_rlwinm(rd, rs, (bit + 1) & 31, 31, 31);
-+}
-+
-+// Emits rlwimi rd,rs with rotate 0 and mask bits 16..31 (the third to fifth
-+// arguments passed to as_rlwimi).
-+BufferOffset Assembler::x_insertbits0_15(Register rd, Register rs) {
-+ return as_rlwimi(rd, rs, 0, 16, 31);
-+}
-+
-+// Two-instruction sequence: sradi rd,ra,63 followed by mulli rd,rd,im.
-+// Returns the offset of the second instruction (the mulli).
-+BufferOffset Assembler::x_sr_mulli(Register rd, Register ra, int16_t im) {
-+  as_sradi(rd, ra, 63);
-+  BufferOffset mulliOffset = as_mulli(rd, rd, im);
-+  return mulliOffset;
-+}
-+
-+// Emits a plain PPC_trap word. `code` only appears in the spew output; it is
-+// not encoded into the instruction.
-+void Assembler::as_break(uint32_t code) {
-+ spew("break\t%d", code);
-+ writeInst(PPC_trap);
-+}
-+
-+// ========================================================================
-+// Label binding, retarget, and code label processing.
-+// ========================================================================
-+
-+// Forward-declared shape helpers; full definitions and the layout
-+// commentary live with the WriteLoad64Instructions section below.
-+static bool IsAddpcisLoad64Stanza(uint32_t enc0);
-+static uint8_t Load64StanzaDestReg(Instruction* inst0);
-+
-+// Returns `branch` with its condition sense inverted and its displacement
-+// replaced by skipOffset.
-+InstImm Assembler::invertBranch(InstImm branch, BOffImm16 skipOffset) {
-+  // Toggle the BO condition-true/condition-false bit (bit 24).
-+  const uint32_t toggled = branch.encode() ^ 0x01000000;
-+  // Keep the upper halfword and the two low bits; drop the old displacement.
-+  const uint32_t withoutOffset = toggled & 0xFFFF0003;
-+  const uint32_t data = withoutOffset | skipOffset.encode();
-+  branch.setData(data);
-+  return branch;
-+}
-+
-+void Assembler::bind(InstImm* inst, uintptr_t branch, uintptr_t target) {
-+ intptr_t offset = target - branch;
-+ Instruction* i0 = (Instruction*)inst;
-+
-+ if (i0->next()->encode() == PPC_bcl_always_plus4 ||
-+ IsAddpcisLoad64Stanza(i0->encode())) {
-+ // Pre-existing long stanza, either P8 (mflr + bcl marker at [1]) or
-+ // P9+ (addpcis at [0]; major opcode 19). Either way, just register
-+ // the long jump — the stanza's .quad at [6..7] gets patched later
-+ // via UpdateLoad64Value.
-+ addLongJump(BufferOffset(branch), BufferOffset(target));
-+ return;
-+ }
-+
-+ if (i0->isOpcode((uint32_t)PPC_tw)) {
-+ // Tagged trap stanza. The tag tells us which branch type was reserved.
-+ TrapTag tag = (TrapTag)inst->traptag();
-+ Instruction* i1 = i0->next();
-+ Instruction* i2 = i1->next();
-+ Instruction* i3 = i2->next();
-+ Instruction* i4 = i3->next();
-+ Instruction* i5 = i4->next();
-+ Instruction* i6 = i5->next();
-+ Instruction* i7 = i6->next();
-+ Instruction* i8 = i7->next();
-+ Instruction* i9 = i8->next();
-+
-+ switch (tag) {
-+ case BCTag: {
-+ // inst[-1] is the original bc instruction.
-+ Instruction* bc = i0 - 1;
-+ // Try short bc (offset + 4 because bc is one instruction before tw).
-+ if (BOffImm16::IsInRange(offset + (intptr_t)sizeof(uint32_t))) {
-+ bc->setData(((bc->encode() ^ 0x01000000) & 0xFFFF0003) |
-+ BOffImm16(offset + sizeof(uint32_t)).encode());
-+ i0->makeNop();
-+ i1->makeNop();
-+ i2->makeNop();
-+ i3->makeNop();
-+ i4->makeNop();
-+ i5->makeNop();
-+ i6->makeNop();
-+ i7->makeNop();
-+ i8->makeNop();
-+ i9->makeNop();
-+ return;
-+ }
-+ // Try short b (unconditional).
-+ if (JOffImm26::IsInRange(offset)) {
-+ i0->setData(PPC_b | JOffImm26(offset).encode());
-+ i1->makeNop();
-+ i2->makeNop();
-+ i3->makeNop();
-+ i4->makeNop();
-+ i5->makeNop();
-+ i6->makeNop();
-+ i7->makeNop();
-+ i8->makeNop();
-+ i9->makeNop();
-+ return;
-+ }
-+ // Long: WriteLoad64 to SecondScratchReg + mtctr + bctr.
-+ addLongJump(BufferOffset(branch), BufferOffset(target));
-+ WriteLoad64Instructions(i0, SecondScratchReg,
-+ LabelBase::INVALID_OFFSET);
-+ i8->makeOp_mtctr(SecondScratchReg);
-+ i9->makeOp_bctr();
-+ break;
-+ }
-+ case CallTag: {
-+ // For calls, the actual call instruction goes at inst[9] and
-+ // the return address must be after the stanza.
-+ intptr_t callOffset = offset - 9 * (intptr_t)sizeof(uint32_t);
-+ if (JOffImm26::IsInRange(callOffset)) {
-+ i0->makeNop();
-+ i1->makeNop();
-+ i2->makeNop();
-+ i3->makeNop();
-+ i4->makeNop();
-+ i5->makeNop();
-+ i6->makeNop();
-+ i7->makeNop();
-+ i8->makeNop();
-+ i9->setData(PPC_b | JOffImm26(callOffset).encode() | LinkB);
-+ return;
-+ }
-+ // Long: WriteLoad64 to SecondScratchReg + mtctr + bctrl.
-+ addLongJump(BufferOffset(branch), BufferOffset(target));
-+ WriteLoad64Instructions(i0, SecondScratchReg,
-+ LabelBase::INVALID_OFFSET);
-+ i8->makeOp_mtctr(SecondScratchReg);
-+ i9->makeOp_bctr(LinkB);
-+ break;
-+ }
-+ case BTag: {
-+ if (JOffImm26::IsInRange(offset)) {
-+ i0->setData(PPC_b | JOffImm26(offset).encode());
-+ i1->makeNop();
-+ i2->makeNop();
-+ i3->makeNop();
-+ i4->makeNop();
-+ i5->makeNop();
-+ i6->makeNop();
-+ i7->makeNop();
-+ i8->makeNop();
-+ i9->makeNop();
-+ return;
-+ }
-+ // Long: WriteLoad64 to SecondScratchReg + mtctr + bctr.
-+ addLongJump(BufferOffset(branch), BufferOffset(target));
-+ WriteLoad64Instructions(i0, SecondScratchReg,
-+ LabelBase::INVALID_OFFSET);
-+ i8->makeOp_mtctr(SecondScratchReg);
-+ i9->makeOp_bctr();
-+ break;
-+ }
-+ default:
-+ MOZ_CRASH("Unexpected TrapTag");
-+ }
-+ return;
-+ }
-+
-+ if (i0->isOpcode(PPC_b)) {
-+ // Short unconditional branch — set offset, nop next-in-chain slot.
-+ MOZ_ASSERT(JOffImm26::IsInRange(offset));
-+ i0->setData((i0->encode() & ~0x03FFFFFC) | JOffImm26(offset).encode());
-+ i0->next()->makeNop();
-+ return;
-+ }
-+
-+ if (i0->isOpcode(PPC_bc)) {
-+ // Short conditional branch — preserve upper 16 bits, set offset.
-+ MOZ_ASSERT(BOffImm16::IsInRange(offset));
-+ i0->setData((i0->encode() & 0xFFFF0003) | BOffImm16(offset).encode());
-+ i0->next()->makeNop();
-+ return;
-+ }
-+
-+ MOZ_CRASH("Unexpected instruction in bind");
-+}
-+
-+void Assembler::bind(Label* label, BufferOffset boff) {
-+ if (label->used()) {
-+ bool more;
-+ BufferOffset b(label);
-+ do {
-+ BufferOffset next;
-+ InstImm* inst = (InstImm*)editSrc(b);
-+ Instruction* i1 = ((Instruction*)inst)->next();
-+ more = (i1->encode() != LabelBase::INVALID_OFFSET);
-+ if (more) {
-+ next = BufferOffset(i1->encode());
-+ }
-+ bind(inst, b.getOffset(), boff.getOffset());
-+ b = next;
-+ } while (more);
-+ }
-+ label->bind(boff.getOffset());
-+}
-+
-+void Assembler::retarget(Label* label, Label* target) {
-+ spew("retarget");
-+ if (label->used() && !oom()) {
-+ if (target->bound()) {
-+ bind(label, BufferOffset(target));
-+ } else if (target->used()) {
-+ // Prepend label's use chain to target's use chain.
-+ BufferOffset b(label);
-+ BufferOffset next;
-+ do {
-+ Instruction* inst = (Instruction*)editSrc(b);
-+ Instruction* i1 = inst->next();
-+ if (i1->encode() != LabelBase::INVALID_OFFSET) {
-+ next = BufferOffset(i1->encode());
-+ } else {
-+ // End of label's chain — link to target's head.
-+ i1->setData(target->offset());
-+ break;
-+ }
-+ b = next;
-+ } while (true);
-+ }
-+ // Transfer label's use list to target.
-+ if (!target->bound()) {
-+ target->use(label->offset());
-+ }
-+ }
-+ label->reset();
-+}
-+
-+void Assembler::processCodeLabels(uint8_t* rawCode) {
-+ for (const CodeLabel& label : codeLabels_) {
-+ Bind(rawCode, label);
-+ }
-+}
-+
-+// ========================================================================
-+// Load64 instruction sequence, POWER8 shape (8 slots, literal pool format):
-+// [0] mflr r0 — save LR
-+// [1] bcl 20,0,.+4 — LR = address of [2]
-+// [2] mflr rD — rD = address of [2]
-+// [3] mtlr r0 — restore LR
-+// [4] ld rD, 16(rD) — load from [6..7] (offset = 24 - 8 = 16)
-+// [5] b .+12 — skip data
-+// [6..7] .quad VALUE — 8-byte data
-+// ========================================================================
-+
-+// ========================================================================
-+// Constant pool callbacks (required by AssemblerBufferWithConstantPools).
-+// ========================================================================
-+
-+/* static */
-+void Assembler::InsertIndexIntoTag(uint8_t* load, uint32_t index) {
-+ // Stash the pool entry index in the hint word's low 16 bits; the high
-+ // bits carry the dest reg and load type, consumed by
-+ // PatchConstantPoolLoad when the pool is resolved.
-+ uint32_t* inst = (uint32_t*)load;
-+ *inst = (*inst & 0xFFFF0000) | (index & 0xFFFF);
-+}
-+
-+/* static */
-+bool Assembler::PatchConstantPoolLoad(void* loadAddr, void* constPoolAddr) {
-+ // Rewrite placeholder instructions with a pool load sequence.
-+ // Hint word layout (set by loadFromPoolFloat64 / loadFromPoolFloat32 /
-+ // loadFromPoolSimd128):
-+ // bits 0-15: pool entry index
-+ // bits 16-20: destination register (FPR encoding)
-+ // bits 21-22: load type (PoolLoadFPR64, PoolLoadSimd128, PoolLoadFPR32)
-+ // bits 28-31: sentinel 0xF
-+
-+ uint32_t* inst = (uint32_t*)loadAddr;
-+
-+ uint32_t hint = inst[0];
-+ uint32_t index = hint & 0xFFFF;
-+ uint32_t destReg = (hint >> 16) & 0x1F;
-+ uint32_t loadType = (hint >> 21) & 0x3;
-+
-+ // Displacement: pool entry address relative to inst[1] (mflr target) for the
-+ // bcl path, or relative to inst[0]+4 (addpcis target = CIA+4, which is the
-+ // address of inst[1]) for the addpcis path. Both conventions resolve to the
-+ // same value: (pool entry) − (loadAddr + 4).
-+ int32_t displacement =
-+ (int32_t)((uint8_t*)constPoolAddr + index * 4 - ((uint8_t*)loadAddr + 4));
-+
-+ if (loadType == PoolLoadFPR64 || loadType == PoolLoadFPR32) {
-+ // Three emission paths:
-+ //
-+ // POWER10 (preferred): plfd/plfs FRT, SI(0), R=1 — single PC-relative
-+ // prefixed FP load. 8 bytes = 2 slots; slot 2 becomes a nop. If
-+ // (loadAddr & 15) == 12 the prefix could straddle a 64-byte block, so
-+ // emit a leading nop at slot 0 and place plfd/plfs at slots 1-2 instead.
-+ // Reach: ±8 GB (34-bit signed). No LR clobber, no r16 base.
-+ //
-+ // POWER9: addpcis + lfd/lfs. 2 real insns in inst[0..1] (inst[2] is
-+ // left untouched), no LR clobber, no Return Address Stack corruption.
-+ // Base register is r16. Displacement splits into (hi << 16) + lo where
-+ // lo is the 16-bit signed D-field of lfd/lfs. Reach: ±2 GB.
-+ //
-+ // POWER8: bcl + mflr r16 + lfd/lfs. Same clobber + RAS caveat as before.
-+ // Kept as a correctness fallback; not exercised today because the
-+ // loadConstantDouble/Float32 wrappers skip the pool on POWER8.
-+ //
-+ // lfs/plfs (32-bit) auto-expand their result to double-precision in the
-+ // FPR, replacing the non-pool path's separate xscvspdpn step.
-+ uint32_t baseReg = SavedScratchRegister.code();
-+ uint32_t loadOp = (loadType == PoolLoadFPR64) ? PPC_lfd : PPC_lfs;
-+
-+ if (HasPOWER10()) {
-+ // MLS prefixed FP load. plfd suffix opcode = 50, plfs = 48. Same
-+ // alignment-driven slot placement as PoolLoadSimd128 below.
-+ uint64_t loadAddrBits = reinterpret_cast<uint64_t>(loadAddr);
-+ // loadAddr is the buffer-time pointer; the final executable base is
-+ // only 16-byte aligned, so the unsafe straddle is when
-+ // (loadAddrBits & 15) == 12 (matches ensurePrefixedAlignment above).
-+ bool needLeadingNop = (loadAddrBits & 15) == 12;
-+ int prefixSlot = needLeadingNop ? 1 : 0;
-+ int prefixByteOffset = prefixSlot * 4;
-+ int64_t SI = int64_t(displacement) + 4 - prefixByteOffset;
-+ MOZ_ASSERT(SI >= -(int64_t(1) << 33) && SI < (int64_t(1) << 33));
-+ uint32_t d0 = uint32_t((uint64_t(SI) >> 16) & 0x3FFFFu);
-+ uint32_t d1 = uint32_t(uint64_t(SI) & 0xFFFFu);
-+ // Type 2 (MLS), R=1, RA=0.
-+ uint32_t prefix =
-+ (1u << 26) | (2u << 24) | (1u << 20) | (d0 & 0x3FFFFu);
-+ uint32_t suffixOp = (loadType == PoolLoadFPR64) ? 50u : 48u;
-+ uint32_t suffix = (suffixOp << 26) | (destReg << 21) | d1;
-+
-+ if (needLeadingNop) {
-+ inst[0] = NopInst;
-+ inst[1] = prefix;
-+ inst[2] = suffix;
-+ } else {
-+ inst[0] = prefix;
-+ inst[1] = suffix;
-+ inst[2] = NopInst;
-+ }
-+ } else if (HasPOWER9()) {
-+ // Split displacement into addpcis hi field and lfd/lfs lo field so that
-+ // target = (CIA + 4) + (hi << 16) + SEXT16(lo).
-+ // Only 2 slots are reserved on P9 (loadFromPoolFloat{32,64} above);
-+ // do NOT touch inst[2], it belongs to the next entry.
-+ int16_t lo = (int16_t)(displacement & 0xFFFF);
-+ int32_t hiAdj = displacement - lo;
-+ MOZ_ASSERT((hiAdj & 0xFFFF) == 0);
-+ int32_t hi = hiAdj >> 16;
-+ MOZ_ASSERT(hi >= -32768 && hi <= 32767);
-+ // [0] addpcis r16, hi
-+ uint32_t Dhi = uint16_t(hi);
-+ inst[0] = (19u << 26) | (baseReg << 21) | ((Dhi >> 1) & 0x1F) << 16 |
-+ ((Dhi >> 6) & 0x3FF) << 6 | (2u << 1) | (Dhi & 1u);
-+ // [1] lfd/lfs fD, lo(r16)
-+ inst[1] = loadOp | (destReg << 21) | (baseReg << 16) | (uint16_t(lo));
-+ } else {
-+ MOZ_ASSERT(displacement >= -32768 && displacement < 32768);
-+ // [0] bcl 20,0,$+4
-+ inst[0] = PPC_bcl_always_plus4;
-+ // [1] mflr r16
-+ inst[1] = PPC_mfspr | (baseReg << 21) | PPC_SPR(spr_lr);
-+ // [2] lfd/lfs fD, displacement(r16)
-+ inst[2] =
-+ loadOp | (destReg << 21) | (baseReg << 16) | (displacement & 0xFFFF);
-+ }
-+ } else if (loadType == PoolLoadSimd128) {
-+ // Three emission paths. The number of slots written differs per path
-+ // and must match what loadFromPoolSimd128 reserved:
-+ //
-+ // POWER10 (preferred): plxv vsD, SI(0), R=1 — single PC-relative
-+ // prefixed load, natural-LE byte order (no xxpermdi needed). 8 bytes
-+ // = 2 slots plus one nop; only inst[0..2] are written. If the prefix
-+ // could straddle a 64-byte block ((loadAddr & 15) == 12), the nop
-+ // leads at slot 0 and plxv sits at slots 1-2. Reach: ±8 GB (34-bit).
-+ //
-+ // POWER9: addpcis + addi + lxvx. 3 real insns in inst[0..2], natural
-+ // LE, no LR clobber.
-+ //
-+ // POWER8: bcl + mflr + addi + lxvd2x + xxpermdi (BE-DW byte-swap fixup)
-+ // in inst[0..4]. Clobbers LR. P9/P8 base reg is SavedScratchRegister.
-+ MOZ_ASSERT(displacement >= -32768 && displacement < 32768);
-+ // Simd128 dest is in VR-namespace (encoding 32-63). Hint stores only
-+ // the low 5 bits (loadFromPoolSimd128 masks); we set TX unconditionally
-+ // since PoolLoadSimd128 always targets a Simd128.
-+ constexpr uint32_t kTX = 1u;
-+ constexpr uint32_t kAxBxTx_xxpermdi = (1u << 2) | (1u << 1) | 1u;
-+
-+ if (HasPOWER10()) {
-+ // Place the plxv prefix at slot 0, or at slot 1 behind a leading nop
-+ // when slot 0 could make the 8-byte instruction straddle a 64-byte block.
-+ uint64_t loadAddrBits = reinterpret_cast<uint64_t>(loadAddr);
-+ // loadAddr is the buffer-time pointer; the final executable base is
-+ // only 16-byte aligned, so the unsafe straddle is when
-+ // (loadAddrBits & 15) == 12 (matches ensurePrefixedAlignment above).
-+ bool needLeadingNop = (loadAddrBits & 15) == 12;
-+ int prefixSlot = needLeadingNop ? 1 : 0;
-+ int prefixByteOffset = prefixSlot * 4;
-+ // SI = (pool entry addr) - (prefix addr)
-+ // = (loadAddr + 4 + displacement) - (loadAddr + prefixByteOffset)
-+ // = displacement + 4 - prefixByteOffset
-+ int64_t SI = int64_t(displacement) + 4 - prefixByteOffset;
-+ MOZ_ASSERT(SI >= -(int64_t(1) << 33) && SI < (int64_t(1) << 33));
-+ uint32_t d0 = uint32_t((uint64_t(SI) >> 16) & 0x3FFFFu);
-+ uint32_t d1 = uint32_t(uint64_t(SI) & 0xFFFFu);
-+ // Prefix: primary opcode 1, Type 0 (8LS), R=1, d0 at LE bits 17..0.
-+ uint32_t prefix =
-+ (1u << 26) | (0u << 24) | (1u << 20) | (d0 & 0x3FFFFu);
-+ // Suffix: 5-bit opcode 25 at LE 31..27, TX at LE 26, T at LE 25..21,
-+ // RA=0 at LE 20..16, d1 at LE 15..0.
-+ uint32_t suffix = (25u << 27) | (kTX << 26) | (destReg << 21) | d1;
-+
-+ // P10 reserves 3 slots; only inst[0..2] are written. Slots 3..4
-+ // belong to the next pool entry on P10.
-+ if (needLeadingNop) {
-+ inst[0] = NopInst;
-+ inst[1] = prefix;
-+ inst[2] = suffix;
-+ } else {
-+ inst[0] = prefix;
-+ inst[1] = suffix;
-+ inst[2] = NopInst;
-+ }
-+ } else if (HasPOWER9()) {
-+ // addpcis + addi + lxvx (3 slots) — no LR clobber, no RAS hazard.
-+ // Same displacement split as the FP scalar P9 path: target =
-+ // (CIA+4) + (hi << 16) + SEXT16(lo). lxvx is X-form indexed (no
-+ // immediate offset), so combine the low 16 bits into r16 via addi
-+ // before the load.
-+ int16_t lo = (int16_t)(displacement & 0xFFFF);
-+ int32_t hiAdj = displacement - lo;
-+ MOZ_ASSERT((hiAdj & 0xFFFF) == 0);
-+ int32_t hi = hiAdj >> 16;
-+ MOZ_ASSERT(hi >= -32768 && hi <= 32767);
-+ uint32_t Dhi = uint16_t(hi);
-+ uint32_t baseReg = SavedScratchRegister.code();
-+ // [0] addpcis r16, hi
-+ inst[0] = (19u << 26) | (baseReg << 21) | ((Dhi >> 1) & 0x1F) << 16 |
-+ ((Dhi >> 6) & 0x3FF) << 6 | (2u << 1) | (Dhi & 1u);
-+ // [1] addi r16, r16, lo
-+ inst[1] = PPC_addi | (baseReg << 21) | (baseReg << 16) | uint16_t(lo);
-+ // [2] lxvx vsD, 0, r16 (XT[0:4] in bits 21-25, TX at bit 0)
-+ inst[2] = PPC_lxvx | (destReg << 21) | (baseReg << 11) | kTX;
-+ } else {
-+ // P8 fallback: bcl + mflr + addi + lxvd2x + xxpermdi (5 slots).
-+ // Clobbers LR; correctness-only path.
-+ uint32_t baseReg = SavedScratchRegister.code();
-+ inst[0] = PPC_bcl_always_plus4;
-+ inst[1] = PPC_mfspr | (baseReg << 21) | PPC_SPR(spr_lr);
-+ inst[2] = PPC_addi | (baseReg << 21) | (baseReg << 16) |
-+ (displacement & 0xFFFF);
-+ // lxvd2x XT, RA=0, RB=r16 — loads in BE order on LE.
-+ inst[3] = PPC_lxvd2x | (destReg << 21) | (baseReg << 11) | kTX;
-+ // xxpermdi XT, XT, XT, 2 — swap doublewords for LE byte order.
-+ inst[4] = PPC_xxpermdi | (destReg << 21) | (destReg << 16) |
-+ (destReg << 11) | (2u << 8) | kAxBxTx_xxpermdi;
-+ }
-+ } else {
-+ MOZ_CRASH("PatchConstantPoolLoad: unsupported load type");
-+ }
-+
-+ return false;
-+}
-+
-+/* static */
-+void Assembler::WritePoolGuard(BufferOffset branch, Instruction* inst,
-+ BufferOffset dest) {
-+ // Emit an unconditional branch over the pool data.
-+ int32_t offset = dest.getOffset() - branch.getOffset();
-+ MOZ_ASSERT(JOffImm26::IsInRange(offset));
-+ inst->setData(PPC_b | (offset & 0x03FFFFFC));
-+}
-+
-+/* static */
-+void Assembler::WritePoolHeader(uint8_t* start, Pool* p, bool isNatural) {
-+ // Write pool identification header.
-+ // Encode pool size and isNatural flag in a single 32-bit word.
-+ uint32_t poolSize = p->getPoolSize();
-+ uint32_t sizeInWords = (poolSize + 4 + 3) >> 2; // header + data, in words
-+ MOZ_ASSERT(sizeInWords < (1 << 15));
-+ uint32_t header = (sizeInWords & 0x7FFF) | (isNatural ? (1 << 15) : 0) |
-+ 0xFFFF0000; // sentinel
-+ *(uint32_t*)start = header;
-+}
-+
-+/* static */
-+void Assembler::PatchShortRangeBranchToVeneer(PPCBuffer*, unsigned rangeIdx,
-+ BufferOffset deadline,
-+ BufferOffset veneer) {
-+ // PPC64 does not use short-range branch tracking (NumShortBranchRanges = 0).
-+ MOZ_CRASH("PatchShortRangeBranchToVeneer: should not be called");
-+}
-+
-+// Two stanza shapes share the same 8-slot footprint and the same .quad
-+// location at slots [6..7] (so ExtractLoad64Value / UpdateLoad64Value are
-+// shape-agnostic):
-+//
-+// POWER8 (no addpcis):
-+// [0] mflr r0
-+// [1] bcl 20,0,.+4 (LR := pc of [2])
-+// [2] mflr rD
-+// [3] mtlr r0
-+// [4] ld rD, 16(rD)
-+// [5] b .+12
-+// [6..7] .quad VALUE
-+//
-+// POWER9+ (addpcis):
-+// [0] addpcis rD, 0 (rD := NIA = pc of [1])
-+// [1] ld rD, 20(rD) (rD := mem[pc_of_[1] + 20] = mem[slot[6]])
-+// [2] b .+24
-+// [3..5] NOP, NOP, NOP
-+// [6..7] .quad VALUE
-+//
-+// The P9+ form drops the bcl/mflr/mtlr LR-bounce (no RAS thrash) and runs
-+// 3 dynamic insns instead of 6. Distinguished at patch time by inst[0]'s
-+// major opcode: 31 = mfspr (P8) vs 19 = addpcis (P9+).
-+static bool IsAddpcisLoad64Stanza(uint32_t enc0) {
-+ return ((enc0 >> 26) & 0x3f) == 19;
-+}
-+
-+// Extract the destination register from a load64 stanza in either shape.
-+// P8 stores rD in `mflr rD` at slot [2]; P9+ stores rD in `addpcis rD, 0`
-+// at slot [0]. Both encode RT at LE bits [21..25].
-+static uint8_t Load64StanzaDestReg(Instruction* inst0) {
-+ if (IsAddpcisLoad64Stanza(inst0->encode())) {
-+ return (inst0[0].encode() >> 21) & 0x1f;
-+ }
-+ return (inst0[2].encode() >> 21) & 0x1f;
-+}
-+
-+/* static */
-+void Assembler::WriteLoad64Instructions(Instruction* inst0, Register reg,
-+ uint64_t value) {
-+ Instruction* i1 = inst0->next();
-+ Instruction* i2 = i1->next();
-+ Instruction* i3 = i2->next();
-+ Instruction* i4 = i3->next();
-+ Instruction* i5 = i4->next();
-+ Instruction* i6 = i5->next();
-+ Instruction* i7 = i6->next();
-+
-+ if (HasPOWER9()) {
-+ // [0] addpcis rD, 0 (DX-form: opcode=19, XO=2, all D fields = 0)
-+ inst0->setData(0x4C000004u | (uint32_t(reg.code()) << 21));
-+ // [1] ld rD, 20(rD) (rD := *(slot[1] + 20) = *(slot[6]) = .quad)
-+ i1->setData(PPC_ld | (uint32_t(reg.code()) << 21) |
-+ (uint32_t(reg.code()) << 16) | 20);
-+ // [2] b .+24 (skip slots [3..7] to land at slot [8])
-+ i2->setData(PPC_b | (24 & 0x03FFFFFC));
-+ // [3..5] NOP filler — unreachable but kept aligned for the patcher.
-+ i3->setData(NopInst);
-+ i4->setData(NopInst);
-+ i5->setData(NopInst);
-+ } else {
-+ // [0] mflr r0
-+ inst0->setData(PPC_mfspr | (r0.code() << 21) | PPC_SPR(spr_lr));
-+ // [1] bcl 20,0,.+4
-+ i1->setData(PPC_bcl_always_plus4);
-+ // [2] mflr rD
-+ i2->setData(PPC_mfspr | (reg.code() << 21) | PPC_SPR(spr_lr));
-+ // [3] mtlr r0
-+ i3->setData(PPC_mtspr | (r0.code() << 21) | PPC_SPR(spr_lr));
-+ // [4] ld rD, 16(rD)
-+ i4->setData(PPC_ld | (reg.code() << 21) | (reg.code() << 16) | 16);
-+ // [5] b .+12
-+ i5->setData(PPC_b | (12 & 0x03FFFFFC));
-+ }
-+
-+ // [6..7] .quad VALUE (low 32 at lower addr, high 32 at higher addr).
-+ i6->setData((uint32_t)(value & 0xFFFFFFFF));
-+ i7->setData((uint32_t)(value >> 32));
-+}
-+
-+/* static */
-+uint64_t Assembler::ExtractLoad64Value(Instruction* inst0) {
-+ // The 8-byte value is at inst0[6..7] in both shapes.
-+ Instruction* i6 = inst0 + 6;
-+ Instruction* i7 = inst0 + 7;
-+
-+ uint64_t lo = (uint64_t)i6->encode(); // low 32 at lower addr
-+ uint64_t hi = (uint64_t)i7->encode(); // high 32 at higher addr
-+ return (hi << 32) | lo;
-+}
-+
-+/* static */
-+void Assembler::UpdateLoad64Value(Instruction* inst0, uint64_t value) {
-+ // Sanity-check that inst0 is the start of a load64 stanza in either shape.
-+ // P8: inst0[1] == bcl 20,0,.+4. P9+: inst0[0] is addpcis (major opcode 19).
-+ MOZ_ASSERT(inst0[1].encode() == PPC_bcl_always_plus4 ||
-+ IsAddpcisLoad64Stanza(inst0->encode()),
-+ "UpdateLoad64Value: inst0 is not a load64 stanza");
-+
-+ // .quad lives at inst0[6..7] in both shapes.
-+ Instruction* i6 = inst0 + 6;
-+ Instruction* i7 = inst0 + 7;
-+
-+ i6->setData((uint32_t)(value & 0xFFFFFFFF)); // low 32 at lower addr
-+ i7->setData((uint32_t)(value >> 32)); // high 32 at higher addr
-+}
-+
-+// ========================================================================
-+// Patching and toggle operations.
-+// ========================================================================
-+
-+/* static */
-+uint32_t Assembler::PatchWrite_NearCallSize() {
-+ // 8 instructions for Load64 + mtctr + bctrl = 10 instructions.
-+ return 10 * sizeof(uint32_t);
-+}
-+
-+/* static */
-+void Assembler::PatchWrite_NearCall(CodeLocationLabel start,
-+ CodeLocationLabel toCall) {
-+ Instruction* inst = (Instruction*)start.raw();
-+ uint8_t* dest = toCall.raw();
-+
-+ Assembler::WriteLoad64Instructions(inst, SavedScratchRegister,
-+ (uint64_t)dest);
-+ inst[8].makeOp_mtctr(SavedScratchRegister);
-+ inst[9].makeOp_bctr(LinkB);
-+ FlushICache(inst, 10 * sizeof(Instruction));
-+}
-+
-+/* static */
-+void Assembler::PatchWrite_Imm32(CodeLocationLabel label, Imm32 imm) {
-+ uint32_t* l = (uint32_t*)label.raw();
-+ *(l - 1) = imm.value;
-+ FlushICache(l - 1, sizeof(uint32_t));
-+}
-+
-+void Assembler::PatchDataWithValueCheck(CodeLocationLabel label,
-+ ImmPtr newValue, ImmPtr expectedValue) {
-+ PatchDataWithValueCheck(label, PatchedImmPtr(newValue.value),
-+ PatchedImmPtr(expectedValue.value));
-+}
-+
-+void Assembler::PatchDataWithValueCheck(CodeLocationLabel label,
-+ PatchedImmPtr newValue,
-+ PatchedImmPtr expectedValue) {
-+ Instruction* inst = (Instruction*)label.raw();
-+
-+ DebugOnly<uint64_t> value = Assembler::ExtractLoad64Value(inst);
-+ MOZ_ASSERT(value == uint64_t(expectedValue.value));
-+
-+ Assembler::UpdateLoad64Value(inst, uint64_t(newValue.value));
-+ FlushICache(inst, 8 * sizeof(Instruction));
-+}
-+
-+// ToggleCall toggles the call portion of a toggledCall stanza.
-+// Layout: 8 load64 instructions + mtctr + bctrl (10 total).
-+// We toggle the last two instructions (mtctr/bctrl vs nop/nop).
-+// The destination register is extracted via Load64StanzaDestReg, which
-+// handles both the P8 (mflr-rD at slot [2]) and P9+ (addpcis-rD at slot
-+// [0]) shapes.
-+
-+/* static */
-+void Assembler::ToggleCall(CodeLocationLabel inst_, bool enabled) {
-+ Instruction* i0 = (Instruction*)inst_.raw();
-+ Instruction* i8 = (Instruction*)(inst_.raw() + 8 * sizeof(uint32_t));
-+ Instruction* i9 = (Instruction*)(inst_.raw() + 9 * sizeof(uint32_t));
-+
-+ // Accept either P8 stanza (mflr r0 at slot [0]) or P9+ stanza (addpcis at
-+ // slot [0]; major opcode 19).
-+ MOZ_ASSERT(i0->encode() == (PPC_mfspr | (r0.code() << 21) | PPC_SPR(spr_lr)) ||
-+ IsAddpcisLoad64Stanza(i0->encode()));
-+
-+ // ToggleCall is idempotent across the same `enabled` value: re-enabling
-+ // an already-enabled site (or re-disabling a disabled one) is a no-op.
-+ // Mozilla's debugger machinery may legitimately toggle the same call site
-+ // multiple times in the same direction (e.g. setting both a breakpoint
-+ // and a frame.onStep on the same script).
-+ Register scratch = Register::FromCode(Load64StanzaDestReg(i0));
-+ uint32_t mtctr = PPC_mtspr | (scratch.code() << 21) | PPC_SPR(spr_ctr);
-+ uint32_t bctrl = (uint32_t)PPC_bctr | (uint32_t)LinkB;
-+ if (enabled) {
-+ MOZ_ASSERT(i8->encode() == NopInst || i8->encode() == mtctr);
-+ MOZ_ASSERT(i9->encode() == NopInst || i9->encode() == bctrl);
-+ i8->setData(mtctr);
-+ i9->setData(bctrl);
-+ } else {
-+ MOZ_ASSERT(i8->encode() == NopInst || i8->encode() == mtctr);
-+ MOZ_ASSERT(i9->encode() == NopInst || i9->encode() == bctrl);
-+ i8->setData(NopInst);
-+ i9->setData(NopInst);
-+ }
-+ FlushICache(i8, 2 * sizeof(Instruction));
-+}
-+
-+// toggledJump emits a trap stanza via jump(label). After binding, the first
-+// instruction becomes "b offset" (short branch). We toggle between b and ori:
-+// b offset: [010010][LI:24][0][0]
-+// ori r0,r0,imm: [011000][00000][00000][UI:16]
-+// For short forward jumps (offset < 64KB), bits 25:16 of LI are 0, so
-+// swapping the opcode preserves the offset in the lower 16 bits.
-+// ori r0,r0,X is effectively a nop (writes to r0).
-+
-+/* static */
-+void Assembler::ToggleToJmp(CodeLocationLabel inst_) {
-+ Instruction* inst = (Instruction*)inst_.raw();
-+ MOZ_ASSERT(inst->isOpcode(PPC_ori));
-+ // Verify RS=0 and RA=0 (r0).
-+ MOZ_ASSERT((inst->encode() & 0x03E00000) == 0);
-+ MOZ_ASSERT((inst->encode() & 0x001F0000) == 0);
-+ // Swap opcode from ori (011000) to b (010010).
-+ uint32_t encoding = inst->encode();
-+ encoding = (encoding & 0x03FFFFFF) | (uint32_t)PPC_b;
-+ inst->setData(encoding);
-+ FlushICache(inst, sizeof(Instruction));
-+}
-+
-+/* static */
-+void Assembler::ToggleToCmp(CodeLocationLabel inst_) {
-+ Instruction* inst = (Instruction*)inst_.raw();
-+ MOZ_ASSERT(inst->isOpcode(PPC_b));
-+ // Verify short forward branch: upper LI bits (25:16) are 0, AA=0, LK=0.
-+ MOZ_ASSERT((inst->encode() & 0x03FF0003) == 0);
-+ // Swap opcode from b (010010) to ori (011000).
-+ uint32_t encoding = inst->encode();
-+ encoding = (encoding & 0x03FFFFFF) | (uint32_t)PPC_ori;
-+ inst->setData(encoding);
-+ FlushICache(inst, sizeof(Instruction));
-+}
-+
-+// ========================================================================
-+// Bind, tracing, and pointer extraction.
-+// ========================================================================
-+
-+void Assembler::Bind(uint8_t* rawCode, const CodeLabel& label) {
-+ if (label.patchAt().bound()) {
-+ auto mode = label.linkMode();
-+ intptr_t offset = label.patchAt().offset();
-+ intptr_t target = label.target().offset();
-+
-+ if (mode == CodeLabel::RawPointer) {
-+ *reinterpret_cast<const void**>(rawCode + offset) = rawCode + target;
-+ } else {
-+ MOZ_ASSERT(mode == CodeLabel::MoveImmediate ||
-+ mode == CodeLabel::JumpImmediate);
-+ Instruction* inst = (Instruction*)(rawCode + offset);
-+ Assembler::UpdateLoad64Value(inst, (uint64_t)(rawCode + target));
-+ }
-+ }
-+}
-+
-+uintptr_t Assembler::GetPointer(uint8_t* instPtr) {
-+ Instruction* inst = (Instruction*)instPtr;
-+ return Assembler::ExtractLoad64Value(inst);
-+}
-+
-+static JitCode* CodeFromJump(Instruction* jump) {
-+ uint8_t* target = (uint8_t*)Assembler::ExtractLoad64Value(jump);
-+ return JitCode::FromExecutable(target);
-+}
-+
-+void Assembler::TraceJumpRelocations(JSTracer* trc, JitCode* code,
-+ CompactBufferReader& reader) {
-+ while (reader.more()) {
-+ JitCode* child =
-+ CodeFromJump((Instruction*)(code->raw() + reader.readUnsigned()));
-+ TraceManuallyBarrieredEdge(trc, &child, "rel32");
-+ }
-+}
-+
-+static void TraceOneDataRelocation(JSTracer* trc,
-+ mozilla::Maybe<AutoWritableJitCode>& awjc,
-+ JitCode* code, Instruction* inst) {
-+ void* ptr = (void*)Assembler::ExtractLoad64Value(inst);
-+ void* prior = ptr;
-+
-+ uintptr_t word = reinterpret_cast<uintptr_t>(ptr);
-+ if (word >> JSVAL_TAG_SHIFT) {
-+ Value v = Value::fromRawBits(word);
-+ TraceManuallyBarrieredEdge(trc, &v, "jit-masm-value");
-+ ptr = (void*)v.bitsAsPunboxPointer();
-+ } else {
-+ TraceManuallyBarrieredGenericPointerEdge(
-+ trc, reinterpret_cast<gc::Cell**>(&ptr), "jit-masm-ptr");
-+ }
-+
-+ if (ptr != prior) {
-+ if (awjc.isNothing()) {
-+ awjc.emplace(code);
-+ }
-+ Assembler::UpdateLoad64Value(inst, uint64_t(ptr));
-+ }
-+}
-+
-+/* static */
-+void Assembler::TraceDataRelocations(JSTracer* trc, JitCode* code,
-+ CompactBufferReader& reader) {
-+ mozilla::Maybe<AutoWritableJitCode> awjc;
-+ while (reader.more()) {
-+ size_t offset = reader.readUnsigned();
-+ Instruction* inst = (Instruction*)(code->raw() + offset);
-+ TraceOneDataRelocation(trc, awjc, code, inst);
-+ }
-+}
-+
-+/* static */
-+uint8_t* Assembler::NextInstruction(uint8_t* instruction, uint32_t* count) {
-+ if (count != nullptr) {
-+ *count += sizeof(Instruction);
-+ }
-+ return instruction + sizeof(Instruction);
-+}
-+
-+// ========================================================================
-+// UseScratchRegisterScope implementation.
-+// ========================================================================
-+
-+UseScratchRegisterScope::UseScratchRegisterScope(Assembler& assembler)
-+ : available_(assembler.GetScratchRegisterList()),
-+ old_available_(*available_) {}
-+
-+UseScratchRegisterScope::UseScratchRegisterScope(Assembler* assembler)
-+ : available_(assembler->GetScratchRegisterList()),
-+ old_available_(*available_) {}
-+
-+UseScratchRegisterScope::~UseScratchRegisterScope() {
-+ *available_ = old_available_;
-+}
-+
-+Register UseScratchRegisterScope::Acquire() {
-+ MOZ_ASSERT(available_ != nullptr);
-+ MOZ_ASSERT(!available_->empty());
-+ Register index = GeneralRegisterSet::FirstRegister(available_->bits());
-+ available_->takeRegisterIndex(index);
-+ return index;
-+}
-+
-+void UseScratchRegisterScope::Release(const Register& reg) {
-+ MOZ_ASSERT(available_ != nullptr);
-+ MOZ_ASSERT(old_available_.hasRegisterIndex(reg));
-+ MOZ_ASSERT(!available_->hasRegisterIndex(reg));
-+ Include(GeneralRegisterSet(1 << reg.code()));
-+}
-+
-+bool UseScratchRegisterScope::hasAvailable() const {
-+ return (available_->size()) != 0;
-+}
-diff --git a/js/src/jit/ppc64/Assembler-ppc64.h b/js/src/jit/ppc64/Assembler-ppc64.h
-new file mode 100644
-index 000000000000..60e84bf71cf7
---- /dev/null
-+++ b/js/src/jit/ppc64/Assembler-ppc64.h
-@@ -0,0 +1,2114 @@
-+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
-+ * vim: set ts=8 sts=2 et sw=2 tw=80:
-+ * This Source Code Form is subject to the terms of the Mozilla Public
-+ * License, v. 2.0. If a copy of the MPL was not distributed with this
-+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-+
-+#ifndef jit_ppc64_Assembler_ppc64_h
-+#define jit_ppc64_Assembler_ppc64_h
-+
-+#include "jit/CompactBuffer.h"
-+#include "jit/JitCode.h"
-+#include "jit/JitSpewer.h"
-+#include "jit/ppc64/Architecture-ppc64.h"
-+#include "jit/shared/Assembler-shared.h"
-+#include "jit/shared/Disassembler-shared.h"
-+#include "jit/shared/IonAssemblerBuffer.h"
-+#include "jit/shared/IonAssemblerBufferWithConstantPools.h"
-+#include "wasm/WasmTypeDecls.h"
-+
-+namespace js {
-+namespace jit {
-+
-+// GPR register constants.
-+static constexpr Register r0{Registers::r0};
-+static constexpr Register r1{Registers::r1};
-+static constexpr Register r2{Registers::r2};
-+static constexpr Register r3{Registers::r3};
-+static constexpr Register r4{Registers::r4};
-+static constexpr Register r5{Registers::r5};
-+static constexpr Register r6{Registers::r6};
-+static constexpr Register r7{Registers::r7};
-+static constexpr Register r8{Registers::r8};
-+static constexpr Register r9{Registers::r9};
-+static constexpr Register r10{Registers::r10};
-+static constexpr Register r11{Registers::r11};
-+static constexpr Register r12{Registers::r12};
-+static constexpr Register r13{Registers::r13};
-+static constexpr Register r14{Registers::r14};
-+static constexpr Register r15{Registers::r15};
-+static constexpr Register r16{Registers::r16};
-+static constexpr Register r17{Registers::r17};
-+static constexpr Register r18{Registers::r18};
-+static constexpr Register r19{Registers::r19};
-+static constexpr Register r20{Registers::r20};
-+static constexpr Register r21{Registers::r21};
-+static constexpr Register r22{Registers::r22};
-+static constexpr Register r23{Registers::r23};
-+static constexpr Register r24{Registers::r24};
-+static constexpr Register r25{Registers::r25};
-+static constexpr Register r26{Registers::r26};
-+static constexpr Register r27{Registers::r27};
-+static constexpr Register r28{Registers::r28};
-+static constexpr Register r29{Registers::r29};
-+static constexpr Register r30{Registers::r30};
-+static constexpr Register r31{Registers::r31};
-+
-+// FPR register constants.
-+static constexpr FloatRegister f0{FloatRegisters::f0, FloatRegisters::Double};
-+static constexpr FloatRegister f1{FloatRegisters::f1, FloatRegisters::Double};
-+static constexpr FloatRegister f2{FloatRegisters::f2, FloatRegisters::Double};
-+static constexpr FloatRegister f3{FloatRegisters::f3, FloatRegisters::Double};
-+static constexpr FloatRegister f4{FloatRegisters::f4, FloatRegisters::Double};
-+static constexpr FloatRegister f5{FloatRegisters::f5, FloatRegisters::Double};
-+static constexpr FloatRegister f6{FloatRegisters::f6, FloatRegisters::Double};
-+static constexpr FloatRegister f7{FloatRegisters::f7, FloatRegisters::Double};
-+static constexpr FloatRegister f8{FloatRegisters::f8, FloatRegisters::Double};
-+static constexpr FloatRegister f9{FloatRegisters::f9, FloatRegisters::Double};
-+static constexpr FloatRegister f10{FloatRegisters::f10, FloatRegisters::Double};
-+static constexpr FloatRegister f11{FloatRegisters::f11, FloatRegisters::Double};
-+static constexpr FloatRegister f12{FloatRegisters::f12, FloatRegisters::Double};
-+static constexpr FloatRegister f13{FloatRegisters::f13, FloatRegisters::Double};
-+static constexpr FloatRegister f14{FloatRegisters::f14, FloatRegisters::Double};
-+static constexpr FloatRegister f15{FloatRegisters::f15, FloatRegisters::Double};
-+static constexpr FloatRegister f16{FloatRegisters::f16, FloatRegisters::Double};
-+static constexpr FloatRegister f17{FloatRegisters::f17, FloatRegisters::Double};
-+static constexpr FloatRegister f18{FloatRegisters::f18, FloatRegisters::Double};
-+static constexpr FloatRegister f19{FloatRegisters::f19, FloatRegisters::Double};
-+static constexpr FloatRegister f20{FloatRegisters::f20, FloatRegisters::Double};
-+static constexpr FloatRegister f21{FloatRegisters::f21, FloatRegisters::Double};
-+static constexpr FloatRegister f22{FloatRegisters::f22, FloatRegisters::Double};
-+static constexpr FloatRegister f23{FloatRegisters::f23, FloatRegisters::Double};
-+static constexpr FloatRegister f24{FloatRegisters::f24, FloatRegisters::Double};
-+static constexpr FloatRegister f25{FloatRegisters::f25, FloatRegisters::Double};
-+static constexpr FloatRegister f26{FloatRegisters::f26, FloatRegisters::Double};
-+static constexpr FloatRegister f27{FloatRegisters::f27, FloatRegisters::Double};
-+static constexpr FloatRegister f28{FloatRegisters::f28, FloatRegisters::Double};
-+static constexpr FloatRegister f29{FloatRegisters::f29, FloatRegisters::Double};
-+static constexpr FloatRegister f30{FloatRegisters::f30, FloatRegisters::Double};
-+static constexpr FloatRegister f31{FloatRegisters::f31, FloatRegisters::Double};
-+
-+static constexpr Register InvalidReg{Registers::Invalid};
-+static constexpr FloatRegister InvalidFloatReg;
-+
-+static constexpr Register StackPointer = r1;
-+static constexpr Register FramePointer = r31;
-+static constexpr Register ReturnReg = r3;
-+static constexpr Register64 ReturnReg64(ReturnReg);
-+static constexpr FloatRegister ReturnFloat32Reg{FloatRegisters::f1,
-+ FloatRegisters::Single};
-+static constexpr FloatRegister ReturnDoubleReg = f1;
-+static constexpr FloatRegister ReturnSimd128Reg{FloatRegisters::f1,
-+ FloatRegisters::Simd128};
-+
-+// r16 is non-volatile and non-allocatable, used as a saved scratch.
-+static constexpr Register SavedScratchRegister = r16;
-+
-+static constexpr Register SecondScratchReg = r12;
-+
-+static constexpr FloatRegister ScratchFloat32Reg{FloatRegisters::f0,
-+ FloatRegisters::Single};
-+static constexpr FloatRegister ScratchDoubleReg = f0;
-+static constexpr FloatRegister ScratchSimd128Reg{FloatRegisters::f0,
-+ FloatRegisters::Simd128};
-+
-+struct ScratchFloat32Scope : public AutoFloatRegisterScope {
-+ explicit ScratchFloat32Scope(MacroAssembler& masm)
-+ : AutoFloatRegisterScope(masm, ScratchFloat32Reg) {}
-+};
-+
-+struct ScratchDoubleScope : public AutoFloatRegisterScope {
-+ explicit ScratchDoubleScope(MacroAssembler& masm)
-+ : AutoFloatRegisterScope(masm, ScratchDoubleReg) {}
-+};
-+
-+// PPC64: ScratchSimd128Scope is a simple register wrapper, NOT a scoped
-+// acquire/release. On PPC64, ScratchSimd128Reg is v0 (VSR32; encoded as
-+// {FloatRegisters::f0, Simd128} so encoding() = 0 + 32 = 32) — distinct
-+// from ScratchDoubleReg = f0 (VSR0). It is non-allocatable and always
-+// available. Many SIMD functions call other SIMD functions that also need
-+// v0, creating nested "scopes". Using AutoFloatRegisterScope would assert
-+// on double-acquire in debug builds. Since v0 is never allocated by the
-+// register allocator, nesting is safe.
-+struct ScratchSimd128Scope : public FloatRegister {
-+ explicit ScratchSimd128Scope(MacroAssembler&)
-+ : FloatRegister(ScratchSimd128Reg) {}
-+};
-+
-+class Assembler;
-+
-+class UseScratchRegisterScope {
-+ public:
-+ explicit UseScratchRegisterScope(Assembler& assembler);
-+ explicit UseScratchRegisterScope(Assembler* assembler);
-+ ~UseScratchRegisterScope();
-+
-+ Register Acquire();
-+ void Release(const Register& reg);
-+ bool hasAvailable() const;
-+ void Include(const GeneralRegisterSet& list) {
-+ *available_ = GeneralRegisterSet::Union(*available_, list);
-+ }
-+ void Exclude(const GeneralRegisterSet& list) {
-+ *available_ = GeneralRegisterSet::Subtract(*available_, list);
-+ }
-+
-+ private:
-+ GeneralRegisterSet* available_;
-+ GeneralRegisterSet old_available_;
-+};
-+
-+static constexpr Register OsrFrameReg = r6;
-+static constexpr Register PreBarrierReg = r4;
-+static constexpr Register InterpreterPCReg = r17;
-+
-+static constexpr Register CallTempReg0 = r4;
-+static constexpr Register CallTempReg1 = r9;
-+static constexpr Register CallTempReg2 = r10;
-+static constexpr Register CallTempReg3 = r7;
-+// CallTempReg4 must NOT be JSReturnReg (r5): LMegamorphicLoadSlotPermissive
-+// uses tempFixed(CallTempReg4) for a saved obj pointer AND defineReturn
-+// (JSReturnOperand=r5) for output. If they alias, the megamorphic cache
-+// lookup clobbers the saved obj, corrupting the 'this' pointer.
-+static constexpr Register CallTempReg4 = r8;
-+static constexpr Register CallTempReg5 = r6;
-+
-+// PPC64 ELFv2 has no volatile non-arg GPRs (r3-r10 are all arg regs).
-+// Use allocatable non-volatile registers as overflow temps.
-+static constexpr Register CallTempNonArgRegs[] = {r14, r15};
-+static const uint32_t NumCallTempNonArgRegs = std::size(CallTempNonArgRegs);
-+
-+static constexpr Register IntArgReg0 = r3;
-+static constexpr Register IntArgReg1 = r4;
-+static constexpr Register IntArgReg2 = r5;
-+static constexpr Register IntArgReg3 = r6;
-+static constexpr Register IntArgReg4 = r7;
-+static constexpr Register IntArgReg5 = r8;
-+static constexpr Register IntArgReg6 = r9;
-+static constexpr Register IntArgReg7 = r10;
-+
-+// Registers used by RegExpMatcher and RegExpExecMatch stubs.
-+static constexpr Register RegExpMatcherRegExpReg = CallTempReg0;
-+static constexpr Register RegExpMatcherStringReg = CallTempReg1;
-+static constexpr Register RegExpMatcherLastIndexReg = CallTempReg2;
-+
-+// Registers used by RegExpExecTest stub (do not use ReturnReg).
-+static constexpr Register RegExpExecTestRegExpReg = CallTempReg0;
-+static constexpr Register RegExpExecTestStringReg = CallTempReg1;
-+
-+// Registers used by RegExpSearcher stub (do not use ReturnReg).
-+static constexpr Register RegExpSearcherRegExpReg = CallTempReg0;
-+static constexpr Register RegExpSearcherStringReg = CallTempReg1;
-+static constexpr Register RegExpSearcherLastIndexReg = CallTempReg2;
-+
-+static constexpr Register JSReturnReg_Type = r6;
-+static constexpr Register JSReturnReg_Data = r5;
-+static constexpr Register JSReturnReg = r5;
-+static constexpr ValueOperand JSReturnOperand = ValueOperand(JSReturnReg);
-+
-+static constexpr Register ABINonArgReg0 = r19;
-+static constexpr Register ABINonArgReg1 = r20;
-+static constexpr Register ABINonArgReg2 = r21;
-+static constexpr Register ABINonArgReg3 = r22;
-+static constexpr Register ABINonArgReturnReg0 = r29;
-+static constexpr Register ABINonArgReturnReg1 = r30;
-+static constexpr Register ABINonVolatileReg = r14;
-+static constexpr Register ABINonArgReturnVolatileReg = r11;
-+
-+static constexpr FloatRegister ABINonArgDoubleReg{FloatRegisters::f14,
-+ FloatRegisters::Double};
-+
-+// Wasm instance pointer register. Preserved across wasm function calls.
-+static constexpr Register InstanceReg = r18;
-+static constexpr Register HeapReg = r24;
-+static constexpr Register GlobalReg = r23;
-+
-+// Wasm table call registers.
-+static constexpr Register WasmTableCallScratchReg0 = ABINonArgReg0;
-+static constexpr Register WasmTableCallScratchReg1 = ABINonArgReg1;
-+static constexpr Register WasmTableCallSigReg = ABINonArgReg2;
-+static constexpr Register WasmTableCallIndexReg = ABINonArgReg3;
-+
-+// Wasm ref call registers.
-+static constexpr Register WasmCallRefCallScratchReg0 = ABINonArgReg0;
-+static constexpr Register WasmCallRefCallScratchReg1 = ABINonArgReg1;
-+static constexpr Register WasmCallRefCallScratchReg2 = ABINonArgReg2;
-+static constexpr Register WasmCallRefReg = ABINonArgReg3;
-+
-+// Wasm tail call scratch registers.
-+// WasmTailCallRAScratchReg must NOT be ABINonArgReg0: the shared tail-call
-+// code (wasmReturnCallImport, wasmReturnCallIndirect, wasmReturnCallRef)
-+// stores the callee address in ABINonArgReg0, and CollapseWasmFrame*
-+// overwrites tempForRA. On architectures with a GPR link register (ARM,
-+// MIPS, LA64, RISC-V) this is ra/lr. PPC64's LR is an SPR, so we use r14
-+// (ABINonVolatileReg) which is callee-saved and not used in call setup.
-+static constexpr Register WasmTailCallInstanceScratchReg = ABINonArgReg1;
-+static constexpr Register WasmTailCallRAScratchReg = ABINonVolatileReg;
-+static constexpr Register WasmTailCallFPScratchReg = ABINonArgReg3;
-+
-+// Register used as a scratch along the return path in the fast js -> wasm stub
-+// code. Must not overlap ReturnReg, JSReturnOperand, or InstanceReg.
-+// Must be volatile.
-+static constexpr Register WasmJitEntryReturnScratch = r10;
-+
-+static constexpr uint32_t ABIStackAlignment = 16;
-+static constexpr uint32_t CodeAlignment = 16;
-+static constexpr uint32_t JitStackAlignment = 16;
-+
-+static constexpr uint32_t JitStackValueAlignment =
-+ JitStackAlignment / sizeof(Value);
-+static_assert(JitStackAlignment % sizeof(Value) == 0 &&
-+ JitStackValueAlignment >= 1,
-+ "Stack alignment should be a non-zero multiple of sizeof(Value)");
-+
-+static constexpr uint32_t SimdMemoryAlignment = 16;
-+static_assert(
-+ CodeAlignment % SimdMemoryAlignment == 0,
-+ "Code alignment should be larger than any of the alignments "
-+ "which are used for the constant sections of the code buffer. "
-+ "Thus it should be larger than the alignment for SIMD constants.");
-+
-+static constexpr uint32_t WasmStackAlignment = SimdMemoryAlignment;
-+static const uint32_t WasmTrapInstructionLength = 4;
-+
-+static constexpr uint32_t WasmCheckedCallEntryOffset = 0u;
-+static constexpr uint32_t WasmCheckedTailEntryOffset = 32u;
-+
-+static constexpr Scale ScalePointer = TimesEight;
-+
-+class ABIArgGenerator : public ABIArgGeneratorShared {
-+ public:
-+ explicit ABIArgGenerator(ABIKind kind)
-+ : ABIArgGeneratorShared(kind),
-+ intRegIndex_(0),
-+ floatRegIndex_(0),
-+ current_() {
-+ // PPC64 ELFv2 ABI: the callee saves LR, CR, TOC into the caller's
-+ // frame (offsets 8, 16, 24 from caller SP). Reserve 32 bytes so that
-+ // callWithABIPre always allocates enough space for this link area.
-+ stackOffset_ += ShadowStackSpace;
-+ }
-+
-+ ABIArg next(MIRType argType);
-+ ABIArg& current() { return current_; }
-+
-+ protected:
-+ unsigned intRegIndex_;
-+ unsigned floatRegIndex_;
-+ ABIArg current_;
-+};
-+
-+static constexpr uint32_t NumIntArgRegs = 8;
-+static constexpr uint32_t NumFloatArgRegs = 13;
-+
-+static inline bool GetIntArgReg(uint32_t usedIntArgs, Register* out) {
-+ if (usedIntArgs < NumIntArgRegs) {
-+ *out = Register::FromCode(r3.code() + usedIntArgs);
-+ return true;
-+ }
-+ return false;
-+}
-+
-+static inline bool GetFloatArgReg(uint32_t usedFloatArgs, FloatRegister* out) {
-+ if (usedFloatArgs < NumFloatArgRegs) {
-+ *out = FloatRegister::FromCode(f1.code() + usedFloatArgs);
-+ return true;
-+ }
-+ return false;
-+}
-+
-+static inline bool GetTempRegForIntArg(uint32_t usedIntArgs,
-+ uint32_t usedFloatArgs, Register* out) {
-+ MOZ_ASSERT(usedFloatArgs == 0);
-+
-+ if (GetIntArgReg(usedIntArgs, out)) {
-+ return true;
-+ }
-+
-+ usedIntArgs -= NumIntArgRegs;
-+ if (usedIntArgs >= NumCallTempNonArgRegs) {
-+ return false;
-+ }
-+ *out = CallTempNonArgRegs[usedIntArgs];
-+ return true;
-+}
-+
-+// PPC64 instruction field positions.
-+// PPC uses big-endian bit numbering (bit 0 = MSB), but we store instructions
-+// in a uint32_t where bit 0 = LSB. The shifts below are in LSB-0 terms.
-+//
-+// [0:5] primary opcode (OpcodeShift=26)
-+// [6:10] RT/RS/BF/TO (RTShift=21, 5 bits)
-+// [11:15] RA/BI (RAShift=16, 5 bits)
-+// [16:20] RB/SH (RBShift=11, 5 bits)
-+// [16:31] SI/UI/D (Imm16Shift=0, 16 bits)
-+// [21:25] subop bits (varies)
-+// [21:30] XO (X-form; A/M/MD/MDS narrower)
-+// [31] Rc bit (RcShift=0)
-+
-+static const uint32_t OpcodeShift = 26;
-+static const uint32_t OpcodeBits = 6;
-+
-+static const uint32_t RTShift = 21;
-+static const uint32_t RTBits = 5;
-+static const uint32_t RSShift = 21;
-+static const uint32_t RSBits = 5;
-+static const uint32_t RAShift = 16;
-+static const uint32_t RABits = 5;
-+static const uint32_t RBShift = 11;
-+static const uint32_t RBBits = 5;
-+static const uint32_t RCShift = 6;
-+static const uint32_t RCBits = 5;
-+
-+static const uint32_t BOShift = 21;
-+static const uint32_t BOBits = 5;
-+static const uint32_t BIShift = 16;
-+static const uint32_t BIBits = 5;
-+
-+static const uint32_t Imm16Shift = 0;
-+static const uint32_t Imm16Bits = 16;
-+
-+static const uint32_t RcShift = 0;
-+static const uint32_t RcBit = 1;
-+
-+static const uint32_t RTMask = ((1 << RTBits) - 1) << RTShift;
-+static const uint32_t RSMask = ((1 << RSBits) - 1) << RSShift;
-+static const uint32_t RAMask = ((1 << RABits) - 1) << RAShift;
-+static const uint32_t RBMask = ((1 << RBBits) - 1) << RBShift;
-+static const uint32_t Imm16Mask = (1 << Imm16Bits) - 1;
-+static const uint32_t RegMask = (1 << RTBits) - 1;
-+
-+static inline uint32_t RT(Register r) { return (uint32_t)r.code() << RTShift; }
-+static inline uint32_t RT(FloatRegister r) {
-+ return (uint32_t)r.code() << RTShift;
-+}
-+static inline uint32_t RS(Register r) { return (uint32_t)r.code() << RSShift; }
-+static inline uint32_t RS(FloatRegister r) {
-+ return (uint32_t)r.code() << RSShift;
-+}
-+static inline uint32_t RA(Register r) { return (uint32_t)r.code() << RAShift; }
-+static inline uint32_t RA(FloatRegister r) {
-+ return (uint32_t)r.code() << RAShift;
-+}
-+static inline uint32_t RB(Register r) { return (uint32_t)r.code() << RBShift; }
-+static inline uint32_t RB(FloatRegister r) {
-+ return (uint32_t)r.code() << RBShift;
-+}
-+
-+// SPR encoding: the SPR number is split across bits 11-15 and 16-20 in a
-+// swapped arrangement. PPC_SPR(x) produces the value to OR into an
-+// mtspr/mfspr instruction at the RB+RA position (bits 11-20).
-+#define PPC_SPR(x) ((((int)(x) >> 5) & 0x1f) << 11 | ((int)(x) & 0x1f) << 16)
-+
-+enum PPCOpcodes {
-+ PPC_add = 0x7C000214,
-+ PPC_addc = 0x7C000014,
-+ PPC_adde = 0x7C000114,
-+ PPC_addi = 0x38000000,
-+ PPC_addis = 0x3C000000,
-+ PPC_and_ = 0x7C000038,
-+ // andi. is always record form (no non-record andi exists).
-+ PPC_andi_dot = 0x70000000,
-+ PPC_b = 0x48000000,
-+ PPC_bc = 0x40000000,
-+ // Encoded "bcl 20, lt, $+4": PC-relative branch-and-link by 4 bytes
-+ // (land at the next instruction) with BO=20 (branch always); BI=0
-+ // (=lt) is don't-care because BO=20 forces the branch. Used by
-+ // PoolLoadFPR{32,64}'s POWER8 stanza and PoolLoadSimd128's stanza to
-+ // seed LR with the current PC for the subsequent mflr+ld base
-+ // computation. Used by patch sites that write raw instruction memory
-+ // (PatchConstantPoolLoad, WriteLoad64Instructions, etc.). Named for
-+ // grep-ability and to avoid magic-number copies.
-+ PPC_bcl_always_plus4 = 0x42800005,
-+ PPC_bctr = 0x4E800420,
-+ PPC_bcctr = 0x4C000420,
-+ PPC_blr = 0x4E800020,
-+ PPC_cmpd = 0x7C200000,
-+ PPC_cmpdi = 0x2C200000,
-+ PPC_cmpld = 0x7C200040,
-+ PPC_cmpldi = 0x28200000,
-+ PPC_cmpw = 0x7C000000,
-+ PPC_cmpwi = 0x2C000000,
-+ PPC_cmplw = 0x7C000040,
-+ PPC_cmplwi = 0x28000000,
-+ PPC_cntlzd = 0x7C000074,
-+ PPC_cntlzw = 0x7C000034,
-+ PPC_cnttzd = 0x7C000474,
-+ PPC_cnttzw = 0x7C000434,
-+ PPC_crandc = 0x4C000102,
-+ PPC_cror = 0x4C000382,
-+ PPC_crorc = 0x4C000342,
-+ PPC_divd = 0x7C0003D2,
-+ PPC_divdu = 0x7C000392,
-+ PPC_divw = 0x7C0003D6,
-+ PPC_divwu = 0x7C000396,
-+ // POWER9 (ISA 3.0) modulo instructions.
-+ PPC_modsd = 0x7C000612,
-+ PPC_modsw = 0x7C000616,
-+ PPC_modud = 0x7C000212,
-+ PPC_moduw = 0x7C000216,
-+ PPC_extsb = 0x7C000774,
-+ PPC_extsh = 0x7C000734,
-+ PPC_extsw = 0x7C0007B4,
-+ PPC_fabs = 0xFC000210,
-+ PPC_fadd = 0xFC00002A,
-+ PPC_fadds = 0xEC00002A,
-+ PPC_fcpsgn = 0xFC000010,
-+ PPC_fcfid = 0xFC00069C,
-+ PPC_fcfids = 0xEC00069C,
-+ PPC_fcfidu = 0xFC00079C,
-+ PPC_fcfidus = 0xEC00079C,
-+ PPC_fcmpu = 0xFC000000,
-+ PPC_fctid = 0xFC00065C,
-+ PPC_fctidz = 0xFC00065E,
-+ PPC_fctiduz = 0xFC00075E,
-+ PPC_fctiwz = 0xFC00001E,
-+ PPC_fdiv = 0xFC000024,
-+ PPC_fdivs = 0xEC000024,
-+ PPC_fmr = 0xFC000090,
-+ PPC_fmul = 0xFC000032,
-+ PPC_fmuls = 0xEC000032,
-+ PPC_fneg = 0xFC000050,
-+ PPC_frim = 0xFC0003D0,
-+ PPC_frip = 0xFC000390,
-+ PPC_friz = 0xFC000350,
-+ PPC_frsp = 0xFC000018,
-+ PPC_fsub = 0xFC000028,
-+ PPC_fsubs = 0xEC000028,
-+ PPC_fsqrt = 0xFC00002C,
-+ PPC_fsqrts = 0xEC00002C,
-+ PPC_isel = 0x7C00001E,
-+ // POWER10 (ISA 3.1). RT = (CR[BI]==1) ? 1 : 0. XO=384 at bits 21-30.
-+ PPC_setbc = 0x7C000300,
-+ // POWER10 (ISA 3.1). RT = (CR[BI]==0) ? 1 : 0. XO=416.
-+ PPC_setbcr = 0x7C000340,
-+ PPC_lbarx = 0x7C000068,
-+ PPC_lbz = 0x88000000,
-+ PPC_lbzx = 0x7C0000AE,
-+ PPC_ld = 0xE8000000,
-+ PPC_ldarx = 0x7C0000A8,
-+ PPC_ldx = 0x7C00002A,
-+ PPC_lfd = 0xC8000000,
-+ PPC_lfdx = 0x7C0004AE,
-+ PPC_lfiwax = 0x7C0006AE,
-+ PPC_lfiwzx = 0x7C0006EE,
-+ PPC_lfs = 0xC0000000,
-+ PPC_lfsx = 0x7C00042E,
-+ PPC_lha = 0xA8000000,
-+ PPC_lharx = 0x7C0000E8,
-+ PPC_lhax = 0x7C0002AE,
-+ PPC_lhz = 0xA0000000,
-+ PPC_lhzx = 0x7C00022E,
-+ PPC_lwa = 0xE8000002,
-+ PPC_lwarx = 0x7C000028,
-+ PPC_lwz = 0x80000000,
-+ // X-form sign-extending word load (opcode 31, XO=341). Single-insn
-+ // equivalent of lwzx + extsw.
-+ PPC_lwax = 0x7C0002AA,
-+ PPC_lwzx = 0x7C00002E,
-+ PPC_mcrxrx = 0x7C000480,
-+ PPC_mcrfs = 0xFC000080,
-+ PPC_mfocrf = 0x7C100026,
-+ PPC_mffs = 0xFC00048E,
-+ PPC_mfspr = 0x7C0002A6,
-+ PPC_mfvsrd = 0x7C000066,
-+ PPC_mtcrf = 0x7C000120,
-+ PPC_mtfsb0 = 0xFC00008C,
-+ PPC_mtvsrd = 0x7C000166,
-+ // POWER8+ (ISA 2.07). VSR[XT].dw[0] = sign_ext_64(RA[32:63]).
-+ // XO=211 at bits 21-30. Combines extsw + mtvsrd into one insn.
-+ PPC_mtvsrwa = 0x7C0001A6,
-+ PPC_mtvsrws = 0x7C000326,
-+ PPC_mtvsrwz = 0x7C0001E6,
-+ PPC_mtspr = 0x7C0003A6,
-+ PPC_mulhd = 0x7C000092,
-+ PPC_mulhdu = 0x7C000012,
-+ PPC_mulhwu = 0x7C000016,
-+ PPC_mulli = 0x1C000000,
-+ PPC_mulld = 0x7C0001D2,
-+ PPC_mulldo = 0x7C0005D2,
-+ PPC_mullw = 0x7C0001D6,
-+ PPC_neg = 0x7C0000D0,
-+ PPC_nor = 0x7C0000F8,
-+ PPC_or_ = 0x7C000378,
-+ PPC_ori = 0x60000000,
-+ PPC_oris = 0x64000000,
-+ PPC_popcntb = 0x7C0000F4,
-+ PPC_popcntd = 0x7C0003F4,
-+ PPC_popcntw = 0x7C0002F4,
-+ PPC_brd = 0x7C000176, // POWER10: byte-reverse doubleword (X-form, XO=187)
-+ PPC_brh = 0x7C0001B6, // POWER10: byte-reverse each halfword (X-form, XO=219)
-+ PPC_brw = 0x7C000136, // POWER10: byte-reverse each word (X-form, XO=155)
-+ PPC_rldcl = 0x78000010,
-+ PPC_rldicl = 0x78000000,
-+ PPC_rldcr = 0x78000012,
-+ PPC_rldicr = 0x78000004,
-+ PPC_rldimi = 0x7800000C,
-+ PPC_rlwimi = 0x50000000,
-+ PPC_rlwinm = 0x54000000,
-+ PPC_rlwnm = 0x5C000000,
-+ PPC_sld = 0x7C000036,
-+ PPC_slw = 0x7C000030,
-+ PPC_srad = 0x7C000634,
-+ PPC_sradi = 0x7C000674,
-+ PPC_sraw = 0x7C000630,
-+ PPC_srawi = 0x7C000670,
-+ PPC_srd = 0x7C000436,
-+ PPC_srw = 0x7C000430,
-+ PPC_stb = 0x98000000,
-+ PPC_stbcx = 0x7C00056D,
-+ PPC_stbx = 0x7C0001AE,
-+ PPC_std = 0xF8000000,
-+ PPC_stdcx = 0x7C0001AD,
-+ PPC_stdu = 0xF8000001,
-+ PPC_stdx = 0x7C00012A,
-+ PPC_stfd = 0xD8000000,
-+ PPC_stfdu = 0xDC000000,
-+ PPC_stfdx = 0x7C0005AE,
-+ PPC_stfs = 0xD0000000,
-+ PPC_stfsu = 0xD4000000,
-+ PPC_stfsx = 0x7C00052E,
-+ PPC_sth = 0xB0000000,
-+ PPC_sthcx = 0x7C0005AD,
-+ PPC_sthx = 0x7C00032E,
-+ PPC_stw = 0x90000000,
-+ PPC_stwx = 0x7C00012E,
-+ PPC_stwbrx = 0x7C00052C,
-+ PPC_stwcx = 0x7C00012D,
-+ PPC_subf = 0x7C000050,
-+ PPC_subfc = 0x7C000010,
-+ PPC_subfe = 0x7C000110,
-+ PPC_subfic = 0x20000000,
-+ PPC_sync = 0x7C0004AC,
-+ // isync — execution synchronization. Discards prefetched instructions and
-+ // forces a refetch+reexecute of everything past the barrier; prevents
-+ // speculative bypass. Used for Spectre v1 mitigation in speculationBarrier.
-+ // Encoding: bytes `2c 01 00 4c` (LE) = 0x4C00012C.
-+ PPC_isync = 0x4C00012C,
-+ PPC_trap = 0x7FE00008,
-+ PPC_tw = 0x7C000008,
-+ PPC_xor_ = 0x7C000278,
-+ PPC_xori = 0x68000000,
-+ PPC_xoris = 0x6C000000,
-+ // VMX register load/store (X-form, opcode 31, XO=103/231).
-+ // Operate on raw VR0-31 (the lvx/stvx mnemonics predate VSX, so the
-+ // assembler exposes them with a uint8_t VR index rather than via the
-+ // VSR-namespace FloatRegister overloads used for lxvx/stxvx.)
-+ PPC_lvx = 0x7C0000CE,
-+ PPC_lxvd2x = 0x7C000698,
-+ PPC_lxvx = 0x7C000218,
-+ PPC_mfvsrld = 0x7C000266,
-+ PPC_mtvsrdd = 0x7C000366,
-+ PPC_stvx = 0x7C0001CE,
-+ PPC_stxvd2x = 0x7C000798,
-+ PPC_stxvx = 0x7C000318,
-+ PPC_vaddubm = 0x10000000,
-+ PPC_vavgub = 0x10000402,
-+ PPC_vavguh = 0x10000442,
-+ PPC_vcmpequb = 0x10000006,
-+ PPC_vcmpequh = 0x10000046,
-+ PPC_vcmpequw = 0x10000086,
-+ PPC_vcmpequd = 0x100000C7,
-+ PPC_vcmpgtsb = 0x10000306,
-+ PPC_vcmpgtsh = 0x10000346,
-+ PPC_vcmpgtsw = 0x10000386,
-+ PPC_vcmpgtsd = 0x100003C7,
-+ PPC_vcmpgtub = 0x10000206,
-+ PPC_vcmpgtuh = 0x10000246,
-+ PPC_vcmpgtuw = 0x10000286,
-+ PPC_vcmpgtud = 0x100002C7,
-+ PPC_vcmpneb = 0x10000007, // POWER9 (ISA 3.0)
-+ PPC_vcmpneh = 0x10000047, // POWER9
-+ PPC_vcmpnew = 0x10000087, // POWER9
-+ PPC_vadduhm = 0x10000040,
-+ PPC_vadduwm = 0x10000080,
-+ PPC_vaddudm = 0x100000C0,
-+ PPC_vaddubs = 0x10000200,
-+ PPC_vadduhs = 0x10000240,
-+ PPC_vaddsbs = 0x10000300,
-+ PPC_vaddshs = 0x10000340,
-+ PPC_vmaxsb = 0x10000102,
-+ PPC_vmaxsh = 0x10000142,
-+ PPC_vmaxsw = 0x10000182,
-+ PPC_vmaxsd = 0x100001C2,
-+ PPC_vmaxub = 0x10000002,
-+ PPC_vmaxuh = 0x10000042,
-+ PPC_vmaxuw = 0x10000082,
-+ PPC_vmhraddshs = 0x10000021,
-+ PPC_vmrghb = 0x1000000C,
-+ PPC_vmrghh = 0x1000004C,
-+ PPC_vmrghw = 0x1000008C,
-+ PPC_vmrglb = 0x1000010C,
-+ PPC_vmrglh = 0x1000014C,
-+ PPC_vmrglw = 0x1000018C,
-+ PPC_vminsb = 0x10000302,
-+ PPC_vminsh = 0x10000342,
-+ PPC_vminsw = 0x10000382,
-+ PPC_vminub = 0x10000202,
-+ PPC_vminuh = 0x10000242,
-+ PPC_vminuw = 0x10000282,
-+ // POWER9 (ISA 3.0) per-lane integer negate. VRA field carries the subop
-+ // code: 6 for vnegw, 7 for vnegd. Base XO is 0x602.
-+ PPC_vnegw = 0x10060602,
-+ PPC_vnegd = 0x10070602,
-+ PPC_vmladduhm = 0x10000022,
-+ PPC_vmuluwm = 0x10000089,
-+ PPC_vmulld = 0x100001C9, // POWER10 (XO=457, vector i64x2 multiply low)
-+ PPC_vmulesb = 0x10000308,
-+ PPC_vmuleub = 0x10000208,
-+ PPC_vmulesh = 0x10000348,
-+ PPC_vmuleuh = 0x10000248,
-+ PPC_vmulesw = 0x10000388,
-+ PPC_vmuleuw = 0x10000288,
-+ PPC_vmulosb = 0x10000108,
-+ PPC_vmuloub = 0x10000008,
-+ PPC_vmulosh = 0x10000148,
-+ PPC_vmulouh = 0x10000048,
-+ PPC_vmulosw = 0x10000188,
-+ PPC_vmulouw = 0x10000088,
-+ PPC_vmsumshm = 0x10000028,
-+ PPC_vmsumuhm = 0x10000026,
-+ PPC_vperm = 0x1000002B,
-+ // VX-form, opcode 4, XO=0x54C. Per-byte bit-permute of a 128-bit value;
-+ // result 16-bit bitmap lands in dw0 low 16 bits, recoverable via mfvsrd.
-+ // Available on POWER8+ (ISA 2.07).
-+ PPC_vbpermq = 0x1000054C,
-+ // POWER10 (ISA 3.1) Vector Extract Mask. VX-form, opcode 4, XO=0x642,
-+ // with UIM at bits 11..15 selecting lane width: 8=byte, 9=halfword,
-+ // 10=word, 11=doubleword. RT is a GPR (low N bits = wasm bitmask).
-+ PPC_vextractbm = 0x10080642,
-+ PPC_vextracthm = 0x10090642,
-+ PPC_vextractwm = 0x100A0642,
-+ PPC_vextractdm = 0x100B0642,
-+ // POWER10 vector insert from GPR at immediate byte offset:
-+ // vinsw VRT, RB, UIM VRT[UIM*8:UIM*8+31] ← RB[32:63]
-+ // vinsd VRT, RB, UIM VRT[UIM*8:UIM*8+63] ← RB[0:63]
-+ // VX-form, opcode 4. RB at bits 16..20, UIM at bits 11..15.
-+ PPC_vinsw = 0x100000CF, // POWER10 (XO=207)
-+ PPC_vinsd = 0x100001CF, // POWER10 (XO=463)
-+ // POWER10 vector insert byte/halfword from GPR with register-supplied
-+ // (right-indexed = LE-natural) byte position:
-+ // vinsbrx VRT, RA, RB VRT.byte[RA & 0xF] ← RB & 0xFF
-+ // vinshrx VRT, RA, RB VRT.hword[(RA & 0xE)/2] ← RB & 0xFFFF
-+ // VX-form, opcode 4. RA at bits 16..20, RB at bits 11..15.
-+ PPC_vinsbrx = 0x1000030F, // POWER10 (XO=783)
-+ PPC_vinshrx = 0x1000034F, // POWER10 (XO=847)
-+ // POWER9 (ISA 3.0) vector insert byte/halfword from VR at immediate
-+ // byte position:
-+ // vinsertb VRT, VRB, UIM VRT.byte[UIM] ← VRB.byte[7] (BE)
-+ // vinserth VRT, VRB, UIM VRT.hword[UIM..+1] ← VRB.byte[6..7] (BE)
-+ // V-form, opcode 4. VRB at bits 11..15, UIM at bits 16..20. Simd128
-+ // lives in VSR32-63 (= VR0-31), so the V-form VRT field addresses our
-+ // Simd128 storage via `encoding() & 31`.
-+ PPC_vinsertb = 0x1000030D, // POWER9 (XO=781)
-+ PPC_vinserth = 0x1000034D, // POWER9 (XO=845)
-+ PPC_vextractub = 0x1000020D, // POWER9 (XO=525)
-+ PPC_vextractuh = 0x1000024D, // POWER9 (XO=589)
-+ PPC_vspltisb = 0x1000030C, // POWER7+ (XO=780, splat 5-bit SIMM to all 16 byte lanes)
-+ PPC_vspltish = 0x1000034C, // POWER7+ (XO=844, splat 5-bit SIMM to all 8 i16 lanes)
-+ PPC_vspltisw = 0x1000038C, // POWER7+ (XO=908, splat 5-bit SIMM to all 4 i32 lanes)
-+ PPC_vpopcntb = 0x10000703,
-+ PPC_vslb = 0x10000104,
-+ PPC_vsld = 0x100005C4,
-+ PPC_vsldoi = 0x1000002C,
-+ PPC_vslh = 0x10000144,
-+ PPC_vslo = 0x1000040C,
-+ PPC_vslw = 0x10000184,
-+ PPC_vspltb = 0x1000020C,
-+ PPC_vsplth = 0x1000024C,
-+ PPC_vsrab = 0x10000304,
-+ PPC_vsrad = 0x100003C4,
-+ PPC_vsrah = 0x10000344,
-+ PPC_vsraw = 0x10000384,
-+ PPC_vsrb = 0x10000204,
-+ PPC_vsrd = 0x100006C4,
-+ PPC_vsrh = 0x10000244,
-+ PPC_vsro = 0x1000044C,
-+ PPC_vsrw = 0x10000284,
-+ PPC_vpkshss = 0x1000018E,
-+ PPC_vpkshus = 0x1000010E,
-+ PPC_vpkswss = 0x100001CE,
-+ PPC_vpkswus = 0x1000014E,
-+ PPC_vupkhsb = 0x1000020E,
-+ PPC_vupkhsh = 0x1000024E,
-+ PPC_vupkhsw = 0x1000064E,
-+ PPC_vupklsb = 0x1000028E,
-+ PPC_vupklsh = 0x100002CE,
-+ PPC_vupklsw = 0x100006CE,
-+ PPC_vsububm = 0x10000400,
-+ PPC_vsubuhm = 0x10000440,
-+ PPC_vsubuwm = 0x10000480,
-+ PPC_vsubudm = 0x100004C0,
-+ PPC_vsububs = 0x10000600,
-+ PPC_vsubuhs = 0x10000640,
-+ PPC_vsubsbs = 0x10000700,
-+ PPC_vsubshs = 0x10000740,
-+ PPC_xscvdpspn = 0xF000042C,
-+ PPC_xscvspdpn = 0xF000052C,
-+ // POWER9 (ISA 3.0) scalar FP16 conversions, XX2-form. The UIM
-+ // disambiguator is baked into the constant (xscvdphp=17, xscvhpdp=16).
-+ // Encodings cross-checked against binutils with `.machine power9`.
-+ PPC_xscvdphp = 0xF011056C,
-+ PPC_xscvhpdp = 0xF010056C,
-+ // POWER9 (ISA 3.0) scalar VSX extract biased exponent, XX2-form.
-+ // XT.dword[0] = (zero || biased_exp_11bit), XT.dword[1] = 0. XO=347
-+ // (shares XO with xscv{dp,hp}{hp,dp} — disambiguated by bits 16-20=0).
-+ // Encoding cross-checked against binutils with `.machine power9`.
-+ PPC_xsxexpdp = 0xF000056C,
-+ // POWER9 (ISA 3.0) scalar FP16 load/store, X-form (opcode 31).
-+ // lxsihzx zero-extends; stxsihx writes 16 bits from VSR dword 0
-+ // word 1's low halfword.
-+ PPC_lxsihzx = 0x7C00065A,
-+ PPC_stxsihx = 0x7C00075A,
-+ // POWER9 scalar VSX max/min with Java/JavaScript semantics — handles
-+ // ±0 and NaN identically to Math.max/Math.min in ECMA-262 (covers
-+ // 19 corner cases against the JS shell).
-+ // XX3-form, primary opcode 60, XO=144 (max) / XO=152 (min).
-+ PPC_xsmaxjdp = 0xF0000480,
-+ PPC_xsminjdp = 0xF00004C0,
-+ PPC_xxbrd = 0xF017076C,
-+ PPC_xvabsdp = 0xF0000764,
-+ PPC_xvabssp = 0xF0000664,
-+ PPC_xvadddp = 0xF0000300,
-+ PPC_xvaddsp = 0xF0000200,
-+ PPC_xvcmpeqdp = 0xF0000318,
-+ PPC_xvcmpeqsp = 0xF0000218,
-+ PPC_xvcmpgedp = 0xF0000398,
-+ PPC_xvcmpgesp = 0xF0000298,
-+ PPC_xvcmpgtdp = 0xF0000358,
-+ PPC_xvcmpgtsp = 0xF0000258,
-+ PPC_xvcvdpsp = 0xF0000624,
-+ PPC_xvcvdpsxws = 0xF0000360,
-+ PPC_xvcvdpuxws = 0xF0000320,
-+ PPC_xvcvspdp = 0xF0000724,
-+ PPC_xvcvspsxws = 0xF0000260,
-+ PPC_xvcvspuxws = 0xF0000220,
-+ PPC_xvcvsxwdp = 0xF00003E0,
-+ PPC_xvcvsxwsp = 0xF00002E0,
-+ PPC_xvcvuxwdp = 0xF00003A0,
-+ PPC_xvcvuxwsp = 0xF00002A0,
-+ PPC_xvdivdp = 0xF00003C0,
-+ PPC_xvdivsp = 0xF00002C0,
-+ PPC_xvmaddadp = 0xF0000308,
-+ PPC_xvmaddasp = 0xF0000208,
-+ PPC_xvmaxdp = 0xF0000700,
-+ PPC_xvmaxsp = 0xF0000600,
-+ PPC_xvmindp = 0xF0000740,
-+ PPC_xvminsp = 0xF0000640,
-+ PPC_xvmuldp = 0xF0000380,
-+ PPC_xvmulsp = 0xF0000280,
-+ PPC_xvnegdp = 0xF00007E4,
-+ PPC_xvnmsubadp = 0xF0000788,
-+ PPC_xvnmsubasp = 0xF0000688,
-+ PPC_xvnegsp = 0xF00006E4,
-+ PPC_xvrdpic = 0xF00003AC,
-+ PPC_xvrdpim = 0xF00003E4,
-+ PPC_xvrdpip = 0xF00003A4,
-+ PPC_xvrdpiz = 0xF0000364,
-+ PPC_xvrspic = 0xF00002AC,
-+ PPC_xvrspim = 0xF00002E4,
-+ PPC_xvrspip = 0xF00002A4,
-+ PPC_xvrspiz = 0xF0000264,
-+ PPC_xvsqrtdp = 0xF000032C,
-+ PPC_xvsqrtsp = 0xF000022C,
-+ PPC_xvsubdp = 0xF0000340,
-+ PPC_xvsubsp = 0xF0000240,
-+ PPC_xxextractuw = 0xF0000294,
-+ PPC_xxinsertw = 0xF00002D4,
-+ PPC_xxland = 0xF0000410,
-+ PPC_xxlandc = 0xF0000450,
-+ PPC_xxlnor = 0xF0000510,
-+ PPC_xxlor = 0xF0000490,
-+ PPC_xxlxor = 0xF00004D0,
-+ PPC_xxpermdi = 0xF0000050,
-+ PPC_xxsel = 0xF0000030,
-+ PPC_xxspltib = 0xF00002D0, // POWER9 (ISA 3.0): XX1-form, no Rc
-+ PPC_xxspltw = 0xF0000290,
-+
-+ // Simplified mnemonics.
-+ PPC_mr = PPC_or_,
-+ PPC_not = PPC_nor,
-+ PPC_nop = PPC_ori,
-+ PPC_lwsync = PPC_sync | (1 << 21),
-+
-+ PPC_MAJOR_OPCODE_MASK = 0xFC000000
-+};
-+
-+static const uint32_t NopInst = (uint32_t)PPC_nop;
-+static const uint32_t PPC_STANZA_LENGTH = 16;
-+
-+class Instruction;
-+class InstReg;
-+class InstImm;
-+class BOffImm16;
-+class JOffImm26;
-+
-+// PPC64 base instruction type: a single 32-bit word.
-+class Instruction {
-+ protected:
-+ uint32_t data;
-+
-+ public:
-+ explicit Instruction(uint32_t data_) : data(data_) {}
-+ explicit Instruction(PPCOpcodes op) : data((uint32_t)op) {}
-+
-+ uint32_t encode() const { return data; }
-+
-+ void makeNop() { data = NopInst; }
-+ void makeOp_mtctr(Register r) {
-+ data = PPC_mtspr | ((uint32_t)r.code()) << 21 | PPC_SPR(9);
-+ }
-+ void makeOp_bctr(uint32_t linkBit = 0) { data = PPC_bctr | linkBit; }
-+
-+ void setData(uint32_t data) { this->data = data; }
-+
-+ const Instruction& operator=(const Instruction& src) {
-+ data = src.data;
-+ return *this;
-+ }
-+
-+ uint32_t extractBit(uint32_t bit) const { return (encode() >> bit) & 1; }
-+ uint32_t extractBitField(uint32_t hi, uint32_t lo) const {
-+ return (encode() >> lo) & ((2 << (hi - lo)) - 1);
-+ }
-+
-+ uint32_t extractOpcode() const { return data & PPC_MAJOR_OPCODE_MASK; }
-+ bool isOpcode(uint32_t op) const {
-+ return extractOpcode() == (op & PPC_MAJOR_OPCODE_MASK);
-+ }
-+
-+ uint32_t extractRT() const {
-+ return extractBitField(RTShift + RTBits - 1, RTShift);
-+ }
-+ uint32_t extractRA() const {
-+ return extractBitField(RAShift + RABits - 1, RAShift);
-+ }
-+ uint32_t extractRB() const {
-+ return extractBitField(RBShift + RBBits - 1, RBShift);
-+ }
-+ uint32_t extractImm16() const { return data & Imm16Mask; }
-+
-+ Instruction* next() { return this + 1; }
-+
-+ const uint32_t* raw() const { return &data; }
-+ uint32_t size() const { return 4; }
-+};
-+
-+static_assert(sizeof(Instruction) == 4);
-+
-+class InstNOP : public Instruction {
-+ public:
-+ InstNOP() : Instruction(NopInst) {}
-+};
-+
-+ // Three-register instruction word (X-form and XO-form): an opcode combined
-+ // with the RT, RA and RB register fields.
-+ class InstReg : public Instruction {
-+ public:
-+ explicit InstReg(PPCOpcodes op) : Instruction(op) {}
-+ // General-purpose register operands.
-+ InstReg(PPCOpcodes op, Register rt, Register ra, Register rb)
-+ : Instruction(static_cast<uint32_t>(op) | RT(rt) | RA(ra) | RB(rb)) {}
-+ // Floating-point register operands, placed in the same three fields.
-+ InstReg(PPCOpcodes op, FloatRegister frt, FloatRegister fra,
-+ FloatRegister frb)
-+ : Instruction(static_cast<uint32_t>(op) | RT(frt) | RA(fra) | RB(frb)) {}
-+
-+ // Field setters: drop the old field bits, then OR in the new register.
-+ void setRT(Register r) {
-+ data &= ~RTMask;
-+ data |= RT(r);
-+ }
-+ void setRA(Register r) {
-+ data &= ~RAMask;
-+ data |= RA(r);
-+ }
-+ void setRB(Register r) {
-+ data &= ~RBMask;
-+ data |= RB(r);
-+ }
-+
-+ // Replace the low half-word with (imm & Imm16Mask); the upper half-word is
-+ // left untouched.
-+ void setImm16(uint32_t imm) {
-+ const uint32_t upperHalf = data & 0xFFFF0000;
-+ data = upperHalf | (imm & Imm16Mask);
-+ }
-+ // Same value as Instruction::extractImm16().
-+ uint32_t extractImm16Value() const { return extractImm16(); }
-+ };
-+
-+ // D-form (register + 16-bit immediate) instruction word.
-+ // The field at bits 21-25 is called RT for loads and addi, and RS for stores
-+ // and ori. It is the same field either way, so callers simply pass whichever
-+ // register belongs there.
-+ class InstImm : public Instruction {
-+ public:
-+ explicit InstImm(PPCOpcodes op) : Instruction(op) {}
-+ InstImm(PPCOpcodes op, Register rt, Register ra, uint32_t imm16)
-+ : Instruction(static_cast<uint32_t>(op) | RT(rt) | RA(ra) |
-+ (imm16 & Imm16Mask)) {}
-+
-+ // Field setters: drop the old field bits, then OR in the new register.
-+ void setRT(Register r) {
-+ data &= ~RTMask;
-+ data |= RT(r);
-+ }
-+ void setRA(Register r) {
-+ data &= ~RAMask;
-+ data |= RA(r);
-+ }
-+
-+ // Replace the low half-word with (imm & Imm16Mask); the upper half-word is
-+ // left untouched.
-+ void setImm16(uint32_t imm) {
-+ const uint32_t upperHalf = data & 0xFFFF0000;
-+ data = upperHalf | (imm & Imm16Mask);
-+ }
-+ // Overwrite the 5-bit field at bits 16-20 with rl's register code.
-+ void setLowerReg(Register rl) {
-+ const uint32_t regField = static_cast<uint32_t>(rl.code()) << 16;
-+ data = (data & 0xFFE0FFFF) | regField;
-+ }
-+ // Same value as Instruction::extractImm16().
-+ uint32_t extractImm16Value() const { return extractImm16(); }
-+
-+ // Tag carried by a tagged trap instruction (tw). The definition lives in
-+ // Assembler-ppc64.cpp. The result is a TrapTag value returned as uint8_t,
-+ // since Assembler::TrapTag has not been declared yet at this point in the
-+ // header.
-+ uint8_t traptag();
-+ };
-+
-+ // Signed byte displacement for a conditional branch (bc-form instructions).
-+ // Only bits 2..15 reach the instruction word, so displacements are multiples
-+ // of 4 in the range [-32768, 32764], i.e. +/-32 KB.
-+ class BOffImm16 {
-+ int32_t data;
-+
-+ public:
-+ // Sentinel stored by the default constructor. It lies outside the range
-+ // accepted by IsInRange(), so it cannot collide with a checked offset.
-+ static const int32_t INVALID = 0x00020000;
-+
-+ BOffImm16() : data(INVALID) {}
-+ explicit BOffImm16(int offset) : data(offset) {
-+ MOZ_ASSERT((offset & 0x3) == 0);
-+ MOZ_ASSERT(IsInRange(offset));
-+ }
-+ explicit BOffImm16(InstImm inst);
-+
-+ static bool IsInRange(int offset) {
-+ return !(offset < -32768 || offset > 32764);
-+ }
-+ bool isInvalid() const { return data == INVALID; }
-+
-+ // Displacement masked down to the instruction's offset field.
-+ uint32_t encode() const {
-+ MOZ_ASSERT(!isInvalid());
-+ const uint32_t bits = static_cast<uint32_t>(data);
-+ return bits & 0xFFFC;
-+ }
-+ // The stored signed displacement, unmodified.
-+ int32_t decode() const {
-+ MOZ_ASSERT(!isInvalid());
-+ return data;
-+ }
-+
-+ Instruction* getDest(Instruction* src) const;
-+ };
-+
-+ // Signed byte displacement for an unconditional branch (b/bl instructions).
-+ // Only bits 2..25 reach the instruction word, so displacements are multiples
-+ // of 4 in the range [-33554432, 33554428], i.e. +/-32 MB.
-+ class JOffImm26 {
-+ int32_t data;
-+
-+ public:
-+ // Sentinel stored by the default constructor. It lies outside the range
-+ // accepted by IsInRange(), so it cannot collide with a checked offset.
-+ static const int32_t INVALID = 0x20000000;
-+
-+ JOffImm26() : data(INVALID) {}
-+ explicit JOffImm26(int offset) : data(offset) {
-+ MOZ_ASSERT((offset & 0x3) == 0);
-+ MOZ_ASSERT(IsInRange(offset));
-+ }
-+
-+ static bool IsInRange(int offset) {
-+ return !(offset < -33554432 || offset > 33554428);
-+ }
-+ bool isInvalid() const { return data == INVALID; }
-+
-+ // Displacement masked down to the instruction's offset field.
-+ uint32_t encode() const {
-+ MOZ_ASSERT(!isInvalid());
-+ const uint32_t bits = static_cast<uint32_t>(data);
-+ return bits & 0x03FFFFFC;
-+ }
-+ // The stored signed displacement, unmodified.
-+ int32_t decode() const {
-+ MOZ_ASSERT(!isInvalid());
-+ return data;
-+ }
-+
-+ Instruction* getDest(Instruction* src) const;
-+ };
-+
-+ // Immediate operand for D-form instructions. The value is kept in a 32-bit
-+ // slot; encode() emits only its low 16 bits.
-+ class Imm16 {
-+ int32_t value;
-+
-+ public:
-+ Imm16();
-+ explicit Imm16(uint32_t imm) : value(static_cast<int32_t>(imm)) {}
-+
-+ // Low 16 bits, ready to be OR-ed into an instruction word.
-+ uint32_t encode() const {
-+ const uint32_t bits = static_cast<uint32_t>(value);
-+ return bits & 0xffff;
-+ }
-+ // Both decoders return the stored value as-is; no sign extension or
-+ // truncation is applied here.
-+ int32_t decodeSigned() const { return value; }
-+ uint32_t decodeUnsigned() const { return static_cast<uint32_t>(value); }
-+
-+ static bool IsInSignedRange(int32_t imm) {
-+ return INT16_MIN <= imm && imm <= INT16_MAX;
-+ }
-+ static bool IsInUnsignedRange(uint32_t imm) { return !(imm > UINT16_MAX); }
-+
-+ // Split a 32-bit immediate into its low / high 16-bit halves.
-+ static Imm16 Lower(Imm32 imm) {
-+ const auto lowHalf = imm.value & 0xffff;
-+ return Imm16(lowHalf);
-+ }
-+ static Imm16 Upper(Imm32 imm) {
-+ const auto highHalf = (imm.value >> 16) & 0xffff;
-+ return Imm16(highHalf);
-+ }
-+ };
-+
-+ // 8-bit immediate operand, stored as an unsigned byte.
-+ class Imm8 {
-+ uint8_t value;
-+
-+ public:
-+ Imm8();
-+ // Keeps only the low 8 bits of imm.
-+ explicit Imm8(uint32_t imm) : value(imm) {}
-+
-+ // The byte moved up by `shift` bits, ready to be OR-ed into a word.
-+ uint32_t encode(uint32_t shift) const { return value << shift; }
-+ // Both decoders return the stored byte unchanged. Because the storage is
-+ // unsigned, decodeSigned() never yields a negative number.
-+ int32_t decodeSigned() const { return value; }
-+ uint32_t decodeUnsigned() const { return value; }
-+
-+ static bool IsInSignedRange(int32_t imm) {
-+ return INT8_MIN <= imm && imm <= INT8_MAX;
-+ }
-+ static bool IsInUnsignedRange(uint32_t imm) { return !(imm > UINT8_MAX); }
-+
-+ // Split a 16-bit immediate into its low / high bytes.
-+ static Imm8 Lower(Imm16 imm) {
-+ const auto lowByte = imm.decodeSigned() & 0xff;
-+ return Imm8(lowByte);
-+ }
-+ static Imm8 Upper(Imm16 imm) {
-+ const auto highByte = (imm.decodeSigned() >> 8) & 0xff;
-+ return Imm8(highByte);
-+ }
-+ };
-+
-+ // A tagged instruction operand: a general-purpose register (REG), a
-+ // floating-point register (FREG), or a base-register + displacement memory
-+ // reference (MEM).
-+ class Operand {
-+ public:
-+ enum Tag { REG, FREG, MEM };
-+
-+ private:
-+ Tag tag : 3;
-+ // Register code; for MEM operands this is the base register.
-+ uint32_t reg : 5;
-+ // Displacement; only meaningful when tag == MEM.
-+ int32_t offset;
-+
-+ public:
-+ // Register operands carry no displacement. offset is still set to 0 so
-+ // that copying the operand never reads an uninitialized member.
-+ MOZ_IMPLICIT Operand(Register reg_)
-+ : tag(REG), reg(reg_.code()), offset(0) {}
-+
-+ explicit Operand(FloatRegister freg)
-+ : tag(FREG), reg(freg.code()), offset(0) {}
-+
-+ // Memory operands: base register plus signed displacement.
-+ Operand(Register base, Imm32 off)
-+ : tag(MEM), reg(base.code()), offset(off.value) {}
-+
-+ Operand(Register base, int32_t off)
-+ : tag(MEM), reg(base.code()), offset(off) {}
-+
-+ explicit Operand(const Address& addr)
-+ : tag(MEM), reg(addr.base.code()), offset(addr.offset) {}
-+
-+ Tag getTag() const { return tag; }
-+
-+ // The accessors below assert that the operand has the matching tag.
-+ Register toReg() const {
-+ MOZ_ASSERT(tag == REG);
-+ return Register::FromCode(reg);
-+ }
-+
-+ FloatRegister toFReg() const {
-+ MOZ_ASSERT(tag == FREG);
-+ return FloatRegister::FromCode(reg);
-+ }
-+
-+ // Write the base register to *r and the displacement to *dest.
-+ void toAddr(Register* r, Imm32* dest) const {
-+ MOZ_ASSERT(tag == MEM);
-+ *r = Register::FromCode(reg);
-+ *dest = Imm32(offset);
-+ }
-+ Address toAddress() const {
-+ MOZ_ASSERT(tag == MEM);
-+ return Address(Register::FromCode(reg), offset);
-+ }
-+ int32_t disp() const {
-+ MOZ_ASSERT(tag == MEM);
-+ return offset;
-+ }
-+
-+ // Base register as a raw register code.
-+ int32_t base() const {
-+ MOZ_ASSERT(tag == MEM);
-+ return reg;
-+ }
-+ // Base register as a Register.
-+ Register baseReg() const {
-+ MOZ_ASSERT(tag == MEM);
-+ return Register::FromCode(reg);
-+ }
-+ };
-+
-+// Bug 2034064 collapsed the per-buffer compile-time configuration of
-+// AssemblerBufferWithConstantPools into AssemblerBufferSettings, and reduced
-+// the runtime ctor to (poolMaxOffset, nopFill). instBufferAlign and the
-+// NumShortBranchRanges template arg were dropped: PPC64 previously passed
-+// instBufferAlign=8 (unused on this backend; pool entries are 4-byte aligned)
-+// and NumShortBranchRanges=0.
-+using PPCBuffer = js::jit::AssemblerBufferWithConstantPools<
-+ Instruction, Assembler,
-+ js::jit::AssemblerBufferSettings{
-+ .instSize = 4,
-+ .guardSize = 1,
-+ .headerSize = 1,
-+ .pcBias = 0,
-+ .alignFillInst = NopInst,
-+ .nopFillInst = NopInst,
-+ }>;
-+
-+ // Inherits executableCopy() and appendRawCode() from
-+ // AssemblerBufferWithConstantPools, which assert pool is flushed.
-+ class PPCBufferWithExecutableCopy : public PPCBuffer {
-+ public:
-+ // Forwards both arguments unchanged to the PPCBuffer constructor.
-+ PPCBufferWithExecutableCopy(size_t poolMaxOffset, unsigned nopFill)
-+ : PPCBuffer(poolMaxOffset, nopFill) {}
-+ };
-+
-+class Assembler : public AssemblerShared {
-+ public:
-+ // Trap tags encoded in the low bits of a trap word.
-+ // FreeBSD and others may use r1 in their trap word, so bit 0 is avoided.
-+ enum TrapTag {
-+ BTag = 2,
-+ BCTag = 4,
-+ CallTag = 6,
-+ DebugTag0 = 10,
-+ DebugTag1 = 12,
-+ DebugTag2 = 14
-+ };
-+
-+ // Pool load types encoded in bits 21-22 of pool hint words.
-+ // Used by InsertIndexIntoTag / PatchConstantPoolLoad.
-+ enum PoolLoadType {
-+ PoolLoadFPR64 = 1, // lfd fD, offset(rBase)
-+ PoolLoadSimd128 = 2, // addi rBase, rBase, offset; lxvx vsD, 0, rBase
-+ PoolLoadFPR32 = 3 // lfs fD, offset(rBase) — auto-expands to double
-+ };
-+
-+ enum BranchBits {
-+ BranchOnClear = 0x04,
-+ BranchOnSet = 0x0c,
-+ BranchOptionMask = 0x0f,
-+ BranchOptionInvert = 0x08
-+ };
-+
-+ // PPC condition encoding. The top nybble is the offset to the CR field
-+ // (the x in BIF*4+x), and the bottom is the BO field.
-+ // Synthetic flags sit in the MSB and are masked off before use.
-+ enum Condition {
-+ ConditionUnsigned = 0x100,
-+ ConditionUnsignedHandled = 0x2ff,
-+ ConditionZero = 0x400,
-+ ConditionOnlyXER = 0x200,
-+ ConditionXERCA = 0x23c,
-+ ConditionXERNCA = 0x234,
-+ ConditionXEROV = 0x21c,
-+
-+ Equal = 0x2c,
-+ NotEqual = 0x24,
-+ GreaterThan = 0x1c,
-+ GreaterThanOrEqual = 0x04,
-+ LessThan = 0x0c,
-+ LessThanOrEqual = 0x14,
-+
-+ Above = GreaterThan | ConditionUnsigned,
-+ AboveOrEqual = GreaterThanOrEqual | ConditionUnsigned,
-+ Below = LessThan | ConditionUnsigned,
-+ BelowOrEqual = LessThanOrEqual | ConditionUnsigned,
-+
-+ Signed = LessThan | ConditionZero,
-+ NotSigned = GreaterThanOrEqual | ConditionZero,
-+ Zero = Equal | ConditionZero,
-+ NonZero = NotEqual | ConditionZero,
-+
-+ Overflow = ConditionXEROV,
-+ NotOverflow = ConditionOnlyXER | LessThanOrEqual,
-+ CarrySet = ConditionXERCA,
-+ CarryClear = ConditionXERNCA,
-+
-+ Always = 0x1f,
-+ SOBit = 0x3c,
-+ NSOBit = 0x34
-+ };
-+
-+ enum DoubleCondition {
-+ DoubleConditionUnordered = 0x100,
-+ DoubleOrdered = 0x34,
-+ DoubleEqual = 0x2c,
-+ DoubleNotEqual = 0x24,
-+ DoubleGreaterThan = 0x1c,
-+ DoubleGreaterThanOrEqual = 0x04,
-+ DoubleLessThan = 0x0c,
-+ DoubleLessThanOrEqual = 0x14,
-+ DoubleUnordered = 0x3c,
-+ DoubleEqualOrUnordered = DoubleEqual | DoubleConditionUnordered,
-+ DoubleNotEqualOrUnordered = DoubleNotEqual | DoubleConditionUnordered,
-+ DoubleGreaterThanOrUnordered = DoubleGreaterThan | DoubleConditionUnordered,
-+ DoubleGreaterThanOrEqualOrUnordered =
-+ DoubleGreaterThanOrEqual | DoubleConditionUnordered,
-+ DoubleLessThanOrUnordered = DoubleLessThan | DoubleConditionUnordered,
-+ DoubleLessThanOrEqualOrUnordered =
-+ DoubleLessThanOrEqual | DoubleConditionUnordered,
-+ };
-+
-+ enum JumpOrCall { BranchIsJump, BranchIsCall };
-+
-+ enum LinkBit {
-+ DontLinkB = 0,
-+ LinkB = 1,
-+ };
-+
-+ enum LikelyBit {
-+ NotLikelyB = 0,
-+ LikelyB = 1,
-+ };
-+
-+ enum BranchAddressType {
-+ RelativeBranch = 0,
-+ AbsoluteBranch = 2,
-+ };
-+
-+ enum FloatFormat { SingleFloat, DoubleFloat };
-+ enum FloatTestKind { TestForTrue, TestForFalse };
-+
-+ BufferOffset nextOffset() { return m_buffer.nextOffset(); }
-+
-+ protected:
-+ Instruction* editSrc(BufferOffset bo) {
-+ if (!bo.assigned()) {
-+ // Under OOM, writeInst may return an unassigned BufferOffset.
-+ // Return a dummy writable area so callers (WriteLoad64Instructions)
-+ // can proceed harmlessly; the compilation will be discarded.
-+ static uint32_t oomDummy_[8];
-+ return (Instruction*)oomDummy_;
-+ }
-+ return m_buffer.getInst(bo);
-+ }
-+
-+ struct RelativePatch {
-+ BufferOffset offset;
-+ void* target;
-+ RelocationKind kind;
-+
-+ RelativePatch(BufferOffset offset, void* target, RelocationKind kind)
-+ : offset(offset), target(target), kind(kind) {}
-+ };
-+
-+ js::Vector<RelativePatch, 8, SystemAllocPolicy> jumps_;
-+
-+ CompactBufferWriter jumpRelocations_;
-+ CompactBufferWriter dataRelocations_;
-+
-+ PPCBufferWithExecutableCopy m_buffer;
-+
-+#ifdef JS_JITSPEW
-+ Sprinter* printer;
-+#endif
-+
-+ public:
-+ // Which absolute bit number does a CR + Condition pair refer to?
-+ static uint8_t crBit(CRegisterID cr, Condition cond) {
-+ return (cr << 2) + ((cond & 0xf0) >> 4);
-+ }
-+ static uint8_t crBit(CRegisterID cr, DoubleCondition cond) {
-+ return (cr << 2) + ((cond & 0xf0) >> 4);
-+ }
-+
-+ Assembler()
-+ : m_buffer(/* poolMaxOffset */ 8192, /* nopFill */ 0),
-+#ifdef JS_JITSPEW
-+ printer(nullptr),
-+#endif
-+ isFinished(false),
-+ scratch_register_list_((1 << Registers::r11) | (1 << Registers::r12)) {
-+ }
-+
-+ void setUnlimitedBuffer() { m_buffer.setUnlimited(); }
-+
-+ // Constant pool callbacks required by AssemblerBufferWithConstantPools.
-+ static void InsertIndexIntoTag(uint8_t* load, uint32_t index);
-+ static bool PatchConstantPoolLoad(void* loadAddr, void* constPoolAddr);
-+ static void WritePoolGuard(BufferOffset branch, Instruction* inst,
-+ BufferOffset dest);
-+ static void WritePoolHeader(uint8_t* start, js::jit::Pool* p, bool isNatural);
-+ static void PatchShortRangeBranchToVeneer(PPCBuffer*, unsigned rangeIdx,
-+ BufferOffset deadline,
-+ BufferOffset veneer);
-+
-+ static Condition InvertCondition(Condition cond);
-+ static DoubleCondition InvertCondition(DoubleCondition cond);
-+
-+ void writeRelocation(BufferOffset src) {
-+ jumpRelocations_.writeUnsigned(src.getOffset());
-+ }
-+
-+ void writeDataRelocation(ImmGCPtr ptr) {
-+ if (ptr.value) {
-+ if (gc::IsInsideNursery(ptr.value)) {
-+ embedsNurseryPointers_ = true;
-+ }
-+ dataRelocations_.writeUnsigned(nextOffset().getOffset());
-+ }
-+ }
-+ void writeDataRelocation(BufferOffset bo, ImmGCPtr ptr) {
-+ if (ptr.value) {
-+ if (gc::IsInsideNursery(ptr.value)) {
-+ embedsNurseryPointers_ = true;
-+ }
-+ dataRelocations_.writeUnsigned(bo.getOffset());
-+ }
-+ }
-+
-+ void assertNoGCThings() const {
-+#ifdef DEBUG
-+ MOZ_ASSERT(dataRelocations_.length() == 0);
-+ for (auto& j : jumps_) {
-+ MOZ_ASSERT(j.kind == RelocationKind::HARDCODED);
-+ }
-+#endif
-+ }
-+
-+ bool oom() const;
-+
-+ void setPrinter(Sprinter* sp) {
-+#ifdef JS_JITSPEW
-+ printer = sp;
-+#endif
-+ }
-+
-+#ifdef JS_JITSPEW
-+ inline void spew(const char* fmt, ...) MOZ_FORMAT_PRINTF(2, 3) {
-+ if (MOZ_UNLIKELY(printer || JitSpewEnabled(JitSpew_Codegen))) {
-+ va_list va;
-+ va_start(va, fmt);
-+ spewVA(fmt, va);
-+ va_end(va);
-+ }
-+ }
-+ MOZ_COLD void spewVA(const char* fmt, va_list va) MOZ_FORMAT_PRINTF(2, 0) {
-+ char buf[200];
-+ int i = VsprintfLiteral(buf, fmt, va);
-+ if (i > -1) {
-+ if (printer) {
-+ printer->printf("%s\n", buf);
-+ }
-+ js::jit::JitSpew(js::jit::JitSpew_Codegen, "%s", buf);
-+ }
-+ }
-+#else
-+ MOZ_ALWAYS_INLINE void spew(const char* fmt, ...) MOZ_FORMAT_PRINTF(2, 3) {}
-+#endif
-+
-+ Register getStackPointer() const { return StackPointer; }
-+
-+ protected:
-+ bool isFinished;
-+
-+ public:
-+ static uintptr_t GetPointer(uint8_t*);
-+ void flush() {
-+ MOZ_ASSERT(!isFinished);
-+ m_buffer.flushPool();
-+ }
-+ // Inhibit pool flushes for the next maxInst instructions. Mirrors the
-+ // ARM/ARM64 wrappers; lets shared code (e.g. WasmFrameIter epilogues
-+ // that need static byte distances between currentOffset() captures)
-+ // fence a small instruction window without reaching into m_buffer.
-+ void enterNoPool(size_t maxInst) { m_buffer.enterNoPool(maxInst); }
-+ void leaveNoPool() { m_buffer.leaveNoPool(); }
-+ void finish();
-+ bool appendRawCode(const uint8_t* code, size_t numBytes);
-+ bool reserve(size_t size);
-+ bool swapBuffer(wasm::Bytes& bytes);
-+ void executableCopy(void* buffer);
-+ void copyJumpRelocationTable(uint8_t* dest);
-+ void copyDataRelocationTable(uint8_t* dest);
-+
-+ size_t size() const;
-+ size_t jumpRelocationTableBytes() const;
-+ size_t dataRelocationTableBytes() const;
-+ size_t bytesNeeded() const;
-+
-+ BufferOffset writeInst(uint32_t x, uint32_t* dest = nullptr);
-+ static void WriteInstStatic(uint32_t x, uint32_t* dest);
-+
-+ public:
-+ BufferOffset haltingAlign(int alignment);
-+ BufferOffset nopAlign(int alignment);
-+ BufferOffset as_nop();
-+
-+ // --- Instruction emission (declarations only, implemented in later commits)
-+
-+ // Branch instructions.
-+ uint16_t computeConditionCode(Condition op, CRegisterID cr = cr0);
-+ uint16_t computeConditionCode(DoubleCondition cond, CRegisterID cr = cr0);
-+ BufferOffset as_b(JOffImm26 off, BranchAddressType bat = RelativeBranch,
-+ LinkBit lb = DontLinkB);
-+ BufferOffset as_b(int32_t off, BranchAddressType bat = RelativeBranch,
-+ LinkBit lb = DontLinkB);
-+ BufferOffset as_blr(LinkBit lb = DontLinkB);
-+ BufferOffset as_bctr(LinkBit lb = DontLinkB);
-+ BufferOffset as_bc(BOffImm16 off, Condition cond, CRegisterID cr = cr0,
-+ LikelyBit lkb = NotLikelyB, LinkBit lb = DontLinkB);
-+ BufferOffset as_bc(int16_t off, Condition cond, CRegisterID cr = cr0,
-+ LikelyBit lkb = NotLikelyB, LinkBit lb = DontLinkB);
-+ BufferOffset as_bc(BOffImm16 off, DoubleCondition cond, CRegisterID cr = cr0,
-+ LikelyBit lkb = NotLikelyB, LinkBit lb = DontLinkB);
-+ BufferOffset as_bc(int16_t off, DoubleCondition cond, CRegisterID cr = cr0,
-+ LikelyBit lkb = NotLikelyB, LinkBit lb = DontLinkB);
-+ BufferOffset as_bcctr(Condition cond, CRegisterID cr = cr0,
-+ LikelyBit lkb = NotLikelyB, LinkBit lb = DontLinkB);
-+ BufferOffset as_bcctr(DoubleCondition cond, CRegisterID cr = cr0,
-+ LikelyBit lkb = NotLikelyB, LinkBit lb = DontLinkB);
-+ BufferOffset as_bc(int16_t off, uint16_t op, LikelyBit lkb = NotLikelyB,
-+ LinkBit lb = DontLinkB);
-+ BufferOffset as_bcctr(uint16_t op, LikelyBit lkb = NotLikelyB,
-+ LinkBit lb = DontLinkB);
-+
-+ // SPR operations.
-+ BufferOffset as_mtspr(SPRegisterID spr, Register ra);
-+ BufferOffset as_mfspr(Register rd, SPRegisterID spr);
-+
-+ // CR operations.
-+ BufferOffset as_crand(uint8_t t, uint8_t a, uint8_t b);
-+ BufferOffset as_crandc(uint8_t t, uint8_t a, uint8_t b);
-+ BufferOffset as_cror(uint8_t t, uint8_t a, uint8_t b);
-+ BufferOffset as_crorc(uint8_t t, uint8_t a, uint8_t b);
-+ BufferOffset as_crxor(uint8_t t, uint8_t a, uint8_t b);
-+ BufferOffset as_mtcrf(uint32_t mask, Register rs);
-+ BufferOffset as_mfocrf(Register rd, CRegisterID crfs);
-+ BufferOffset as_mcrxrx(CRegisterID crt);
-+
-+ // Compare instructions.
-+ BufferOffset as_cmpd(CRegisterID cr, Register ra, Register rb);
-+ BufferOffset as_cmpdi(CRegisterID cr, Register ra, int16_t im);
-+ BufferOffset as_cmpld(CRegisterID cr, Register ra, Register rb);
-+ BufferOffset as_cmpldi(CRegisterID cr, Register ra, int16_t im);
-+ BufferOffset as_cmpw(CRegisterID cr, Register ra, Register rb);
-+ BufferOffset as_cmpwi(CRegisterID cr, Register ra, int16_t im);
-+ BufferOffset as_cmplw(CRegisterID cr, Register ra, Register rb);
-+ BufferOffset as_cmplwi(CRegisterID cr, Register ra, int16_t im);
-+ BufferOffset as_cmpd(Register ra, Register rb);
-+ BufferOffset as_cmpdi(Register ra, int16_t im);
-+ BufferOffset as_cmpld(Register ra, Register rb);
-+ BufferOffset as_cmpldi(Register ra, int16_t im);
-+ BufferOffset as_cmpw(Register ra, Register rb);
-+ BufferOffset as_cmpwi(Register ra, int16_t im);
-+ BufferOffset as_cmplw(Register ra, Register rb);
-+ BufferOffset as_cmplwi(Register ra, int16_t im);
-+
-+ // ALU (three-register).
-+ BufferOffset as_add(Register rd, Register ra, Register rb);
-+ BufferOffset as_addc(Register rd, Register ra, Register rb);
-+ BufferOffset as_adde(Register rd, Register ra, Register rb);
-+ BufferOffset as_subf(Register rd, Register ra, Register rb);
-+ BufferOffset as_subfc(Register rd, Register ra, Register rb);
-+ BufferOffset as_subfe(Register rd, Register ra, Register rb);
-+ BufferOffset as_neg(Register rd, Register rs);
-+
-+ BufferOffset as_mulld(Register rd, Register ra, Register rb);
-+ BufferOffset as_mulhd(Register rd, Register ra, Register rb);
-+ BufferOffset as_mulhdu(Register rd, Register ra, Register rb);
-+ BufferOffset as_mulldo(Register rd, Register ra, Register rb);
-+ BufferOffset as_mullw(Register rd, Register ra, Register rb);
-+ BufferOffset as_mulhwu(Register rd, Register ra, Register rb);
-+
-+ BufferOffset as_divd(Register rd, Register ra, Register rb);
-+ BufferOffset as_divdu(Register rd, Register ra, Register rb);
-+ BufferOffset as_divw(Register rd, Register ra, Register rb);
-+ BufferOffset as_divwu(Register rd, Register ra, Register rb);
-+ // POWER9 modulo.
-+ BufferOffset as_modsd(Register rd, Register ra, Register rb);
-+ BufferOffset as_modsw(Register rd, Register ra, Register rb);
-+ BufferOffset as_modud(Register rd, Register ra, Register rb);
-+ BufferOffset as_moduw(Register rd, Register ra, Register rb);
-+
-+ // ALU immediate.
-+ BufferOffset as_addi(Register rd, Register ra, int16_t im,
-+ bool actually_li = false);
-+ BufferOffset as_addis(Register rd, Register ra, int16_t im,
-+ bool actually_lis = false);
-+ BufferOffset as_mulli(Register rd, Register ra, int16_t im);
-+ BufferOffset as_subfic(Register rd, Register ra, int16_t im);
-+
-+ // ALU unary/extended.
-+ BufferOffset as_cntlzw(Register rd, Register ra);
-+ BufferOffset as_cntlzd(Register rd, Register ra);
-+ BufferOffset as_cnttzd(Register rd, Register ra);
-+ BufferOffset as_cnttzw(Register rd, Register ra);
-+ BufferOffset as_popcntd(Register ra, Register rs);
-+ BufferOffset as_popcntw(Register ra, Register rs);
-+ // POWER10 byte-reverse doubleword: ra = bswap64(rs). 1 insn replacing the
-+ // POWER9 mtvsrd / xxbrd / mfvsrd round-trip in byteSwap64.
-+ BufferOffset as_brd(Register ra, Register rs);
-+ // POWER10 byte-reverse each halfword (4 halfwords) / each word (2 words)
-+ // in the 64-bit doubleword. The wasm/asm caller usually masks or
-+ // sign-extends the low halfword/word afterwards.
-+ BufferOffset as_brh(Register ra, Register rs);
-+ BufferOffset as_brw(Register ra, Register rs);
-+
-+ // Bit operations (logical, three-register).
-+ BufferOffset as_and_(Register rd, Register rs, Register rb);
-+ BufferOffset as_and__rc(Register rd, Register rs, Register rb);
-+ BufferOffset as_nor(Register rd, Register rs, Register rb);
-+ BufferOffset as_or_(Register rd, Register rs, Register rb);
-+ BufferOffset as_xor_(Register rd, Register rs, Register rb);
-+ BufferOffset as_slw(Register rd, Register rs, Register rb);
-+ BufferOffset as_srw(Register rd, Register rs, Register rb);
-+ BufferOffset as_sraw(Register rd, Register rs, Register rb);
-+ BufferOffset as_sld(Register rd, Register rs, Register rb);
-+ BufferOffset as_srd(Register rd, Register rs, Register rb);
-+ BufferOffset as_srad(Register rd, Register rs, Register rb);
-+
-+ // Bit operations (logical, immediate).
-+ BufferOffset as_ori(Register rd, Register ra, uint16_t im);
-+ BufferOffset as_oris(Register rd, Register ra, uint16_t im);
-+ BufferOffset as_xori(Register rd, Register ra, uint16_t im);
-+ BufferOffset as_xoris(Register rd, Register ra, uint16_t im);
-+ BufferOffset as_andi_rc(Register rd, Register ra, uint16_t im);
-+
-+ // Sign extension.
-+ BufferOffset as_extsb(Register rd, Register rs);
-+ BufferOffset as_extsh(Register rd, Register rs);
-+ BufferOffset as_extsw(Register rd, Register rs);
-+ BufferOffset as_extsw_rc(Register rd, Register rs);
-+
-+ // Shift/rotate with immediates.
-+ BufferOffset as_srawi(Register id, Register rs, uint8_t n);
-+ BufferOffset as_sradi(Register rd, Register rs, int n);
-+ BufferOffset as_rldcl(Register ra, Register rs, Register rb, uint8_t mb);
-+ BufferOffset as_rldicl(Register ra, Register rs, uint8_t sh, uint8_t mb);
-+ BufferOffset as_rldicl_rc(Register ra, Register rs, uint8_t sh, uint8_t mb);
-+ BufferOffset as_rldicr(Register ra, Register rs, uint8_t sh, uint8_t mb);
-+ BufferOffset as_rldicr_rc(Register ra, Register rs, uint8_t sh, uint8_t mb);
-+ BufferOffset as_rlwinm(Register rd, Register rs, uint8_t sh, uint8_t mb,
-+ uint8_t me);
-+ BufferOffset as_rlwinm_rc(Register rd, Register rs, uint8_t sh, uint8_t mb,
-+ uint8_t me);
-+ BufferOffset as_rlwimi(Register rd, Register rs, uint8_t sh, uint8_t mb,
-+ uint8_t me);
-+ BufferOffset as_rldimi(Register rd, Register rs, uint8_t sh, uint8_t mb);
-+ BufferOffset as_rlwnm(Register rd, Register rs, Register rb, uint8_t mb,
-+ uint8_t me);
-+
-+ // Integer loads (D-form).
-+ BufferOffset as_lbz(Register rd, Register rb, int16_t off);
-+ BufferOffset as_lha(Register rd, Register rb, int16_t off);
-+ BufferOffset as_lhz(Register rd, Register rb, int16_t off);
-+ BufferOffset as_lwa(Register rd, Register rb, int16_t off);
-+ BufferOffset as_lwz(Register rd, Register rb, int16_t off);
-+ BufferOffset as_ld(Register rd, Register rb, int16_t off);
-+
-+ // Integer stores (D-form).
-+ BufferOffset as_stb(Register rd, Register rb, int16_t off);
-+ BufferOffset as_sth(Register rd, Register rb, int16_t off);
-+ BufferOffset as_stw(Register rd, Register rb, int16_t off);
-+ BufferOffset as_std(Register rd, Register rb, int16_t off);
-+ BufferOffset as_stdu(Register rd, Register rb, int16_t off);
-+
-+ // Integer loads/stores (X-form, indexed).
-+ BufferOffset as_lbzx(Register rd, Register ra, Register rb);
-+ BufferOffset as_lhax(Register rd, Register ra, Register rb);
-+ BufferOffset as_lhzx(Register rd, Register ra, Register rb);
-+ BufferOffset as_lwzx(Register rd, Register ra, Register rb);
-+ // X-form sign-extending word load. Single-insn equivalent of lwzx + extsw.
-+ BufferOffset as_lwax(Register rd, Register ra, Register rb);
-+ BufferOffset as_lwarx(Register rd, Register ra, Register rb);
-+ BufferOffset as_lbarx(Register rd, Register ra, Register rb);
-+ BufferOffset as_lharx(Register rd, Register ra, Register rb);
-+ BufferOffset as_ldx(Register rd, Register ra, Register rb);
-+ BufferOffset as_ldarx(Register rd, Register ra, Register rb);
-+ BufferOffset as_stbx(Register rd, Register ra, Register rb);
-+ BufferOffset as_stbcx(Register rd, Register ra, Register rb);
-+ BufferOffset as_stwx(Register rd, Register ra, Register rb);
-+ BufferOffset as_stwbrx(Register rd, Register ra, Register rb);
-+ BufferOffset as_sthx(Register rd, Register ra, Register rb);
-+ BufferOffset as_sthcx(Register rd, Register ra, Register rb);
-+ BufferOffset as_stdx(Register rd, Register ra, Register rb);
-+ BufferOffset as_stdcx(Register rd, Register ra, Register rb);
-+ BufferOffset as_stwcx(Register rd, Register ra, Register rb);
-+
-+ // Integer select.
-+ // POWER10 (ISA 3.1). Set RT = 1/0 based on a CR bit.
-+ BufferOffset as_setbc(Register rt, uint16_t bc, CRegisterID cr);
-+ BufferOffset as_setbcr(Register rt, uint16_t bc, CRegisterID cr);
-+ BufferOffset as_isel(Register rt, Register ra, Register rb, uint16_t rc,
-+ CRegisterID cr = cr0);
-+ BufferOffset as_isel0(Register rt, Register ra, Register rb, uint16_t rc,
-+ CRegisterID cr = cr0);
-+
-+ // FP compare.
-+ BufferOffset as_fcmpu(CRegisterID cr, FloatRegister ra, FloatRegister rb);
-+ BufferOffset as_fcmpu(FloatRegister ra, FloatRegister rb);
-+
-+ // FP arithmetic (two-source).
-+ BufferOffset as_fadd(FloatRegister rd, FloatRegister ra, FloatRegister rc);
-+ BufferOffset as_fadds(FloatRegister rd, FloatRegister ra, FloatRegister rc);
-+ BufferOffset as_fsub(FloatRegister rd, FloatRegister ra, FloatRegister rc);
-+ BufferOffset as_fsubs(FloatRegister rd, FloatRegister ra, FloatRegister rc);
-+ BufferOffset as_fdiv(FloatRegister rd, FloatRegister ra, FloatRegister rc);
-+ BufferOffset as_fdivs(FloatRegister rd, FloatRegister ra, FloatRegister rc);
-+ BufferOffset as_fmul(FloatRegister rd, FloatRegister ra, FloatRegister rc);
-+ BufferOffset as_fmuls(FloatRegister rd, FloatRegister ra, FloatRegister rc);
-+ BufferOffset as_fcpsgn(FloatRegister rd, FloatRegister ra, FloatRegister rc);
-+ // FP unary.
-+ BufferOffset as_fabs(FloatRegister rd, FloatRegister rs);
-+ BufferOffset as_fneg(FloatRegister rd, FloatRegister rs);
-+ BufferOffset as_fmr(FloatRegister rd, FloatRegister rs);
-+ BufferOffset as_fsqrt(FloatRegister rd, FloatRegister rs);
-+ BufferOffset as_fsqrts(FloatRegister rd, FloatRegister rs);
-+ BufferOffset as_frsp(FloatRegister rd, FloatRegister rs);
-+
-+ // FP conversions.
-+ BufferOffset as_fcfid(FloatRegister rd, FloatRegister rs);
-+ BufferOffset as_fcfids(FloatRegister rd, FloatRegister rs);
-+ BufferOffset as_fcfidu(FloatRegister rd, FloatRegister rs);
-+ BufferOffset as_fcfidus(FloatRegister rd, FloatRegister rs);
-+ BufferOffset as_fctid(FloatRegister rd, FloatRegister rs);
-+ BufferOffset as_fctidz(FloatRegister rd, FloatRegister rs);
-+ BufferOffset as_fctiduz(FloatRegister rd, FloatRegister rs);
-+ BufferOffset as_fctiwz(FloatRegister rd, FloatRegister rs);
-+
-+ // FP rounding.
-+ BufferOffset as_frim(FloatRegister rd, FloatRegister rs);
-+ BufferOffset as_frip(FloatRegister rd, FloatRegister rs);
-+ BufferOffset as_friz(FloatRegister rd, FloatRegister rs);
-+
-+ // FP loads (D-form).
-+ BufferOffset as_lfd(FloatRegister rd, Register rb, int16_t off);
-+ BufferOffset as_lfs(FloatRegister rd, Register rb, int16_t off);
-+
-+ // FP stores (D-form).
-+ BufferOffset as_stfd(FloatRegister rd, Register rb, int16_t off);
-+ BufferOffset as_stfs(FloatRegister rd, Register rb, int16_t off);
-+ BufferOffset as_stfdu(FloatRegister rd, Register rb, int16_t off);
-+ BufferOffset as_stfsu(FloatRegister rd, Register rb, int16_t off);
-+
-+ // FP loads/stores (X-form, indexed).
-+ BufferOffset as_lfdx(FloatRegister rd, Register ra, Register rb);
-+ BufferOffset as_lfsx(FloatRegister rd, Register ra, Register rb);
-+ BufferOffset as_lfiwax(FloatRegister rd, Register ra, Register rb);
-+ BufferOffset as_stfdx(FloatRegister rd, Register ra, Register rb);
-+ BufferOffset as_stfsx(FloatRegister rd, Register ra, Register rb);
-+
-+ // FPSCR operations.
-+ BufferOffset as_mtfsb0(uint8_t bt);
-+ BufferOffset as_mcrfs(CRegisterID bf, uint8_t bfa);
-+
-+ // VSX (FPR-only subset).
-+ BufferOffset as_mfvsrd(Register ra, FloatRegister xs);
-+ BufferOffset as_mtvsrd(FloatRegister xs, Register ra);
-+ // POWER8+ (ISA 2.07). Sign-extending move of RA's low 32 bits to FPR.
-+ BufferOffset as_mtvsrwa(FloatRegister xs, Register ra);
-+ BufferOffset as_mtvsrwz(FloatRegister xs, Register ra);
-+ BufferOffset as_mtvsrws(FloatRegister xs, Register ra);
-+ BufferOffset as_xxbrd(FloatRegister xt, FloatRegister xb);
-+ // POWER9 scalar VSX max/min with Java/JavaScript semantics (matches
-+ // ECMA-262 Math.max / Math.min). Operate on FPR-space (encoding 0..31).
-+ BufferOffset as_xsmaxjdp(FloatRegister xt, FloatRegister xa,
-+ FloatRegister xb);
-+ BufferOffset as_xsminjdp(FloatRegister xt, FloatRegister xa,
-+ FloatRegister xb);
-+ BufferOffset as_xscvdpspn(FloatRegister xt, FloatRegister xb);
-+ BufferOffset as_xscvspdpn(FloatRegister xt, FloatRegister xb);
-+ // POWER9 (ISA 3.0) scalar FP16 conversions.
-+ BufferOffset as_xscvdphp(FloatRegister xt, FloatRegister xb);
-+ BufferOffset as_xscvhpdp(FloatRegister xt, FloatRegister xb);
-+ // POWER9 (ISA 3.0) scalar extract biased exponent.
-+ BufferOffset as_xsxexpdp(FloatRegister xt, FloatRegister xb);
-+ // POWER9 (ISA 3.0) scalar FP16 load/store, X-form indexed.
-+ BufferOffset as_lxsihzx(FloatRegister xt, Register ra, Register rb);
-+ BufferOffset as_stxsihx(FloatRegister xs, Register ra, Register rb);
-+
-+ // VSX SIMD load/store (X-form, indexed).
-+ BufferOffset as_lxvx(FloatRegister xt, Register ra, Register rb);
-+ BufferOffset as_stxvx(FloatRegister xs, Register ra, Register rb);
-+ BufferOffset as_lxvd2x(FloatRegister xt, Register ra, Register rb);
-+ BufferOffset as_stxvd2x(FloatRegister xs, Register ra, Register rb);
-+
-+ // VMX SIMD load/store (X-form, indexed). Take a raw VR number (0-31)
-+ // because VR20-VR31 are outside the FloatRegister encoding (which only
-+ // covers VSR0-31 = f0-f31). Used by the JIT trampoline to save/restore
-+ // the ELFv2 callee-saved VR20-VR31. EA is force-aligned to 16 bytes
-+ // (low 4 bits of the address are ignored), so the slot's alignment
-+ // matters for layout but not for trap avoidance.
-+ BufferOffset as_lvx(uint8_t vrt, Register ra, Register rb);
-+ BufferOffset as_stvx(uint8_t vrs, Register ra, Register rb);
-+
-+ // VSX SIMD register operations (XX3-form / XX1-form / XX2-form).
-+ BufferOffset as_xxlor(FloatRegister xt, FloatRegister xa, FloatRegister xb);
-+
-+ // VSX bitwise operations (XX3-form).
-+ BufferOffset as_xxland(FloatRegister xt, FloatRegister xa, FloatRegister xb);
-+ BufferOffset as_xxlxor(FloatRegister xt, FloatRegister xa, FloatRegister xb);
-+ BufferOffset as_xxlnor(FloatRegister xt, FloatRegister xa, FloatRegister xb);
-+ BufferOffset as_xxlandc(FloatRegister xt, FloatRegister xa, FloatRegister xb);
-+ BufferOffset as_xxsel(FloatRegister xt, FloatRegister xa, FloatRegister xb,
-+ FloatRegister xc);
-+
-+ // VMX integer arithmetic (VR0-31 = VSR32-63 only).
-+ // Callers must ensure operands are in VR space.
-+ BufferOffset as_vaddubm(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vadduhm(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vadduwm(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vaddudm(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vsububm(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vsubuhm(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vsubuwm(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vsubudm(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vaddsbs(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vaddshs(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vaddubs(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vadduhs(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vsubsbs(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vsubshs(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vsububs(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vsubuhs(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vminsb(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vminsh(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vminsw(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmaxsb(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmaxsh(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmaxsw(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmaxsd(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vminub(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vminuh(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vminuw(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmaxub(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmaxuh(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmaxuw(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ // POWER9 (ISA 3.0): per-lane integer negate.
-+ BufferOffset as_vnegw(uint8_t vrt, uint8_t vrb);
-+ BufferOffset as_vnegd(uint8_t vrt, uint8_t vrb);
-+ // POWER9 (ISA 3.0): addpcis rT, D. Computes rT = (CIA + 4) + (D << 16).
-+ // D is a 16-bit signed immediate; DX-form splits D across three instruction
-+ // fields (d0[16..25] ∥ d1[11..15] ∥ d2[31]). No LR clobber, no RAS hazard.
-+ BufferOffset as_addpcis(Register rt, int16_t d);
-+ // POWER10 (ISA 3.1) prefixed instructions. Each emits 8 bytes (prefix +
-+ // suffix) with a single nop inserted before iff the prefix would
-+ // straddle a 64-byte block. Caller must guarantee HasPOWER10().
-+ // imm34 is signed 34-bit; R=true selects PC-relative form (RA must be r0).
-+ // Returns the offset of the prefix word.
-+ BufferOffset as_paddi(Register rt, Register ra, int64_t imm34, bool R);
-+ BufferOffset as_pld(Register rt, Register ra, int64_t imm34, bool R);
-+ BufferOffset as_plxv(uint8_t xt, Register ra, int64_t imm34, bool R);
-+ // FP-target prefixed loads: plfd/plfs are MLS (Type=2) with suffix
-+ // opcodes 50 and 48. plfs widens single → double in the FPR
-+ // (matches non-prefixed lfs semantics).
-+ BufferOffset as_plfd(FloatRegister frt, Register ra, int64_t imm34,
-+ bool R);
-+ BufferOffset as_plfs(FloatRegister frt, Register ra, int64_t imm34,
-+ bool R);
-+ // Prefixed-store counterparts. Same prefix shape; suffix opcodes are
-+ // the D-form variants of std/stxv/stfd/stfs (61, 27, 54, 52).
-+ BufferOffset as_pstd(Register rs, Register ra, int64_t imm34, bool R);
-+ BufferOffset as_pstxv(uint8_t xs, Register ra, int64_t imm34, bool R);
-+ BufferOffset as_pstfd(FloatRegister frs, Register ra, int64_t imm34,
-+ bool R);
-+ BufferOffset as_pstfs(FloatRegister frs, Register ra, int64_t imm34,
-+ bool R);
-+
-+ private:
-+ // Emit a nop before a prefixed instruction iff the prefix would otherwise
-+ // start at offset 60 (mod 64) and the suffix would land in the next block.
-+ void ensurePrefixedAlignment();
-+
-+ public:
-+ BufferOffset as_vavgub(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vavguh(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmuluwm(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmulld(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ // VMX shift (VR0-31 only).
-+ BufferOffset as_vslb(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vslh(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vslw(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vsld(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vsrb(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vsrh(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vsrw(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vsrd(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vsrab(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vsrah(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vsraw(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vsrad(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vslo(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vsro(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+
-+ // VMX integer compare (VR0-31 only).
-+ BufferOffset as_vcmpequb(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vcmpequh(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vcmpequw(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vcmpequd(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ // Record forms set CR6: LT = all-true, EQ = none-true.
-+ BufferOffset as_vcmpequb_rc(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vcmpequh_rc(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vcmpequw_rc(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vcmpequd_rc(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vcmpgtsb(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vcmpgtsh(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vcmpgtsw(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vcmpgtsd(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vcmpgtub(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vcmpgtuh(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vcmpgtuw(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vcmpgtud(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ // POWER9 (ISA 3.0). NotEqual compare; no doubleword variant.
-+ BufferOffset as_vcmpneb(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vcmpneh(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vcmpnew(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+
-+ // VSX float compare (XX3-form, VSR0-63).
-+ BufferOffset as_xvcmpeqsp(FloatRegister xt, FloatRegister xa,
-+ FloatRegister xb);
-+ BufferOffset as_xvcmpgtsp(FloatRegister xt, FloatRegister xa,
-+ FloatRegister xb);
-+ BufferOffset as_xvcmpgesp(FloatRegister xt, FloatRegister xa,
-+ FloatRegister xb);
-+ BufferOffset as_xvcmpeqdp(FloatRegister xt, FloatRegister xa,
-+ FloatRegister xb);
-+ BufferOffset as_xvcmpgtdp(FloatRegister xt, FloatRegister xa,
-+ FloatRegister xb);
-+ BufferOffset as_xvcmpgedp(FloatRegister xt, FloatRegister xa,
-+ FloatRegister xb);
-+
-+ // VSX float arithmetic (XX3-form binary, XX2-form unary).
-+#define DECL_VSX_BIN(op) \
-+ BufferOffset as_##op(FloatRegister xt, FloatRegister xa, FloatRegister xb);
-+ // Each line declares as_<op>(xt, xa, xb); the macro supplies the trailing
-+ // semicolon, so the invocations carry none.
-+ DECL_VSX_BIN(xvaddsp)
-+ DECL_VSX_BIN(xvadddp)
-+ DECL_VSX_BIN(xvsubsp)
-+ DECL_VSX_BIN(xvsubdp)
-+ DECL_VSX_BIN(xvmulsp)
-+ DECL_VSX_BIN(xvmuldp)
-+ DECL_VSX_BIN(xvdivsp)
-+ DECL_VSX_BIN(xvdivdp)
-+ DECL_VSX_BIN(xvminsp)
-+ DECL_VSX_BIN(xvmindp)
-+ DECL_VSX_BIN(xvmaxsp)
-+ DECL_VSX_BIN(xvmaxdp)
-+ DECL_VSX_BIN(xvmaddasp)
-+ DECL_VSX_BIN(xvmaddadp)
-+ DECL_VSX_BIN(xvnmsubasp)
-+ DECL_VSX_BIN(xvnmsubadp)
-+#undef DECL_VSX_BIN
-+#define DECL_VSX_UN(op) \
-+ BufferOffset as_##op(FloatRegister xt, FloatRegister xb);
-+ // Each line declares as_<op>(xt, xb); the macro supplies the trailing
-+ // semicolon, so the invocations carry none.
-+ DECL_VSX_UN(xvabssp)
-+ DECL_VSX_UN(xvabsdp)
-+ DECL_VSX_UN(xvnegsp)
-+ DECL_VSX_UN(xvnegdp)
-+ DECL_VSX_UN(xvsqrtsp)
-+ DECL_VSX_UN(xvsqrtdp)
-+ DECL_VSX_UN(xvrspip)
-+ DECL_VSX_UN(xvrdpip)
-+ DECL_VSX_UN(xvrspim)
-+ DECL_VSX_UN(xvrdpim)
-+ DECL_VSX_UN(xvrspiz)
-+ DECL_VSX_UN(xvrdpiz)
-+ DECL_VSX_UN(xvrspic)
-+ DECL_VSX_UN(xvrdpic)
-+ DECL_VSX_UN(xvcvsxwsp)
-+ DECL_VSX_UN(xvcvuxwsp)
-+ DECL_VSX_UN(xvcvsxwdp)
-+ DECL_VSX_UN(xvcvuxwdp)
-+ DECL_VSX_UN(xvcvspsxws)
-+ DECL_VSX_UN(xvcvspuxws)
-+ DECL_VSX_UN(xvcvdpsxws)
-+ DECL_VSX_UN(xvcvdpuxws)
-+ DECL_VSX_UN(xvcvdpsp)
-+ DECL_VSX_UN(xvcvspdp)
-+#undef DECL_VSX_UN
-+
-+ // VMX widen/narrow/merge/pack (VR0-31 only).
-+ BufferOffset as_vupkhsb(uint8_t vrt, uint8_t vrb);
-+ BufferOffset as_vupklsb(uint8_t vrt, uint8_t vrb);
-+ BufferOffset as_vupkhsh(uint8_t vrt, uint8_t vrb);
-+ BufferOffset as_vupklsh(uint8_t vrt, uint8_t vrb);
-+ BufferOffset as_vupkhsw(uint8_t vrt, uint8_t vrb);
-+ BufferOffset as_vupklsw(uint8_t vrt, uint8_t vrb);
-+ BufferOffset as_vpkshss(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vpkswss(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vpkshus(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vpkswus(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmrghb(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmrghh(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmrghw(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmrglb(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmrglh(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmrglw(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+
-+ // VMX extended multiply (VR0-31 only).
-+ BufferOffset as_vmulesb(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmulosb(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmuleub(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmuloub(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmulesh(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmulosh(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmuleuh(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmulouh(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmulesw(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmulosw(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmuleuw(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vmulouw(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ BufferOffset as_vpopcntb(uint8_t vrt, uint8_t vrb);
-+ BufferOffset as_vperm(uint8_t vrt, uint8_t vra, uint8_t vrb, uint8_t vrc);
-+ // POWER8+ (ISA 2.07). VX-form bit-permute. See PPC_vbpermq comment.
-+ BufferOffset as_vbpermq(uint8_t vrt, uint8_t vra, uint8_t vrb);
-+ // POWER10 (ISA 3.1) Vector Extract Mask. RT is a GPR.
-+ BufferOffset as_vextractbm(Register rt, FloatRegister vrb);
-+ BufferOffset as_vextracthm(Register rt, FloatRegister vrb);
-+ BufferOffset as_vextractwm(Register rt, FloatRegister vrb);
-+ BufferOffset as_vextractdm(Register rt, FloatRegister vrb);
-+ // POWER10 (ISA 3.1) Vector Insert from GPR at immediate byte offset.
-+ // UIM range: vinsw 0..12, vinsd 0..8 (caller must enforce).
-+ BufferOffset as_vinsw(FloatRegister vrt, Register rb, uint8_t uim);
-+ BufferOffset as_vinsd(FloatRegister vrt, Register rb, uint8_t uim);
-+ // POWER10 (ISA 3.1) Vector Insert byte / halfword from GPR with the
-+ // byte position supplied by another GPR (RA & 0xF for vinsbrx,
-+ // RA & 0xE for vinshrx). "rx" = right-indexed = LE-natural.
-+ BufferOffset as_vinsbrx(FloatRegister vrt, Register ra, Register rb);
-+ BufferOffset as_vinshrx(FloatRegister vrt, Register ra, Register rb);
-+ // POWER9 (ISA 3.0) Vector Insert byte / halfword from VR at immediate
-+ // byte position. UIM range: vinsertb 0..15, vinserth 0..14
-+ // (caller must enforce; vinserth UIM is in bytes, even-aligned).
-+ BufferOffset as_vinsertb(FloatRegister vrt, FloatRegister vrb, uint8_t uim);
-+ BufferOffset as_vinserth(FloatRegister vrt, FloatRegister vrb, uint8_t uim);
-+ // POWER9 (ISA 3.0) Vector Extract byte / halfword from VR at immediate
-+ // BE byte position. UIM range: vextractub 0..15, vextractuh 0..14
-+ // (caller must enforce; vextractuh UIM is in bytes, even-aligned). The
-+ // extracted byte/halfword lands at BE byte 7 of VRT, with the rest
-+ // zeroed — so a subsequent mfvsrd reads it as the low byte/halfword
-+ // of the GPR with implicit zero-extension.
-+ BufferOffset as_vextractub(FloatRegister vrt, FloatRegister vrb, uint8_t uim);
-+ BufferOffset as_vextractuh(FloatRegister vrt, FloatRegister vrb, uint8_t uim);
-+ // VX-form with 5-bit signed immediate splat: each lane of VRT is
-+ // set to sign_extend(SIMM5) (range [-16, 15]) at byte/halfword/word granularity.
-+ BufferOffset as_vspltisb(uint8_t vrt, int8_t simm5);
-+ BufferOffset as_vspltish(uint8_t vrt, int8_t simm5);
-+ BufferOffset as_vspltisw(uint8_t vrt, int8_t simm5);
-+
-+ // VA-form ternary VMX instructions.
-+ BufferOffset as_vmladduhm(uint8_t vrt, uint8_t vra, uint8_t vrb, uint8_t vrc);
-+ BufferOffset as_vmhraddshs(uint8_t vrt, uint8_t vra, uint8_t vrb,
-+ uint8_t vrc);
-+ BufferOffset as_vmsumshm(uint8_t vrt, uint8_t vra, uint8_t vrb, uint8_t vrc);
-+ BufferOffset as_vmsumuhm(uint8_t vrt, uint8_t vra, uint8_t vrb, uint8_t vrc);
-+ BufferOffset as_xxpermdi(FloatRegister xt, FloatRegister xa, FloatRegister xb,
-+ uint8_t dm);
-+ BufferOffset as_xxspltw(FloatRegister xt, FloatRegister xb, uint8_t uim);
-+ // POWER9 (ISA 3.0). Splat 8-bit immediate to all 16 bytes of an FPR-encoded
-+ // VSR (TX bit forced 0). XX1-form, no Rc.
-+ BufferOffset as_xxspltib(FloatRegister xt, uint8_t imm8);
-+ BufferOffset as_xxinsertw(FloatRegister xt, FloatRegister xb, uint8_t uim);
-+ BufferOffset as_xxextractuw(FloatRegister xt, FloatRegister xb, uint8_t uim);
-+ BufferOffset as_mtvsrdd(FloatRegister xt, Register ra, Register rb);
-+ BufferOffset as_mfvsrld(Register rt, FloatRegister xs);
-+
-+ // VMX vector operations.
-+ BufferOffset as_vspltb(FloatRegister vrt, FloatRegister vrb, uint8_t uim);
-+ BufferOffset as_vsplth(FloatRegister vrt, FloatRegister vrb, uint8_t uim);
-+ BufferOffset as_vsldoi(FloatRegister vrt, FloatRegister vra,
-+ FloatRegister vrb, uint8_t shb);
-+
-+ // Barrier and sync instructions.
-+ BufferOffset as_lwsync();
-+ BufferOffset as_sync();
-+ BufferOffset as_isync();
-+
-+ // Convenience pseudo-instructions.
-+ BufferOffset xs_trap();
-+ BufferOffset xs_trap_tagged(TrapTag tag);
-+ BufferOffset xs_mr(Register rd, Register ra);
-+ BufferOffset xs_mtctr(Register ra);
-+ BufferOffset xs_mtlr(Register ra);
-+ BufferOffset xs_mflr(Register rd);
-+ BufferOffset xs_mtcr(Register rs);
-+ BufferOffset xs_mfxer(Register ra);
-+ BufferOffset xs_mtxer(Register ra);
-+ BufferOffset xs_li(Register rd, int16_t im);
-+ BufferOffset xs_lis(Register rd, int16_t im);
-+ BufferOffset x_subi(Register rd, Register ra, int16_t im);
-+ BufferOffset x_not(Register rd, Register ra);
-+ BufferOffset x_slwi(Register rd, Register rs, int n);
-+ BufferOffset x_sldi(Register rd, Register rs, int n);
-+ BufferOffset x_srwi(Register rd, Register rs, int n);
-+ BufferOffset x_srdi(Register rd, Register rs, int n);
-+ BufferOffset x_insertbits0_15(Register rd, Register rs);
-+ BufferOffset x_bit_value(Register rd, Register rs, unsigned bit);
-+ BufferOffset x_sr_mulli(Register rd, Register ra, int16_t im);
-+
-+ // --- Label operations.
-+ // Bind |label| at the next offset of the instruction stream.
-+ void bind(Label* label) { bind(label, nextOffset()); }
-+ void bind(Label* label, BufferOffset boff);
-+ void bind(InstImm* inst, uintptr_t branch, uintptr_t target);
-+ // Bind the target half of a CodeLabel to the current offset.
-+ void bind(CodeLabel* label) { label->target()->bind(currentOffset()); }
-+ // Current emission position, i.e. nextOffset() as a plain integer.
-+ uint32_t currentOffset() { return nextOffset().getOffset(); }
-+ void retarget(Label* label, Label* target);
-+ void call(Label* label);
-+ void call(void* target);
-+
-+ void as_break(uint32_t code);
-+
-+ // --- Static capability queries.
-+ // Unconditional capabilities: these four queries always return true.
-+ static bool SupportsFloatingPoint() { return true; }
-+ static bool SupportsWasmSimd() { return true; }
-+ static bool SupportsUnalignedAccesses() { return true; }
-+ static bool SupportsFastUnalignedFPAccesses() { return true; }
-+ // POWER9 has scalar FP16 hardware (xscvdphp/xscvhpdp); POWER8 doesn't.
-+ // Runtime-gate like x86's SupportsFloat32To16 (which keys off F16C).
-+ // Both FP16 conversions share the same HasPOWER9() gate.
-+ static bool SupportsFloat64To16() { return HasPOWER9(); }
-+ static bool SupportsFloat32To16() { return HasPOWER9(); }
-+ static bool HasRoundInstruction(RoundingMode mode) {
-+   // friz (trunc), frip (ceil) and frim (floor) give correct results for
-+   // their modes. frin (round-to-nearest) does NOT implement IEEE ties-to-even
-+   // (banker's) rounding, so NearestTiesToEven is reported as unsupported.
-+   switch (mode) {
-+     case RoundingMode::TowardsZero:
-+     case RoundingMode::Up:
-+     case RoundingMode::Down:
-+       return true;
-+     default:
-+       return false;
-+   }
-+ }
-+
-+ protected:
-+ InstImm invertBranch(InstImm branch, BOffImm16 skipOffset);
-+ // Record a jump at |src| that must later be patched to |target|. An append
-+ // failure is folded into enoughMemory_; JITCODE jumps additionally get a
-+ // relocation entry.
-+ void addPendingJump(BufferOffset src, ImmPtr target, RelocationKind kind) {
-+   const bool appended = jumps_.append(RelativePatch(src, target.value, kind));
-+   enoughMemory_ &= appended;
-+   if (kind != RelocationKind::JITCODE) {
-+     return;
-+   }
-+   writeRelocation(src);
-+ }
-+ // Register a CodeLabel in JumpImmediate link mode whose patch site is |src|
-+ // and whose target is |dst|.
-+ void addLongJump(BufferOffset src, BufferOffset dst) {
-+   CodeLabel jumpLabel;
-+   jumpLabel.setLinkMode(CodeLabel::JumpImmediate);
-+   jumpLabel.patchAt()->bind(src.getOffset());
-+   jumpLabel.target()->bind(dst.getOffset());
-+   addCodeLabel(std::move(jumpLabel));
-+ }
-+
-+ public:
-+ // Flush the underlying buffer's pending pool.
-+ void flushBuffer() { m_buffer.flushPool(); }
-+ // Emit |msg| to the assembler spew, prefixed with "; ".
-+ void comment(const char* msg) { spew("; %s", msg); }
-+ // Size in bytes of a single nop instruction.
-+ static uint32_t NopSize() { return 4; }
-+
-+ // --- Static patching API.
-+ static uint64_t ExtractLoad64Value(Instruction* inst0);
-+ static void UpdateLoad64Value(Instruction* inst0, uint64_t value);
-+ static void WriteLoad64Instructions(Instruction* inst0, Register reg,
-+ uint64_t value);
-+
-+ static void PatchWrite_Imm32(CodeLocationLabel label, Imm32 imm);
-+ static uint8_t* NextInstruction(uint8_t* instruction,
-+ uint32_t* count = nullptr);
-+ static void ToggleToJmp(CodeLocationLabel inst_);
-+ static void ToggleToCmp(CodeLocationLabel inst_);
-+
-+ // Intentionally a no-op on this port: the body is empty.
-+ void verifyHeapAccessDisassembly(uint32_t begin, uint32_t end,
-+ const Disassembler::HeapAccess& ha) {}
-+
-+ // --- Public patching API (required by shared code).
-+ static void Bind(uint8_t* rawCode, const CodeLabel& label);
-+ void processCodeLabels(uint8_t* rawCode);
-+
-+ static void TraceJumpRelocations(JSTracer* trc, JitCode* code,
-+ CompactBufferReader& reader);
-+ static void TraceDataRelocations(JSTracer* trc, JitCode* code,
-+ CompactBufferReader& reader);
-+
-+ void executableCopy(uint8_t* buffer);
-+
-+ static uint32_t PatchWrite_NearCallSize();
-+ static void PatchWrite_NearCall(CodeLocationLabel start,
-+ CodeLocationLabel toCall);
-+ static void PatchDataWithValueCheck(CodeLocationLabel label, ImmPtr newValue,
-+ ImmPtr expectedValue);
-+ static void PatchDataWithValueCheck(CodeLocationLabel label,
-+ PatchedImmPtr newValue,
-+ PatchedImmPtr expectedValue);
-+ static void ToggleCall(CodeLocationLabel inst_, bool enabled);
-+
-+ private:
-+ GeneralRegisterSet scratch_register_list_;
-+
-+ public:
-+ // Mutable access to this assembler's scratch-register set.
-+ GeneralRegisterSet* GetScratchRegisterList() {
-+ return &scratch_register_list_;
-+ }
-+}; // Assembler
-+
-+// A wasm access counts as unaligned when it declares a non-zero alignment
-+// that is smaller than the number of bytes it touches. An alignment of zero
-+// is never reported as unaligned.
-+inline bool IsUnaligned(const wasm::MemoryAccessDesc& access) {
-+  const auto alignment = access.align();
-+  return alignment != 0 && alignment < access.byteSize();
-+}
-+
-+} // namespace jit
-+} // namespace js
-+
-+// Whether an Imm32 fits in an unsigned 16-bit immediate, i.e. the upper
-+// 16 bits of its value are all zero.
-+#define PPC_IMM_OK_U(x) (MOZ_LIKELY(((x).value & 0xffff0000) == 0))
-+
-+// Whether an Imm32 fits in a signed 16-bit immediate: bits 15..31 must be
-+// all zero (non-negative) or all one (negative).
-+#define PPC_IMM_OK_S(x) \
-+ (MOZ_LIKELY(((x).value & 0xffff8000) == 0 || \
-+ ((x).value & 0xffff8000) == 0xffff8000))
-+
-+// Whether the offset part of an Address fits in a signed 16-bit immediate.
-+// Same bit test as PPC_IMM_OK_S, applied to (x).offset.
-+#define PPC_OFFS_OK(x) \
-+ (MOZ_LIKELY(((x).offset & 0xffff8000) == 0 || \
-+ ((x).offset & 0xffff8000) == 0xffff8000))
-+
-+// Same test but checking a bit ahead (for paired loads): the offset is
-+// advanced by |incr| before the range check.
-+#define PPC_OFFS_INCR_OK(x, incr) \
-+ (MOZ_LIKELY((((x).offset + (incr)) & 0xffff8000) == 0 || \
-+ (((x).offset + (incr)) & 0xffff8000) == 0xffff8000))
-+
-+#endif /* jit_ppc64_Assembler_ppc64_h */
-diff --git a/js/src/jit/ppc64/CodeGenerator-ppc64.cpp b/js/src/jit/ppc64/CodeGenerator-ppc64.cpp
-new file mode 100644
-index 000000000000..0a436fb1201a
---- /dev/null
-+++ b/js/src/jit/ppc64/CodeGenerator-ppc64.cpp
-@@ -0,0 +1,3647 @@
-+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
-+ * vim: set ts=8 sts=2 et sw=2 tw=80:
-+ * This Source Code Form is subject to the terms of the Mozilla Public
-+ * License, v. 2.0. If a copy of the MPL was not distributed with this
-+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-+
-+#include "jit/ppc64/CodeGenerator-ppc64.h"
-+
-+#include "mozilla/MathAlgorithms.h"
-+
-+#include <bit>
-+
-+#include "builtin/Number.h"
-+#include "jit/CodeGenerator.h"
-+#include "jit/InlineScriptTree.h"
-+#include "jit/JitRuntime.h"
-+#include "jit/MIR-wasm.h"
-+#include "jit/MIR.h"
-+#include "jit/MIRGraph.h"
-+#include "vm/JSContext.h"
-+#include "vm/Realm.h"
-+#include "vm/Shape.h"
-+
-+#include "jit/shared/CodeGenerator-shared-inl.h"
-+#include "vm/JSScript-inl.h"
-+
-+using namespace js;
-+using namespace js::jit;
-+
-+using JS::GenericNaN;
-+using mozilla::NegativeInfinity;
-+
-+namespace js {
-+namespace jit {
-+
-+// Forwards every argument to CodeGeneratorShared; there is no ppc64-specific
-+// state to initialise here.
-+CodeGeneratorPPC64::CodeGeneratorPPC64(MIRGenerator* gen, LIRGraph* graph,
-+ MacroAssembler* masm,
-+ const wasm::CodeMetadata* codeMeta)
-+ : CodeGeneratorShared(gen, graph, masm, codeMeta) {}
-+
-+// Wrap an LAllocation as an Operand: a general register, a float register,
-+// or (for anything else) the address ToAddress() resolves it to.
-+Operand CodeGeneratorPPC64::ToOperand(const LAllocation& a) {
-+  const bool inGpr = a.isGeneralReg();
-+  if (!inGpr && !a.isFloatReg()) {
-+    return Operand(ToAddress(a));
-+  }
-+  return inGpr ? Operand(a.toGeneralReg()->reg())
-+               : Operand(a.toFloatReg()->reg());
-+}
-+
-+// Pointer convenience overload; dereferences |a| and defers to the
-+// reference overload.
-+Operand CodeGeneratorPPC64::ToOperand(const LAllocation* a) {
-+ return ToOperand(*a);
-+}
-+
-+// Convert an LAllocation into a MoveOperand. Register allocations map
-+// directly; everything else becomes an address operand, which is an
-+// EffectiveAddress for stack areas and a Memory operand otherwise.
-+MoveOperand CodeGeneratorPPC64::toMoveOperand(LAllocation a) const {
-+  if (a.isGeneralReg()) {
-+    return MoveOperand(ToRegister(a));
-+  }
-+  if (a.isFloatReg()) {
-+    return MoveOperand(ToFloatRegister(a));
-+  }
-+
-+  const Address slot = ToAddress(a);
-+  // The offset is asserted to be a multiple of 4.
-+  MOZ_ASSERT((slot.offset & 3) == 0);
-+  if (a.isStackArea()) {
-+    return MoveOperand(slot, MoveOperand::Kind::EffectiveAddress);
-+  }
-+  return MoveOperand(slot, MoveOperand::Kind::Memory);
-+}
-+
-+// Route every jump recorded on |label| to an out-of-line stub that pushes
-+// the snapshot offset and jumps to the shared deopt label.
-+// |label| must be used and not yet bound (unless the assembler hit OOM).
-+void CodeGeneratorPPC64::bailoutFrom(Label* label, LSnapshot* snapshot) {
-+ MOZ_ASSERT_IF(!masm.oom(), label->used());
-+ MOZ_ASSERT_IF(!masm.oom(), !label->bound());
-+
-+ encode(snapshot);
-+
-+ InlineScriptTree* tree = snapshot->mir()->block()->trackedTree();
-+ // The lambda captures |snapshot| by copy and |this| explicitly; its body
-+ // is the out-of-line code for this bailout.
-+ auto* ool = new (alloc()) LambdaOutOfLineCode([=, this](OutOfLineCode& ool) {
-+ // Push snapshotOffset and make sure stack is aligned.
-+ masm.subPtr(Imm32(sizeof(Value)), StackPointer);
-+ masm.storePtr(ImmWord(snapshot->snapshotOffset()),
-+ Address(StackPointer, 0));
-+ masm.jump(&deoptLabel_);
-+ });
-+ addOutOfLineCode(ool,
-+ new (alloc()) BytecodeSite(tree, tree->script()->code()));
-+
-+ // Redirect all existing uses of |label| to the stub's entry point.
-+ masm.retarget(label, ool->entry());
-+}
-+
-+// Unconditional bailout: emit a jump to a fresh label and hand that label
-+// to bailoutFrom() so it is retargeted to the bailout stub.
-+void CodeGeneratorPPC64::bailout(LSnapshot* snapshot) {
-+  Label toBailout;
-+  masm.jump(&toBailout);
-+  bailoutFrom(&toBailout, snapshot);
-+}
-+
-+// Bail out when the low byte of |lhs| is zero, i.e. the boolean is false.
-+void CodeGeneratorPPC64::bailoutIfFalseBool(Register lhs, LSnapshot* snapshot) {
-+  Label isFalse;
-+  masm.branchTest32(Assembler::Zero, lhs, Imm32(0xFF), &isFalse);
-+  bailoutFrom(&isFalse, snapshot);
-+}
-+
-+// Emit the shared out-of-line code and then, if any bailout referenced it,
-+// the common deopt tail. Returns false on failure or assembler OOM.
-+bool CodeGeneratorPPC64::generateOutOfLineCode() {
-+  if (!CodeGeneratorShared::generateOutOfLineCode()) {
-+    return false;
-+  }
-+
-+  // No bailout stub jumps to the deopt label: nothing more to emit.
-+  if (!deoptLabel_.used()) {
-+    return !masm.oom();
-+  }
-+
-+  masm.bind(&deoptLabel_);
-+
-+  // Frame size is stored in LR and pushed by GenerateBailoutThunk
-+  // (via PushBailoutFrame -> pushReturnAddress -> mflr).
-+  {
-+    UseScratchRegisterScope temps(masm);
-+    Register frameSizeReg = temps.Acquire();
-+    masm.movePtr(ImmWord(frameSize()), frameSizeReg);
-+    masm.xs_mtlr(frameSizeReg);
-+  }
-+
-+  masm.jump(gen->jitRuntime()->getGenericBailoutHandler());
-+
-+  return !masm.oom();
-+}
-+
-+// Unconditional jump to |block|, skipping over any trivial blocks first.
-+void CodeGeneratorPPC64::branchToBlock(MBasicBlock* block) {
-+  masm.jump(skipTrivialBlocks(block)->lir()->label());
-+}
-+
-+// Branch to |mir| (after skipping trivial blocks) when the double comparison
-+// |lhs cond rhs| holds.
-+void CodeGeneratorPPC64::branchToBlock(Assembler::DoubleCondition cond,
-+                                       FloatRegister lhs, FloatRegister rhs,
-+                                       MBasicBlock* mir) {
-+  masm.branchDouble(cond, lhs, rhs, skipTrivialBlocks(mir)->lir()->label());
-+}
-+
-+// Format-aware variant: compares as doubles when |fmt| is DoubleFloat and
-+// as single-precision floats otherwise, then branches to |mir|.
-+void CodeGeneratorPPC64::branchToBlock(Assembler::FloatFormat fmt,
-+                                       Assembler::DoubleCondition cond,
-+                                       FloatRegister lhs, FloatRegister rhs,
-+                                       MBasicBlock* mir) {
-+  Label* target = skipTrivialBlocks(mir)->lir()->label();
-+  if (fmt != Assembler::DoubleFloat) {
-+    masm.branchFloat(cond, lhs, rhs, target);
-+    return;
-+  }
-+  masm.branchDouble(cond, lhs, rhs, target);
-+}
-+
-+// Out-of-line code holding the jump table for an MTableSwitch. jumpLabel_
-+// marks the start of the table; visitOutOfLineTableSwitch() emits the
-+// entries.
-+class OutOfLineTableSwitch : public OutOfLineCodeBase<CodeGeneratorPPC64> {
-+  MTableSwitch* mir_;
-+  CodeLabel jumpLabel_;
-+
-+  // Marked override so a signature drift against the base class becomes a
-+  // compile error instead of silently introducing a new virtual.
-+  void accept(CodeGeneratorPPC64* codegen) override {
-+    codegen->visitOutOfLineTableSwitch(this);
-+  }
-+
-+ public:
-+  explicit OutOfLineTableSwitch(MTableSwitch* mir) : mir_(mir) {}
-+
-+  MTableSwitch* mir() const { return mir_; }
-+  CodeLabel* jumpLabel() { return &jumpLabel_; }
-+};
-+
-+// Emit the dispatch sequence for a table switch: rebase |index| to zero,
-+// send out-of-range values to the default case, then jump through the
-+// pointer-sized table entry selected by |index|. |base| is clobbered with
-+// the table address and |index| is modified in place.
-+void CodeGeneratorPPC64::emitTableSwitchDispatch(MTableSwitch* mir,
-+                                                 Register index,
-+                                                 Register base) {
-+  Label* fallback = skipTrivialBlocks(mir->getDefault())->lir()->label();
-+
-+  // Normalise the index so the lowest case maps to table slot 0.
-+  if (mir->low() != 0) {
-+    masm.subPtr(Imm32(mir->low()), index);
-+  }
-+
-+  // Unsigned compare: anything at or above the case count goes to default.
-+  int32_t caseCount = mir->numCases();
-+  masm.branchPtr(Assembler::AboveOrEqual, index, ImmWord(caseCount), fallback);
-+
-+  // The table itself is emitted out of line.
-+  OutOfLineTableSwitch* table = new (alloc()) OutOfLineTableSwitch(mir);
-+  addOutOfLineCode(table, mir);
-+
-+  masm.mov(table->jumpLabel(), base);
-+
-+  masm.branchToComputedAddress(BaseIndex(base, index, ScalePointer));
-+}
-+
-+// Emit the invalidation epilogue: nop padding, the bound invalidate_ label,
-+// a push of the return address and of a patchable word, and a jump to the
-+// invalidation thunk.
-+void CodeGeneratorPPC64::generateInvalidateEpilogue() {
-+  // Pad with enough nops so that PatchWrite_NearCall on the last OSI point
-+  // cannot overlap the invalidation epilogue. The patch area is
-+  // PatchWrite_NearCallSize (40) bytes; the last OSI point could be right
-+  // before this epilogue.
-+  const size_t padBytes = Assembler::PatchWrite_NearCallSize();
-+  const size_t nopBytes = Assembler::NopSize();
-+  for (size_t emitted = 0; emitted < padBytes; emitted += nopBytes) {
-+    masm.nop();
-+  }
-+
-+  masm.bind(&invalidate_);
-+
-+  // Push the return address (LR) onto the stack.
-+  masm.pushReturnAddress();
-+
-+  // Placeholder word; its offset is remembered in invalidateEpilogueData_.
-+  invalidateEpilogueData_ = masm.pushWithPatch(ImmWord(uintptr_t(-1)));
-+
-+  masm.jump(gen->jitRuntime()->getInvalidationThunk());
-+}
-+
-+// Emit the jump table for a table switch: one code pointer per case, each
-+// bound to the offset of that case's (non-trivial) block label.
-+void CodeGeneratorPPC64::visitOutOfLineTableSwitch(OutOfLineTableSwitch* ool) {
-+  MTableSwitch* table = ool->mir();
-+
-+  // Align the table to pointer size before binding its start label.
-+  masm.haltingAlign(sizeof(void*));
-+  masm.bind(ool->jumpLabel());
-+  masm.addCodeLabel(*ool->jumpLabel());
-+
-+  for (size_t caseIdx = 0; caseIdx < table->numCases(); caseIdx++) {
-+    Label* caseEntry = skipTrivialBlocks(table->getCase(caseIdx))->lir()->label();
-+    uint32_t entryOffset = caseEntry->offset();
-+
-+    CodeLabel slot;
-+    masm.writeCodePointer(&slot);
-+    slot.target()->bind(entryOffset);
-+    masm.addCodeLabel(slot);
-+  }
-+}
-+
-+// Dispatch the out-of-line wasm truncation check to the Int32 or Int64
-+// MacroAssembler helper according to the target type.
-+void CodeGeneratorPPC64::visitOutOfLineWasmTruncateCheck(
-+    OutOfLineWasmTruncateCheck* ool) {
-+  if (ool->toType() == MIRType::Int32) {
-+    masm.outOfLineWasmTruncateToInt32Check(ool->input(), ool->output(),
-+                                           ool->fromType(), ool->flags(),
-+                                           ool->rejoin(), ool->trapSiteDesc());
-+    return;
-+  }
-+
-+  // Only Int32 and Int64 targets are expected here.
-+  MOZ_ASSERT(ool->toType() == MIRType::Int64);
-+  masm.outOfLineWasmTruncateToInt64Check(ool->input(), ool->output64(),
-+                                         ool->fromType(), ool->flags(),
-+                                         ool->rejoin(), ool->trapSiteDesc());
-+}
-+
-+// Emit a single divd computing output = dividend / divisor. |ins| is unused.
-+// NOTE(review): no zero-divisor or overflow handling happens here;
-+// presumably the caller has already guarded those cases - confirm.
-+void CodeGeneratorPPC64::emitBigIntPtrDiv(LBigIntPtrDiv* ins, Register dividend,
-+ Register divisor, Register output) {
-+ masm.as_divd(output, dividend, divisor);
-+}
-+
-+// Emit output = dividend % divisor. POWER9 uses a single modsd; earlier
-+// CPUs compute dividend - (dividend / divisor) * divisor. |ins| is unused.
-+void CodeGeneratorPPC64::emitBigIntPtrMod(LBigIntPtrMod* ins, Register dividend,
-+                                          Register divisor, Register output) {
-+  if (!HasPOWER9()) {
-+    // NOTE(review): this sequence writes |output| before it last reads
-+    // |dividend| and |divisor|, so it assumes |output| aliases neither -
-+    // confirm against the lowering.
-+    masm.as_divd(output, dividend, divisor);
-+    masm.as_mulld(output, output, divisor);
-+    masm.as_subf(output, output, dividend);
-+    return;
-+  }
-+  masm.as_modsd(output, dividend, divisor);
-+}
-+
-+// ===============================================================
-+// Visitors: Box/Unbox
-+
-+// Box the typed payload in operand 0 into the instruction's output Value.
-+void CodeGenerator::visitBox(LBox* box) {
-+  const LAllocation* payload = box->getOperand(0);
-+  ValueOperand boxed = ToOutValue(box);
-+
-+  TypedOrValueRegister source(box->type(), ToAnyRegister(payload));
-+  masm.moveValue(source, boxed);
-+}
-+
-+// Unbox a Value into |result| according to the MIR type. There are three
-+// paths: a fallible unbox that bails out on a type mismatch, an infallible
-+// unbox from a register, and an infallible unbox from memory. Each handles
-+// Int32, Boolean, Object, String, Symbol and BigInt and crashes on any
-+// other type.
-+void CodeGenerator::visitUnbox(LUnbox* unbox) {
-+ MUnbox* mir = unbox->mir();
-+
-+ Register result = ToRegister(unbox->output());
-+
-+ // Fallible path: the helper jumps to |bail| when the tag does not match.
-+ if (mir->fallible()) {
-+ ValueOperand value = ToValue(unbox->input());
-+ Label bail;
-+ switch (mir->type()) {
-+ case MIRType::Int32:
-+ masm.fallibleUnboxInt32(value, result, &bail);
-+ break;
-+ case MIRType::Boolean:
-+ masm.fallibleUnboxBoolean(value, result, &bail);
-+ break;
-+ case MIRType::Object:
-+ masm.fallibleUnboxObject(value, result, &bail);
-+ break;
-+ case MIRType::String:
-+ masm.fallibleUnboxString(value, result, &bail);
-+ break;
-+ case MIRType::Symbol:
-+ masm.fallibleUnboxSymbol(value, result, &bail);
-+ break;
-+ case MIRType::BigInt:
-+ masm.fallibleUnboxBigInt(value, result, &bail);
-+ break;
-+ default:
-+ MOZ_CRASH("Given MIRType cannot be unboxed.");
-+ }
-+ bailoutFrom(&bail, unbox->snapshot());
-+ return;
-+ }
-+
-+ // Infallible path, input held in a general-purpose register.
-+ LAllocation* input = unbox->getOperand(LUnbox::Input);
-+ if (input->isGeneralReg()) {
-+ Register inputReg = ToRegister(input);
-+ switch (mir->type()) {
-+ case MIRType::Int32:
-+ masm.unboxInt32(ValueOperand(inputReg), result);
-+ break;
-+ case MIRType::Boolean:
-+ masm.unboxBoolean(ValueOperand(inputReg), result);
-+ break;
-+ case MIRType::Object:
-+ masm.unboxObject(ValueOperand(inputReg), result);
-+ break;
-+ case MIRType::String:
-+ masm.unboxString(ValueOperand(inputReg), result);
-+ break;
-+ case MIRType::Symbol:
-+ masm.unboxSymbol(ValueOperand(inputReg), result);
-+ break;
-+ case MIRType::BigInt:
-+ masm.unboxBigInt(ValueOperand(inputReg), result);
-+ break;
-+ default:
-+ MOZ_CRASH("Given MIRType cannot be unboxed.");
-+ }
-+ return;
-+ }
-+
-+ // Infallible path, input held in memory.
-+ Address inputAddr = ToAddress(input);
-+ switch (mir->type()) {
-+ case MIRType::Int32:
-+ masm.unboxInt32(inputAddr, result);
-+ break;
-+ case MIRType::Boolean:
-+ masm.unboxBoolean(inputAddr, result);
-+ break;
-+ case MIRType::Object:
-+ masm.unboxObject(inputAddr, result);
-+ break;
-+ case MIRType::String:
-+ masm.unboxString(inputAddr, result);
-+ break;
-+ case MIRType::Symbol:
-+ masm.unboxSymbol(inputAddr, result);
-+ break;
-+ case MIRType::BigInt:
-+ masm.unboxBigInt(inputAddr, result);
-+ break;
-+ default:
-+ MOZ_CRASH("Given MIRType cannot be unboxed.");
-+ }
-+}
-+
-+// ===============================================================
-+// Visitors: Integer Arithmetic
-+
-+// 32-bit integer add. When the instruction carries a snapshot, signed
-+// overflow bails out; otherwise the result is simply the sum.
-+void CodeGenerator::visitAddI(LAddI* ins) {
-+ LAllocation* lhs = ins->getOperand(0);
-+ LAllocation* rhs = ins->getOperand(1);
-+ Register dest = ToRegister(ins->getDef(0));
-+
-+ if (rhs->isConstant()) {
-+ Imm32 imm(ToInt32(rhs));
-+ if (ins->snapshot()) {
-+ // Copy lhs first: branchAdd32 adds into dest in place.
-+ masm.move32(ToRegister(lhs), dest);
-+ Label overflow;
-+ masm.branchAdd32(Assembler::Overflow, imm, dest, &overflow);
-+ bailoutFrom(&overflow, ins->snapshot());
-+ } else {
-+ masm.add32(imm, ToRegister(lhs), dest);
-+ }
-+ } else {
-+ Register rhsReg = ToRegister(rhs);
-+ if (ins->snapshot()) {
-+ // Use 3-operand add to avoid clobbering rhs when rhs == dest.
-+ masm.as_add(dest, ToRegister(lhs), rhsReg);
-+ // Check 32-bit overflow: sign-extend lower 32 and compare.
-+ // Clobbers SecondScratchReg.
-+ masm.as_extsw(SecondScratchReg, dest);
-+ Label overflow;
-+ masm.as_cmpd(dest, SecondScratchReg);
-+ masm.ma_b(Assembler::NotEqual, &overflow);
-+ masm.as_extsw(dest, dest);
-+ bailoutFrom(&overflow, ins->snapshot());
-+ } else {
-+ // No overflow check: add, then sign-extend the low 32 bits of the sum.
-+ masm.as_add(dest, ToRegister(lhs), rhsReg);
-+ masm.as_extsw(dest, dest);
-+ }
-+ }
-+}
-+
-+// Pointer-width add with no overflow check. The right-hand side may be a
-+// register or a constant.
-+void CodeGenerator::visitAddIntPtr(LAddIntPtr* ins) {
-+  Register dest = ToRegister(ins->getDef(0));
-+  Register lhs = ToRegister(ins->getOperand(0));
-+  const LAllocation* rhs = ins->getOperand(1);
-+
-+  if (!rhs->isConstant()) {
-+    masm.as_add(dest, lhs, ToRegister(rhs));
-+    return;
-+  }
-+
-+  // Constant operand: addPtr works in place, so seed dest with lhs first.
-+  if (lhs != dest) {
-+    masm.movePtr(lhs, dest);
-+  }
-+  masm.addPtr(ImmWord(ToIntPtr(rhs)), dest);
-+}
-+
-+// 64-bit integer add with no overflow check. The right-hand side may be a
-+// register or a constant.
-+void CodeGenerator::visitAddI64(LAddI64* lir) {
-+  Register dest = ToRegister(lir->getDef(0));
-+  Register lhs = ToRegister(lir->getOperand(0));
-+  const LAllocation* rhs = lir->getOperand(1);
-+
-+  if (!rhs->isConstant()) {
-+    masm.as_add(dest, lhs, ToRegister(rhs));
-+    return;
-+  }
-+
-+  // Constant operand: addPtr works in place, so seed dest with lhs first.
-+  if (lhs != dest) {
-+    masm.movePtr(lhs, dest);
-+  }
-+  masm.addPtr(ImmWord(ToInt64(rhs)), dest);
-+}
-+
-+// Emit a 32-bit integer subtract. When the instruction carries a snapshot,
-+// an int32 overflow branches to a bailout; otherwise the result wraps.
-+void CodeGenerator::visitSubI(LSubI* ins) {
-+  Register lhsReg = ToRegister(ins->getOperand(0));
-+  LAllocation* subtrahend = ins->getOperand(1);
-+  Register out = ToRegister(ins->getDef(0));
-+  const bool checked = ins->snapshot() != nullptr;
-+
-+  if (subtrahend->isConstant()) {
-+    Imm32 imm(ToInt32(subtrahend));
-+    // Both constant variants start from a copy of lhs in the output.
-+    masm.move32(lhsReg, out);
-+    if (!checked) {
-+      masm.sub32(imm, out);
-+      return;
-+    }
-+    Label overflow;
-+    masm.branchSub32(Assembler::Overflow, imm, out, &overflow);
-+    bailoutFrom(&overflow, ins->snapshot());
-+    return;
-+  }
-+
-+  // as_subf(d, a, b) computes d = b - a, so passing (out, rhs, lhs) yields
-+  // lhs - rhs.
-+  Register rhsReg = ToRegister(subtrahend);
-+  masm.as_subf(out, rhsReg, lhsReg);
-+
-+  if (!checked) {
-+    masm.as_extsw(out, out);
-+    return;
-+  }
-+
-+  // Overflow test: the difference overflowed int32 iff sign-extending its
-+  // low 32 bits yields a different 64-bit value.
-+  Label overflow;
-+  masm.as_extsw(SecondScratchReg, out);
-+  masm.as_cmpd(out, SecondScratchReg);
-+  masm.ma_b(Assembler::NotEqual, &overflow);
-+  masm.as_extsw(out, out);
-+  bailoutFrom(&overflow, ins->snapshot());
-+}
-+
-+// Emit a pointer-width subtract (dest = lhs - rhs) with no overflow check.
-+void CodeGenerator::visitSubIntPtr(LSubIntPtr* ins) {
-+  Register dest = ToRegister(ins->getDef(0));
-+  Register lhs = ToRegister(ins->getOperand(0));
-+  const LAllocation* rhs = ins->getOperand(1);
-+
-+  if (rhs->isConstant()) {
-+    if (lhs != dest) {
-+      masm.movePtr(lhs, dest);
-+    }
-+    // Subtract the full pointer-width constant. Wrapping it in Imm32 would
-+    // silently drop the upper 32 bits of an intptr_t that does not fit in
-+    // int32, so use the 64-bit immediate form, as visitSubI64 does.
-+    masm.sub64(Imm64(ToIntPtr(rhs)), Register64(dest));
-+  } else {
-+    // as_subf(d, a, b) = b - a
-+    masm.as_subf(dest, ToRegister(rhs), lhs);
-+  }
-+}
-+
-+// Emit a 64-bit integer subtract (dest = lhs - rhs).
-+void CodeGenerator::visitSubI64(LSubI64* lir) {
-+  const LAllocation* subtrahend = lir->getOperand(1);
-+  Register minuend = ToRegister(lir->getOperand(0));
-+  Register out = ToRegister(lir->getDef(0));
-+
-+  if (!subtrahend->isConstant()) {
-+    // as_subf(d, a, b) computes b - a, hence the swapped operand order.
-+    masm.as_subf(out, ToRegister(subtrahend), minuend);
-+    return;
-+  }
-+
-+  // Constant subtrahend: get lhs into the output, then subtract in place.
-+  if (minuend != out) {
-+    masm.movePtr(minuend, out);
-+  }
-+  masm.sub64(Imm64(ToInt64(subtrahend)), Register64(out));
-+}
-+
-+// Emit a 32-bit integer multiply. Constant right-hand sides get
-+// strength-reduced forms (-1, 0, 1, 2, powers of two); overflow and
-+// negative-zero results bail out when the MIR node says they are possible.
-+void CodeGenerator::visitMulI(LMulI* ins) {
-+  Register out = ToRegister(ins->getDef(0));
-+  Register lhs = ToRegister(ins->getOperand(0));
-+  const LAllocation* rhs = ins->getOperand(1);
-+  MMul* mul = ins->mir();
-+
-+  if (rhs->isConstant()) {
-+    const int32_t factor = ToInt32(rhs);
-+
-+    // The -0 bailout has to precede the special cases below, because several
-+    // of them return early. With a zero factor the result is -0 when lhs is
-+    // negative; with a negative factor it is -0 when lhs is zero.
-+    if (mul->canBeNegativeZero() && factor <= 0) {
-+      Assembler::Condition negZeroCond = Assembler::Equal;
-+      if (factor == 0) {
-+        negZeroCond = Assembler::Signed;
-+      }
-+      bailoutCmp32(negZeroCond, lhs, Imm32(0), ins->snapshot());
-+    }
-+
-+    if (factor == -1) {
-+      // Negation only overflows for INT32_MIN.
-+      if (mul->canOverflow()) {
-+        Label notMin;
-+        masm.branchPtr(Assembler::NotEqual, lhs, ImmWord(INT32_MIN), &notMin);
-+        bailout(ins->snapshot());
-+        masm.bind(&notMin);
-+      }
-+      masm.as_neg(out, lhs);
-+      masm.as_extsw(out, out);
-+      return;
-+    }
-+
-+    if (factor == 0) {
-+      masm.move32(Imm32(0), out);
-+      return;
-+    }
-+
-+    if (factor == 1) {
-+      masm.move32(lhs, out);
-+      return;
-+    }
-+
-+    if (factor == 2) {
-+      // x * 2 is emitted as x + x.
-+      masm.move32(lhs, out);
-+      if (mul->canOverflow()) {
-+        Label overflow;
-+        masm.branchAdd32(Assembler::Overflow, out, out, &overflow);
-+        bailoutFrom(&overflow, ins->snapshot());
-+      } else {
-+        masm.add32(out, out);
-+      }
-+      return;
-+    }
-+
-+    // Power-of-two magnitude without overflow checking: shift, then negate
-+    // if the factor was negative.
-+    uint32_t magnitude = mozilla::Abs(factor);
-+    const bool isPowerOfTwo =
-+        magnitude > 0 && (magnitude & (magnitude - 1)) == 0;
-+    if (isPowerOfTwo && !mul->canOverflow()) {
-+      uint32_t log2 = mozilla::FloorLog2(magnitude);
-+      masm.x_slwi(out, lhs, log2);
-+      if (factor < 0) {
-+        masm.as_neg(out, out);
-+      }
-+      masm.as_extsw(out, out);
-+      return;
-+    }
-+
-+    // General constant multiply.
-+    masm.move32(lhs, out);
-+    if (mul->canOverflow()) {
-+      Label overflow;
-+      masm.branchMul32(Assembler::Overflow, Imm32(factor), out, &overflow);
-+      bailoutFrom(&overflow, ins->snapshot());
-+    } else {
-+      masm.mul32(Imm32(factor), out);
-+    }
-+
-+    // Negative-zero check for negative factors not handled above.
-+    if (mul->canBeNegativeZero() && factor < 0) {
-+      Label nonZero;
-+      masm.branchPtr(Assembler::NotEqual, out, ImmWord(0), &nonZero);
-+      bailoutCmp32(Assembler::Signed, lhs, lhs, ins->snapshot());
-+      masm.bind(&nonZero);
-+    }
-+    return;
-+  }
-+
-+  Register rhsReg = ToRegister(rhs);
-+
-+  if (!mul->canOverflow()) {
-+    masm.as_mullw(out, lhs, rhsReg);
-+    masm.as_extsw(out, out);
-+  } else {
-+    // Multiply in 64 bits so the full product is available, then detect
-+    // overflow by checking whether sign-extending the low 32 bits changes
-+    // the value. As in visitAddI/visitSubI, the branch is emitted before the
-+    // final truncation, so dest is only normalised on the success path.
-+    Label overflow;
-+    masm.as_mulld(out, lhs, rhsReg);
-+    masm.as_extsw(SecondScratchReg, out);
-+    masm.as_cmpd(out, SecondScratchReg);
-+    masm.ma_b(Assembler::NotEqual, &overflow);
-+    masm.as_extsw(out, out);
-+    bailoutFrom(&overflow, ins->snapshot());
-+  }
-+
-+  if (mul->canBeNegativeZero()) {
-+    Label nonZero;
-+    masm.branchPtr(Assembler::NotEqual, out, ImmWord(0), &nonZero);
-+    // The product is 0: it is really -0 if either operand was negative,
-+    // which shows up as the sign bit of lhs | rhs.
-+    {
-+      UseScratchRegisterScope temps(masm);
-+      Register signs = temps.Acquire();
-+      masm.as_or_(signs, lhs, rhsReg);
-+      bailoutCmp32(Assembler::Signed, signs, signs, ins->snapshot());
-+    }
-+    masm.bind(&nonZero);
-+  }
-+}
-+
-+// Emit a pointer-width multiply (dest = lhs * rhs) with no overflow check.
-+void CodeGenerator::visitMulIntPtr(LMulIntPtr* ins) {
-+  const LAllocation* factor = ins->getOperand(1);
-+  Register base = ToRegister(ins->getOperand(0));
-+  Register out = ToRegister(ins->getDef(0));
-+
-+  if (!factor->isConstant()) {
-+    masm.as_mulld(out, base, ToRegister(factor));
-+    return;
-+  }
-+
-+  // Constant factor: get lhs into the output, then multiply in place.
-+  if (base != out) {
-+    masm.movePtr(base, out);
-+  }
-+  masm.mulPtr(ImmWord(ToIntPtr(factor)), out);
-+}
-+
-+// Emit a 64-bit integer multiply (dest = lhs * rhs).
-+void CodeGenerator::visitMulI64(LMulI64* lir) {
-+  const LAllocation* factor = lir->getOperand(1);
-+  Register base = ToRegister(lir->getOperand(0));
-+  Register out = ToRegister(lir->getDef(0));
-+
-+  if (!factor->isConstant()) {
-+    masm.as_mulld(out, base, ToRegister(factor));
-+    return;
-+  }
-+
-+  // Constant factor: get lhs into the output, then multiply in place.
-+  if (base != out) {
-+    masm.movePtr(base, out);
-+  }
-+  masm.mulPtr(ImmWord(ToInt64(factor)), out);
-+}
-+
-+// Emit a signed 32-bit division. Division by zero, INT32_MIN / -1, a
-+// negative-zero result and a non-zero remainder are each handled by a trap,
-+// a truncated result or a bailout, as selected by the MDiv node.
-+void CodeGenerator::visitDivI(LDivI* ins) {
-+  Register lhs = ToRegister(ins->lhs());
-+  Register rhs = ToRegister(ins->rhs());
-+  Register dest = ToRegister(ins->output());
-+  Register temp = ToRegister(ins->temp0());
-+  MDiv* mir = ins->mir();
-+
-+  Label done;
-+
-+  // Handle divide by zero.
-+  if (mir->canBeDivideByZero()) {
-+    if (mir->trapOnError()) {
-+      Label nonZero;
-+      masm.branchPtr(Assembler::NotEqual, rhs, ImmWord(0), &nonZero);
-+      masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->trapSiteDesc());
-+      masm.bind(&nonZero);
-+    } else if (mir->canTruncateInfinities()) {
-+      // Truncated x / 0 yields integer zero.
-+      Label nonZero;
-+      masm.branchPtr(Assembler::NotEqual, rhs, ImmWord(0), &nonZero);
-+      masm.move32(Imm32(0), dest);
-+      masm.jump(&done);
-+      masm.bind(&nonZero);
-+    } else {
-+      MOZ_ASSERT(mir->fallible());
-+      bailoutCmp32(Assembler::Equal, rhs, Imm32(0), ins->snapshot());
-+    }
-+  }
-+
-+  // Handle INT32_MIN / -1 overflow.
-+  if (mir->canBeNegativeOverflow()) {
-+    Label notMinInt;
-+    masm.branchPtr(Assembler::NotEqual, lhs, ImmWord(INT32_MIN), &notMinInt);
-+    masm.branchPtr(Assembler::NotEqual, rhs, ImmWord(-1), &notMinInt);
-+
-+    if (mir->trapOnError()) {
-+      masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->trapSiteDesc());
-+    } else if (mir->canTruncateOverflow()) {
-+      // Truncated INT32_MIN / -1 wraps back to INT32_MIN.
-+      masm.move32(Imm32(INT32_MIN), dest);
-+      masm.jump(&done);
-+    } else {
-+      MOZ_ASSERT(mir->fallible());
-+      bailout(ins->snapshot());
-+    }
-+    masm.bind(&notMinInt);
-+  }
-+
-+  // Handle negative zero: 0 / negative is -0, which int32 cannot represent.
-+  if (!mir->canTruncateNegativeZero() && mir->canBeNegativeZero()) {
-+    Label ok;
-+    masm.branchPtr(Assembler::NotEqual, lhs, ImmWord(0), &ok);
-+    bailoutCmp32(Assembler::LessThan, rhs, Imm32(0), ins->snapshot());
-+    masm.bind(&ok);
-+  }
-+
-+  // Perform the division.
-+  masm.as_divw(dest, lhs, rhs);
-+  masm.as_extsw(dest, dest);
-+
-+  // Check remainder if not truncatable.
-+  if (!mir->canTruncateRemainder()) {
-+    // Compute remainder: temp = lhs - (dest * rhs)
-+    masm.as_mullw(temp, dest, rhs);
-+    masm.as_subf(temp, temp, lhs);  // temp = lhs - temp
-+    bailoutCmp32(Assembler::NotEqual, temp, Imm32(0), ins->snapshot());
-+  }
-+
-+  masm.bind(&done);
-+}
-+
-+// Emit a signed 32-bit division by 2^shift using shifts. A non-truncated
-+// division bails out when any of the low `shift` bits of the numerator are
-+// set, i.e. when the division would leave a remainder.
-+void CodeGenerator::visitDivPowTwoI(LDivPowTwoI* ins) {
-+  Register numerator = ToRegister(ins->numerator());
-+  Register out = ToRegister(ins->output());
-+  UseScratchRegisterScope temps(masm);
-+  Register scratch = temps.Acquire();
-+  const int32_t shift = ins->shift();
-+
-+  // Dividing by 1 is a plain copy.
-+  if (shift == 0) {
-+    masm.move32(numerator, out);
-+    return;
-+  }
-+
-+  MDiv* mir = ins->mir();
-+
-+  if (!mir->isTruncated()) {
-+    // Shift the low `shift` bits to the top of the word; a non-zero result
-+    // means there is a remainder.
-+    masm.x_slwi(scratch, numerator, 32 - shift);
-+    bailoutCmp32(Assembler::NotEqual, scratch, Imm32(0), ins->snapshot());
-+  }
-+
-+  if (!mir->canBeNegativeDividend()) {
-+    // Non-negative dividend: a plain arithmetic right shift is exact.
-+    masm.as_srawi(out, numerator, shift);
-+    return;
-+  }
-+
-+  // Negative dividends need a rounding adjustment: add (1 << shift) - 1
-+  // before shifting when the numerator is negative.
-+  if (shift > 1) {
-+    masm.as_srawi(scratch, numerator, 31);
-+    masm.as_rlwinm(scratch, scratch, 0, 32 - shift, 31);
-+  } else {
-+    // shift == 1: extract the sign bit into bit 31.
-+    masm.as_rlwinm(scratch, numerator, 1, 31, 31);
-+  }
-+  masm.add32(numerator, scratch);
-+  masm.as_srawi(out, scratch, shift);
-+}
-+
-+// Emit a signed 32-bit remainder. Modulo by zero, INT32_MIN % -1 and a
-+// negative-zero result are each handled by a trap, a truncated result or a
-+// bailout, as selected by the MMod node.
-+void CodeGenerator::visitModI(LModI* ins) {
-+  Register lhs = ToRegister(ins->lhs());
-+  Register rhs = ToRegister(ins->rhs());
-+  Register dest = ToRegister(ins->output());
-+  UseScratchRegisterScope temps(masm);
-+  Register temp = temps.Acquire();
-+  MMod* mir = ins->mir();
-+  Label done;
-+
-+  // Handle divide by zero.
-+  if (mir->canBeDivideByZero()) {
-+    if (mir->isTruncated()) {
-+      Label nonZero;
-+      masm.branchPtr(Assembler::NotEqual, rhs, ImmWord(0), &nonZero);
-+      if (mir->trapOnError()) {
-+        masm.wasmTrap(wasm::Trap::IntegerDivideByZero, mir->trapSiteDesc());
-+      } else {
-+        // Truncated modulo by zero yields integer zero. Write dest only on
-+        // the zero path so the operands are untouched when the division
-+        // below actually runs (same shape as visitDivI / visitUDivOrMod).
-+        masm.move32(Imm32(0), dest);
-+        masm.jump(&done);
-+      }
-+      masm.bind(&nonZero);
-+    } else {
-+      MOZ_ASSERT(mir->fallible());
-+      bailoutCmp32(Assembler::Equal, rhs, Imm32(0), ins->snapshot());
-+    }
-+  }
-+
-+  // Handle INT32_MIN % -1.
-+  // PPC64 divw is undefined for INT32_MIN / -1 (quotient overflows), so this
-+  // case must never reach the division below.
-+  if (!mir->isUnsigned()) {
-+    Label notMinOverflow;
-+    masm.branchPtr(Assembler::NotEqual, lhs, ImmWord(INT32_MIN),
-+                   &notMinOverflow);
-+    if (!mir->isTruncated() && mir->canBeNegativeDividend()) {
-+      // In JS, INT32_MIN % -1 is -0, which an int32 result cannot
-+      // represent, so bail out instead of producing +0.
-+      MOZ_ASSERT(mir->fallible());
-+      bailoutCmp32(Assembler::Equal, rhs, Imm32(-1), ins->snapshot());
-+    } else {
-+      // Truncated result is 0; the wasm spec also defines
-+      // rem_s(MIN, -1) = 0.
-+      masm.branchPtr(Assembler::NotEqual, rhs, ImmWord(-1), &notMinOverflow);
-+      masm.move32(Imm32(0), dest);
-+      masm.jump(&done);
-+    }
-+    masm.bind(&notMinOverflow);
-+  }
-+
-+  if (HasPOWER9()) {
-+    masm.as_modsw(dest, lhs, rhs);
-+  } else {
-+    // No modulo instruction: dest = lhs - (lhs / rhs) * rhs.
-+    masm.as_divw(temp, lhs, rhs);
-+    masm.as_mullw(temp, temp, rhs);
-+    masm.as_subf(dest, temp, lhs);
-+  }
-+  masm.as_extsw(dest, dest);
-+
-+  // If X%Y == 0 and X < 0, the result is -0, and we need to bail out.
-+  if (mir->canBeNegativeDividend() && !mir->isTruncated()) {
-+    MOZ_ASSERT(mir->fallible());
-+    Label ok;
-+    masm.branchPtr(Assembler::NotEqual, dest, ImmWord(0), &ok);
-+    bailoutCmp32(Assembler::Signed, lhs, Imm32(0), ins->snapshot());
-+    masm.bind(&ok);
-+  }
-+
-+  masm.bind(&done);
-+}
-+
-+// Emit a signed 32-bit remainder by 2^shift using a mask. Negative inputs
-+// are negated, masked and negated back; a zero result from a negative input
-+// bails out when the node is not truncated.
-+void CodeGenerator::visitModPowTwoI(LModPowTwoI* ins) {
-+  Register input = ToRegister(ins->getOperand(0));
-+  Register result = ToRegister(ins->getDef(0));
-+  MMod* mir = ins->mir();
-+  const int32_t shift = ins->shift();
-+  const uint32_t lowBits = (uint32_t(1) << shift) - 1;
-+  const bool negZeroPossible =
-+      mir->canBeNegativeDividend() && !mir->isTruncated();
-+
-+  if (negZeroPossible) {
-+    Label inputNonZero;
-+    masm.branchPtr(Assembler::NotEqual, input, ImmWord(0), &inputNonZero);
-+    // input == 0: mod is 0, check for negative zero.
-+    bailoutCmp32(Assembler::Signed, input, input, ins->snapshot());
-+    masm.bind(&inputNonZero);
-+  }
-+
-+  Label isNegative, finished;
-+  masm.branch32(Assembler::Signed, input, input, &isNegative);
-+
-+  // Non-negative input: the remainder is just the masked low bits.
-+  masm.and32(Imm32(lowBits), input, result);
-+  masm.jump(&finished);
-+
-+  // Negative input: negate, mask, negate back.
-+  masm.bind(&isNegative);
-+  masm.as_neg(result, input);
-+  masm.and32(Imm32(lowBits), result);
-+  masm.as_neg(result, result);
-+  masm.as_extsw(result, result);
-+
-+  if (negZeroPossible) {
-+    // A zero remainder on the negative path is -0.
-+    Label resultNonZero;
-+    masm.branchPtr(Assembler::NotEqual, result, ImmWord(0), &resultNonZero);
-+    bailout(ins->snapshot());
-+    masm.bind(&resultNonZero);
-+  }
-+
-+  masm.bind(&finished);
-+}
-+
-+// Emit a remainder via ma_mod_mask. A bailout label is supplied only when
-+// the node is not truncated and the dividend may be negative.
-+void CodeGenerator::visitModMaskI(LModMaskI* ins) {
-+  Register input = ToRegister(ins->input());
-+  Register result = ToRegister(ins->output());
-+  Register scratch0 = ToRegister(ins->temp0());
-+  Register scratch1 = ToRegister(ins->temp1());
-+  MMod* mir = ins->mir();
-+
-+  const bool needsBailout =
-+      !mir->isTruncated() && mir->canBeNegativeDividend();
-+  if (!needsBailout) {
-+    masm.ma_mod_mask(input, result, scratch0, scratch1, ins->shift(), nullptr);
-+    return;
-+  }
-+
-+  MOZ_ASSERT(mir->fallible());
-+
-+  Label bail;
-+  masm.ma_mod_mask(input, result, scratch0, scratch1, ins->shift(), &bail);
-+  bailoutFrom(&bail, ins->snapshot());
-+}
-+
-+// Emit a 32-bit integer negation; the result is sign-extended afterwards.
-+void CodeGenerator::visitNegI(LNegI* ins) {
-+  Register result = ToRegister(ins->output());
-+  masm.as_neg(result, ToRegister(ins->input()));
-+  masm.as_extsw(result, result);
-+}
-+
-+// Emit a 64-bit integer negation.
-+void CodeGenerator::visitNegI64(LNegI64* ins) {
-+  Register64 source = ToRegister64(ins->input());
-+  Register64 result = ToOutRegister64(ins);
-+  masm.as_neg(result.reg, source.reg);
-+}
-+
-+// Emit an unsigned 32-bit division or remainder, selected by the MIR node.
-+// Division by zero traps, yields zero, or bails out; a non-truncated result
-+// that does not fit in a signed int32 bails out.
-+void CodeGenerator::visitUDivOrMod(LUDivOrMod* ins) {
-+  Register lhs = ToRegister(ins->lhs());
-+  Register rhs = ToRegister(ins->rhs());
-+  Register output = ToRegister(ins->output());
-+  UseScratchRegisterScope temps(masm);
-+  Register temp = temps.Acquire();
-+  Label done;
-+
-+  // Unsigned remainder of lhs by rhs into `out`: a single instruction on
-+  // POWER9, otherwise lhs - (lhs / rhs) * rhs using `temp`.
-+  auto emitRemainder = [&](Register out) {
-+    if (HasPOWER9()) {
-+      masm.as_moduw(out, lhs, rhs);
-+      return;
-+    }
-+    masm.as_divwu(temp, lhs, rhs);
-+    masm.as_mullw(temp, temp, rhs);
-+    masm.as_subf(out, temp, lhs);
-+  };
-+
-+  // Division by zero check.
-+  if (ins->canBeDivideByZero()) {
-+    if (!ins->mir()->isTruncated()) {
-+      bailoutCmp32(Assembler::Equal, rhs, Imm32(0), ins->snapshot());
-+    } else {
-+      Label nonZero;
-+      masm.branch32(Assembler::NotEqual, rhs, Imm32(0), &nonZero);
-+      if (ins->trapOnError()) {
-+        masm.wasmTrap(wasm::Trap::IntegerDivideByZero, ins->trapSiteDesc());
-+      } else {
-+        // Truncated division by zero yields integer zero.
-+        masm.move32(Imm32(0), output);
-+        masm.jump(&done);
-+      }
-+      masm.bind(&nonZero);
-+    }
-+  }
-+
-+  // Zero-extend both operands to 64 bits for unsigned divide.
-+  // NOTE(review): this rewrites the input registers in place; confirm the
-+  // lowering for LUDivOrMod permits clobbering lhs/rhs.
-+  masm.move32To64ZeroExtend(lhs, Register64(lhs));
-+  masm.move32To64ZeroExtend(rhs, Register64(rhs));
-+
-+  if (!ins->mir()->isDiv()) {
-+    // Modulo path.
-+    emitRemainder(output);
-+  } else {
-+    // Division path: bail out on a non-zero remainder when it may not be
-+    // truncated, then compute the quotient.
-+    if (!ins->mir()->toDiv()->canTruncateRemainder()) {
-+      emitRemainder(temp);
-+      bailoutCmp32(Assembler::NotEqual, temp, Imm32(0), ins->snapshot());
-+    }
-+    masm.as_divwu(output, lhs, rhs);
-+  }
-+
-+  masm.as_extsw(output, output);
-+
-+  // A negative value after sign extension means the unsigned result does
-+  // not fit in int32.
-+  if (!ins->mir()->isTruncated()) {
-+    bailoutCmp32(Assembler::LessThan, output, Imm32(0), ins->snapshot());
-+  }
-+
-+  masm.bind(&done);
-+}
-+
-+// Emit a signed 64-bit division or remainder, selected by the MIR node.
-+// Division by zero traps; INT64_MIN / -1 traps for division and yields 0
-+// for remainder.
-+void CodeGenerator::visitDivOrModI64(LDivOrModI64* lir) {
-+  Register lhs = ToRegister(lir->getOperand(0));
-+  Register rhs = ToRegister(lir->getOperand(1));
-+  Register output = ToRegister(lir->output());
-+
-+  Label done;
-+
-+  // Division by zero trap.
-+  if (lir->canBeDivideByZero()) {
-+    Label nonZero;
-+    masm.branchPtr(Assembler::NotEqual, rhs, ImmWord(0), &nonZero);
-+    masm.wasmTrap(wasm::Trap::IntegerDivideByZero, lir->trapSiteDesc());
-+    masm.bind(&nonZero);
-+  }
-+
-+  // INT64_MIN / -1 overflow trap (for div only).
-+  if (lir->canBeNegativeOverflow()) {
-+    Label notMinInt;
-+    masm.branchPtr(Assembler::NotEqual, lhs, ImmWord(INT64_MIN), &notMinInt);
-+    masm.branchPtr(Assembler::NotEqual, rhs, ImmWord(-1), &notMinInt);
-+    if (lir->mir()->isDiv()) {
-+      masm.wasmTrap(wasm::Trap::IntegerOverflow, lir->trapSiteDesc());
-+    } else {
-+      // INT64_MIN % -1 is 0; skip the division entirely.
-+      masm.movePtr(ImmWord(0), output);
-+      masm.jump(&done);
-+    }
-+    masm.bind(&notMinInt);
-+  }
-+
-+  if (lir->mir()->isDiv()) {
-+    masm.as_divd(output, lhs, rhs);
-+  } else if (HasPOWER9()) {
-+    masm.as_modsd(output, lhs, rhs);
-+  } else {
-+    // No modulo instruction: output = lhs - (lhs / rhs) * rhs.
-+    masm.as_divd(output, lhs, rhs);
-+    masm.as_mulld(output, output, rhs);
-+    masm.as_subf(output, output, lhs);
-+  }
-+
-+  masm.bind(&done);
-+}
-+
-+// Emit an unsigned 64-bit division or remainder, selected by the MIR node.
-+// Division by zero traps.
-+void CodeGenerator::visitUDivOrModI64(LUDivOrModI64* lir) {
-+  Register dividend = ToRegister(lir->getOperand(0));
-+  Register divisor = ToRegister(lir->getOperand(1));
-+  Register result = ToRegister(lir->output());
-+
-+  if (lir->canBeDivideByZero()) {
-+    Label divisorNonZero;
-+    masm.branchPtr(Assembler::NotEqual, divisor, ImmWord(0), &divisorNonZero);
-+    masm.wasmTrap(wasm::Trap::IntegerDivideByZero, lir->trapSiteDesc());
-+    masm.bind(&divisorNonZero);
-+  }
-+
-+  if (lir->mir()->isDiv()) {
-+    masm.as_divdu(result, dividend, divisor);
-+    return;
-+  }
-+
-+  if (HasPOWER9()) {
-+    masm.as_modud(result, dividend, divisor);
-+    return;
-+  }
-+
-+  // No modulo instruction: result = dividend - (dividend / divisor) * divisor.
-+  masm.as_divdu(result, dividend, divisor);
-+  masm.as_mulld(result, result, divisor);
-+  masm.as_subf(result, result, dividend);
-+}
-+
-+// ===============================================================
-+// Visitors: Bitwise
-+
-+// Emit a 32-bit bitwise NOT (nor of the input with itself), then
-+// sign-extend the result.
-+void CodeGenerator::visitBitNotI(LBitNotI* ins) {
-+  Register source = ToRegister(ins->input());
-+  Register result = ToRegister(ins->output());
-+  masm.as_nor(result, source, source);
-+  masm.as_extsw(result, result);
-+}
-+
-+// Emit a 64-bit bitwise NOT (nor of the input with itself).
-+void CodeGenerator::visitBitNotI64(LBitNotI64* ins) {
-+  Register64 source = ToRegister64(ins->input());
-+  Register64 result = ToOutRegister64(ins);
-+  masm.as_nor(result.reg, source.reg, source.reg);
-+}
-+
-+// Emit a 32-bit bitwise OR / XOR / AND. Constant operands go through the
-+// macro-assembler's immediate forms; register operands use the raw
-+// instruction followed by a sign extension.
-+void CodeGenerator::visitBitOpI(LBitOpI* ins) {
-+  Register out = ToRegister(ins->getDef(0));
-+  Register lhs = ToRegister(ins->getOperand(0));
-+  const LAllocation* rhs = ins->getOperand(1);
-+
-+  if (rhs->isConstant()) {
-+    switch (ins->bitop()) {
-+      case JSOp::BitOr:
-+        masm.or32(Imm32(ToInt32(rhs)), lhs, out);
-+        return;
-+      case JSOp::BitXor:
-+        masm.xor32(Imm32(ToInt32(rhs)), lhs, out);
-+        return;
-+      case JSOp::BitAnd:
-+        masm.and32(Imm32(ToInt32(rhs)), lhs, out);
-+        return;
-+      default:
-+        MOZ_CRASH("unexpected binary opcode");
-+    }
-+  }
-+
-+  switch (ins->bitop()) {
-+    case JSOp::BitOr:
-+      masm.as_or_(out, lhs, ToRegister(rhs));
-+      break;
-+    case JSOp::BitXor:
-+      masm.as_xor_(out, lhs, ToRegister(rhs));
-+      break;
-+    case JSOp::BitAnd:
-+      masm.as_and_(out, lhs, ToRegister(rhs));
-+      break;
-+    default:
-+      MOZ_CRASH("unexpected binary opcode");
-+  }
-+  masm.as_extsw(out, out);
-+}
-+
-+// Emit a 64-bit bitwise OR / XOR / AND. Constant operands are applied in
-+// place on the output after lhs has been copied there.
-+void CodeGenerator::visitBitOpI64(LBitOpI64* lir) {
-+  Register out = ToRegister(lir->getDef(0));
-+  Register lhs = ToRegister(lir->getOperand(0));
-+  const LAllocation* rhs = lir->getOperand(1);
-+
-+  if (!rhs->isConstant()) {
-+    switch (lir->bitop()) {
-+      case JSOp::BitOr:
-+        masm.as_or_(out, lhs, ToRegister(rhs));
-+        return;
-+      case JSOp::BitXor:
-+        masm.as_xor_(out, lhs, ToRegister(rhs));
-+        return;
-+      case JSOp::BitAnd:
-+        masm.as_and_(out, lhs, ToRegister(rhs));
-+        return;
-+      default:
-+        MOZ_CRASH("unexpected binary opcode");
-+    }
-+  }
-+
-+  // The immediate forms operate in place, so lhs must be in the output.
-+  auto moveLhsToOutput = [&] {
-+    if (lhs != out) {
-+      masm.movePtr(lhs, out);
-+    }
-+  };
-+
-+  switch (lir->bitop()) {
-+    case JSOp::BitOr:
-+      moveLhsToOutput();
-+      masm.or64(Imm64(ToInt64(rhs)), Register64(out));
-+      break;
-+    case JSOp::BitXor:
-+      moveLhsToOutput();
-+      masm.xor64(Imm64(ToInt64(rhs)), Register64(out));
-+      break;
-+    case JSOp::BitAnd:
-+      moveLhsToOutput();
-+      masm.and64(Imm64(ToInt64(rhs)), Register64(out));
-+      break;
-+    default:
-+      MOZ_CRASH("unexpected binary opcode");
-+  }
-+}
-+
-+// Emit a 32-bit shift (<<, >>, >>>). Shift counts are taken modulo 32. A
-+// fallible unsigned shift bails out when the result is negative as an int32.
-+void CodeGenerator::visitShiftI(LShiftI* ins) {
-+  Register lhs = ToRegister(ins->lhs());
-+  const LAllocation* rhs = ins->rhs();
-+  Register out = ToRegister(ins->output());
-+
-+  if (!rhs->isConstant()) {
-+    Register countReg = ToRegister(rhs);
-+    // PPC slw/srw/sraw use 6 bits of shift amount; JS requires mod 32, so
-+    // keep only the low 5 bits of the count.
-+    UseScratchRegisterScope temps(masm);
-+    Register count = temps.Acquire();
-+    masm.as_rlwinm(count, countReg, 0, 27, 31);
-+    switch (ins->bitop()) {
-+      case JSOp::Lsh:
-+        masm.as_slw(out, lhs, count);
-+        masm.as_extsw(out, out);
-+        break;
-+      case JSOp::Rsh:
-+        masm.as_sraw(out, lhs, count);
-+        break;
-+      case JSOp::Ursh:
-+        masm.as_srw(out, lhs, count);
-+        masm.as_extsw(out, out);
-+        if (ins->mir()->toUrsh()->fallible()) {
-+          bailoutCmp32(Assembler::LessThan, out, Imm32(0), ins->snapshot());
-+        }
-+        break;
-+      default:
-+        MOZ_CRASH("unexpected shift opcode");
-+    }
-+    return;
-+  }
-+
-+  const JSOp op = ins->bitop();
-+  if (op != JSOp::Lsh && op != JSOp::Rsh && op != JSOp::Ursh) {
-+    MOZ_CRASH("unexpected shift opcode");
-+  }
-+
-+  const int32_t amount = ToInt32(rhs) & 0x1f;
-+  if (amount == 0) {
-+    // A zero shift is a plain copy for every opcode.
-+    masm.move32(lhs, out);
-+  } else if (op == JSOp::Lsh) {
-+    masm.lshift32(Imm32(amount), lhs, out);
-+  } else if (op == JSOp::Rsh) {
-+    masm.rshift32Arithmetic(Imm32(amount), lhs, out);
-+  } else {
-+    masm.rshift32(Imm32(amount), lhs, out);
-+  }
-+
-+  // x >>> 0 can produce values that don't fit in signed int32.
-+  if (op == JSOp::Ursh && ins->mir()->toUrsh()->fallible()) {
-+    bailoutCmp32(Assembler::LessThan, out, Imm32(0), ins->snapshot());
-+  }
-+}
-+
-+// Emit a pointer-width shift (<<, >>, >>>). Shift counts are taken modulo 64.
-+void CodeGenerator::visitShiftIntPtr(LShiftIntPtr* ins) {
-+  Register lhs = ToRegister(ins->lhs());
-+  Register out = ToRegister(ins->output());
-+
-+  if (!ins->rhs()->isConstant()) {
-+    Register countReg = ToRegister(ins->rhs());
-+    // sld/srd/srad use the low 7 bits of the shift count: counts >= 64
-+    // produce 0 (sign-fill for srad). Mask to 6 bits for mod-64 semantics.
-+    UseScratchRegisterScope temps(masm);
-+    Register count = temps.Acquire();
-+    masm.as_rldicl(count, countReg, 0, 58);
-+    switch (ins->bitop()) {
-+      case JSOp::Lsh:
-+        masm.as_sld(out, lhs, count);
-+        break;
-+      case JSOp::Rsh:
-+        masm.as_srad(out, lhs, count);
-+        break;
-+      case JSOp::Ursh:
-+        masm.as_srd(out, lhs, count);
-+        break;
-+      default:
-+        MOZ_CRASH("unexpected shift opcode");
-+    }
-+    return;
-+  }
-+
-+  const JSOp op = ins->bitop();
-+  if (op != JSOp::Lsh && op != JSOp::Rsh && op != JSOp::Ursh) {
-+    MOZ_CRASH("unexpected shift opcode");
-+  }
-+
-+  // ShiftIntPtr's RHS constant is IntPtr- or Int32-typed, not Int64, so it
-+  // is read through ToIntPtr(), which dispatches on the underlying MIRType.
-+  const int32_t amount = int32_t(ToIntPtr(ins->rhs())) & 0x3f;
-+  if (amount == 0) {
-+    // A zero shift is a plain copy for every opcode.
-+    masm.movePtr(lhs, out);
-+  } else if (op == JSOp::Lsh) {
-+    masm.lshiftPtr(Imm32(amount), lhs, out);
-+  } else if (op == JSOp::Rsh) {
-+    masm.rshiftPtrArithmetic(Imm32(amount), lhs, out);
-+  } else {
-+    masm.rshiftPtr(Imm32(amount), lhs, out);
-+  }
-+}
-+
-+// Emit a 64-bit shift (<<, >>, >>>). Shift counts are taken modulo 64.
-+void CodeGenerator::visitShiftI64(LShiftI64* lir) {
-+  Register lhs = ToRegister64(lir->lhs()).reg;
-+  Register out = ToOutRegister64(lir).reg;
-+  const LAllocation* rhs = lir->rhs();
-+
-+  if (!rhs->isConstant()) {
-+    Register countReg = ToRegister(rhs);
-+    // Wasm i64 shifts require the count modulo 64. PPC64 sld/srd/srad use a
-+    // 7-bit shift field, so counts >= 64 would produce 0 (or sign-fill for
-+    // srad). Keep only the low 6 bits first.
-+    UseScratchRegisterScope temps(masm);
-+    Register count = temps.Acquire();
-+    masm.as_rldicl(count, countReg, 0, 58);  // clrldi: keep low 6 bits
-+    switch (lir->bitop()) {
-+      case JSOp::Lsh:
-+        masm.as_sld(out, lhs, count);
-+        break;
-+      case JSOp::Rsh:
-+        masm.as_srad(out, lhs, count);
-+        break;
-+      case JSOp::Ursh:
-+        masm.as_srd(out, lhs, count);
-+        break;
-+      default:
-+        MOZ_CRASH("unexpected shift opcode");
-+    }
-+    return;
-+  }
-+
-+  const JSOp op = lir->bitop();
-+  if (op != JSOp::Lsh && op != JSOp::Rsh && op != JSOp::Ursh) {
-+    MOZ_CRASH("unexpected shift opcode");
-+  }
-+
-+  const int32_t amount = int32_t(rhs->toConstant()->toInt64()) & 0x3f;
-+  if (amount == 0) {
-+    // A zero shift is a plain copy for every opcode.
-+    masm.movePtr(lhs, out);
-+  } else if (op == JSOp::Lsh) {
-+    masm.lshiftPtr(Imm32(amount), lhs, out);
-+  } else if (op == JSOp::Rsh) {
-+    masm.rshiftPtrArithmetic(Imm32(amount), lhs, out);
-+  } else {
-+    masm.rshiftPtr(Imm32(amount), lhs, out);
-+  }
-+}
-+
-+// Emit an unsigned 32-bit right shift whose result is converted to a
-+// double: the shift is done in a temp and then read as a uint32.
-+void CodeGenerator::visitUrshD(LUrshD* ins) {
-+  Register lhs = ToRegister(ins->lhs());
-+  const LAllocation* rhs = ins->rhs();
-+  FloatRegister result = ToFloatRegister(ins->output());
-+  Register shifted = ToRegister(ins->temp0());
-+
-+  if (!rhs->isConstant()) {
-+    masm.move32(lhs, shifted);
-+    masm.rshift32(ToRegister(rhs), shifted);
-+  } else {
-+    const int32_t amount = ToInt32(rhs) & 0x1f;
-+    if (amount == 0) {
-+      masm.move32(lhs, shifted);
-+    } else {
-+      masm.rshift32(Imm32(amount), lhs, shifted);
-+    }
-+  }
-+
-+  masm.convertUInt32ToDouble(shifted, result);
-+}
-+
-+// ===============================================================
-+// Visitors: Floating-point arithmetic
-+
-+// Emit a double-precision add, subtract, multiply or divide.
-+void CodeGenerator::visitMathD(LMathD* math) {
-+  FloatRegister a = ToFloatRegister(math->lhs());
-+  FloatRegister b = ToFloatRegister(math->rhs());
-+  FloatRegister result = ToFloatRegister(math->output());
-+
-+  const JSOp op = math->jsop();
-+  if (op == JSOp::Add) {
-+    masm.as_fadd(result, a, b);
-+  } else if (op == JSOp::Sub) {
-+    masm.as_fsub(result, a, b);
-+  } else if (op == JSOp::Mul) {
-+    masm.as_fmul(result, a, b);
-+  } else if (op == JSOp::Div) {
-+    masm.as_fdiv(result, a, b);
-+  } else {
-+    MOZ_CRASH("unexpected double opcode");
-+  }
-+}
-+
-+// Emit a single-precision add, subtract, multiply or divide.
-+void CodeGenerator::visitMathF(LMathF* math) {
-+  FloatRegister a = ToFloatRegister(math->lhs());
-+  FloatRegister b = ToFloatRegister(math->rhs());
-+  FloatRegister result = ToFloatRegister(math->output());
-+
-+  const JSOp op = math->jsop();
-+  if (op == JSOp::Add) {
-+    masm.as_fadds(result, a, b);
-+  } else if (op == JSOp::Sub) {
-+    masm.as_fsubs(result, a, b);
-+  } else if (op == JSOp::Mul) {
-+    masm.as_fmuls(result, a, b);
-+  } else if (op == JSOp::Div) {
-+    masm.as_fdivs(result, a, b);
-+  } else {
-+    MOZ_CRASH("unexpected float32 opcode");
-+  }
-+}
-+
-+// Emit a double min/max with NaN handling. The first operand doubles as the
-+// output register.
-+void CodeGenerator::visitMinMaxD(LMinMaxD* ins) {
-+  FloatRegister accum = ToFloatRegister(ins->first());
-+  FloatRegister other = ToFloatRegister(ins->second());
-+  mozilla::DebugOnly<FloatRegister> result = ToFloatRegister(ins->output());
-+  MOZ_ASSERT(accum == result);
-+
-+  if (!ins->mir()->isMax()) {
-+    masm.minDouble(other, accum, /* handleNaN = */ true);
-+    return;
-+  }
-+  masm.maxDouble(other, accum, /* handleNaN = */ true);
-+}
-+
-+// Emit a float32 min/max with NaN handling. The first operand doubles as
-+// the output register.
-+void CodeGenerator::visitMinMaxF(LMinMaxF* ins) {
-+  FloatRegister accum = ToFloatRegister(ins->first());
-+  FloatRegister other = ToFloatRegister(ins->second());
-+  mozilla::DebugOnly<FloatRegister> result = ToFloatRegister(ins->output());
-+  MOZ_ASSERT(accum == result);
-+
-+  if (!ins->mir()->isMax()) {
-+    masm.minFloat32(other, accum, /* handleNaN = */ true);
-+    return;
-+  }
-+  masm.maxFloat32(other, accum, /* handleNaN = */ true);
-+}
-+
-+// Emit a double negation.
-+void CodeGenerator::visitNegD(LNegD* ins) {
-+  FloatRegister result = ToFloatRegister(ins->output());
-+  masm.as_fneg(result, ToFloatRegister(ins->input()));
-+}
-+
-+// Emit a float32 negation.
-+void CodeGenerator::visitNegF(LNegF* ins) {
-+  FloatRegister result = ToFloatRegister(ins->output());
-+  masm.as_fneg(result, ToFloatRegister(ins->input()));
-+}
-+
-+// Emit x ** 0.5. -Infinity is special-cased to +Infinity; every other input
-+// has 0.0 added to it (to handle -0) before the square root.
-+void CodeGenerator::visitPowHalfD(LPowHalfD* ins) {
-+  FloatRegister source = ToFloatRegister(ins->input());
-+  FloatRegister result = ToFloatRegister(ins->output());
-+
-+  Label finished, notNegInf;
-+
-+  // -Infinity input: the result is +Infinity.
-+  masm.loadConstantDouble(NegativeInfinity<double>(), ScratchDoubleReg);
-+  masm.branchDouble(Assembler::DoubleNotEqualOrUnordered, source,
-+                    ScratchDoubleReg, &notNegInf);
-+  masm.loadConstantDouble(std::numeric_limits<double>::infinity(), result);
-+  masm.jump(&finished);
-+
-+  masm.bind(&notNegInf);
-+  // Add 0.0 to handle -0, then take the square root.
-+  masm.loadConstantDouble(0.0, ScratchDoubleReg);
-+  masm.as_fadd(result, source, ScratchDoubleReg);
-+  masm.as_fsqrt(result, result);
-+
-+  masm.bind(&finished);
-+}
-+
-+// Emit logical NOT of a double: the output is set when the input compares
-+// equal to 0.0 or the comparison is unordered.
-+void CodeGenerator::visitNotD(LNotD* ins) {
-+  FloatRegister value = ToFloatRegister(ins->input());
-+  Register result = ToRegister(ins->output());
-+
-+  masm.loadConstantDouble(0.0, ScratchDoubleReg);
-+  masm.as_fcmpu(value, ScratchDoubleReg);
-+  masm.ma_cmp_set_dbl(result, Assembler::DoubleEqualOrUnordered);
-+}
-+
-+// Emit logical NOT of a float32: the output is set when the input compares
-+// equal to 0.0f or the comparison is unordered.
-+void CodeGenerator::visitNotF(LNotF* ins) {
-+  FloatRegister value = ToFloatRegister(ins->input());
-+  Register result = ToRegister(ins->output());
-+
-+  masm.loadConstantFloat32(0.0f, ScratchFloat32Reg);
-+  masm.as_fcmpu(value, ScratchFloat32Reg);
-+  masm.ma_cmp_set_dbl(result, Assembler::DoubleEqualOrUnordered);
-+}
-+
-+// ===============================================================
-+// Visitors: FP comparisons and branches
-+
-+// Emit a double comparison that materialises its boolean result in a
-+// register.
-+void CodeGenerator::visitCompareD(LCompareD* comp) {
-+  FloatRegister a = ToFloatRegister(comp->left());
-+  FloatRegister b = ToFloatRegister(comp->right());
-+  Register result = ToRegister(comp->output());
-+
-+  // Strict (in)equality maps to fixed conditions; everything else goes
-+  // through the generic JSOp -> condition table.
-+  Assembler::DoubleCondition cond;
-+  if (comp->mir()->jsop() == JSOp::StrictEq) {
-+    cond = Assembler::DoubleEqual;
-+  } else if (comp->mir()->jsop() == JSOp::StrictNe) {
-+    cond = Assembler::DoubleNotEqualOrUnordered;
-+  } else {
-+    cond = JSOpToDoubleCondition(comp->mir()->jsop());
-+  }
-+
-+  masm.as_fcmpu(a, b);
-+  masm.ma_cmp_set_dbl(result, cond);
-+}
-+
-+// Emit a float32 comparison that materialises its boolean result in a
-+// register.
-+void CodeGenerator::visitCompareF(LCompareF* comp) {
-+  FloatRegister a = ToFloatRegister(comp->left());
-+  FloatRegister b = ToFloatRegister(comp->right());
-+  Register result = ToRegister(comp->output());
-+
-+  // Strict (in)equality maps to fixed conditions; everything else goes
-+  // through the generic JSOp -> condition table.
-+  Assembler::DoubleCondition cond;
-+  if (comp->mir()->jsop() == JSOp::StrictEq) {
-+    cond = Assembler::DoubleEqual;
-+  } else if (comp->mir()->jsop() == JSOp::StrictNe) {
-+    cond = Assembler::DoubleNotEqualOrUnordered;
-+  } else {
-+    cond = JSOpToDoubleCondition(comp->mir()->jsop());
-+  }
-+
-+  masm.as_fcmpu(a, b);
-+  masm.ma_cmp_set_dbl(result, cond);
-+}
-+
-+// Emit a fused double compare-and-branch. When the false target is the next
-+// block only one branch (to the true target) is emitted; otherwise the
-+// inverted condition branches to the false target, followed by a jump to
-+// the true target.
-+void CodeGenerator::visitCompareDAndBranch(LCompareDAndBranch* comp) {
-+  FloatRegister a = ToFloatRegister(comp->left());
-+  FloatRegister b = ToFloatRegister(comp->right());
-+  Assembler::DoubleCondition cond =
-+      JSOpToDoubleCondition(comp->cmpMir()->jsop());
-+  MBasicBlock* onTrue = comp->ifTrue();
-+  MBasicBlock* onFalse = comp->ifFalse();
-+
-+  if (!isNextBlock(onFalse->lir())) {
-+    branchToBlock(Assembler::DoubleFloat, Assembler::InvertCondition(cond), a,
-+                  b, onFalse);
-+    jumpToBlock(onTrue);
-+    return;
-+  }
-+
-+  branchToBlock(Assembler::DoubleFloat, cond, a, b, onTrue);
-+}
-+
-+// Emit a fused float32 compare-and-branch. When the false target is the
-+// next block only one branch (to the true target) is emitted; otherwise the
-+// inverted condition branches to the false target, followed by a jump to
-+// the true target.
-+void CodeGenerator::visitCompareFAndBranch(LCompareFAndBranch* comp) {
-+  FloatRegister a = ToFloatRegister(comp->left());
-+  FloatRegister b = ToFloatRegister(comp->right());
-+  Assembler::DoubleCondition cond =
-+      JSOpToDoubleCondition(comp->cmpMir()->jsop());
-+  MBasicBlock* onTrue = comp->ifTrue();
-+  MBasicBlock* onFalse = comp->ifFalse();
-+
-+  if (!isNextBlock(onFalse->lir())) {
-+    branchToBlock(Assembler::SingleFloat, Assembler::InvertCondition(cond), a,
-+                  b, onFalse);
-+    jumpToBlock(onTrue);
-+    return;
-+  }
-+
-+  branchToBlock(Assembler::SingleFloat, cond, a, b, onTrue);
-+}
-+
-+// Emit a truthiness test on a double and branch: the input is compared
-+// against 0.0, and equal-or-unordered takes the false target.
-+void CodeGenerator::visitTestDAndBranch(LTestDAndBranch* test) {
-+  FloatRegister value = ToFloatRegister(test->input());
-+  MBasicBlock* onTrue = test->ifTrue();
-+  MBasicBlock* onFalse = test->ifFalse();
-+
-+  masm.loadConstantDouble(0.0, ScratchDoubleReg);
-+
-+  if (!isNextBlock(onFalse->lir())) {
-+    branchToBlock(Assembler::DoubleFloat, Assembler::DoubleEqualOrUnordered,
-+                  value, ScratchDoubleReg, onFalse);
-+    jumpToBlock(onTrue);
-+    return;
-+  }
-+
-+  // The false block follows directly, so a single branch suffices.
-+  branchToBlock(Assembler::DoubleFloat, Assembler::DoubleNotEqual, value,
-+                ScratchDoubleReg, onTrue);
-+}
-+
-+// Emit a truthiness test on a float32 and branch: the input is compared
-+// against 0.0f, and equal-or-unordered takes the false target.
-+void CodeGenerator::visitTestFAndBranch(LTestFAndBranch* test) {
-+  FloatRegister value = ToFloatRegister(test->input());
-+  MBasicBlock* onTrue = test->ifTrue();
-+  MBasicBlock* onFalse = test->ifFalse();
-+
-+  masm.loadConstantFloat32(0.0f, ScratchFloat32Reg);
-+
-+  if (!isNextBlock(onFalse->lir())) {
-+    branchToBlock(Assembler::SingleFloat, Assembler::DoubleEqualOrUnordered,
-+                  value, ScratchFloat32Reg, onFalse);
-+    jumpToBlock(onTrue);
-+    return;
-+  }
-+
-+  // The false block follows directly, so a single branch suffices.
-+  branchToBlock(Assembler::SingleFloat, Assembler::DoubleNotEqual, value,
-+                ScratchFloat32Reg, onTrue);
-+}
-+
-+// ===============================================================
-+// Visitors: Truncation
-+
-+void CodeGenerator::visitTruncateDToInt32(LTruncateDToInt32* ins) {
-+ emitTruncateDouble(ToFloatRegister(ins->input()), ToRegister(ins->output()),
-+ ins->mir());
-+}
-+
-+void CodeGenerator::visitTruncateFToInt32(LTruncateFToInt32* ins) {
-+ emitTruncateFloat32(ToFloatRegister(ins->input()), ToRegister(ins->output()),
-+ ins->mir());
-+}
-+
-+// ===============================================================
-+// Visitors: Int64 / Wasm type conversions
-+
-+void CodeGenerator::visitExtendInt32ToInt64(LExtendInt32ToInt64* lir) {
-+ Register input = ToRegister(lir->input());
-+ Register output = ToRegister(lir->output());
-+
-+ if (lir->mir()->isUnsigned()) {
-+ masm.move32To64ZeroExtend(input, Register64(output));
-+ } else {
-+ masm.as_extsw(output, input);
-+ }
-+}
-+
-+void CodeGenerator::visitWrapInt64ToInt32(LWrapInt64ToInt32* lir) {
-+ const LInt64Allocation input = lir->input();
-+ Register output = ToRegister(lir->output());
-+
-+ if (lir->mir()->bottomHalf()) {
-+ if (input.value().isMemory()) {
-+ masm.load32(ToAddress(input), output);
-+ } else {
-+ masm.move64To32(ToRegister64(input), output);
-+ }
-+ } else {
-+ // The only producer of `bottomHalf=false` MWrapInt64ToInt32 in the
-+ // current MIR pipeline is the GPR-pair argument splitter in
-+ // WasmIonCompile.cpp, which is gated on JS_CODEGEN_REGISTER_PAIR
-+ // (32-bit ARM only). PPC64 is 64-bit and never reaches this path.
-+ // Matches the same defensive crash in x64 / ARM64 backends.
-+ MOZ_CRASH("Not implemented.");
-+ }
-+}
-+
-+void CodeGenerator::visitSignExtendInt64(LSignExtendInt64* lir) {
-+ Register64 input = ToRegister64(lir->input());
-+ Register64 output = ToOutRegister64(lir);
-+
-+ switch (lir->mir()->mode()) {
-+ case MSignExtendInt64::Byte:
-+ masm.as_extsb(output.reg, input.reg);
-+ break;
-+ case MSignExtendInt64::Half:
-+ masm.as_extsh(output.reg, input.reg);
-+ break;
-+ case MSignExtendInt64::Word:
-+ masm.as_extsw(output.reg, input.reg);
-+ break;
-+ }
-+}
-+
-+void CodeGenerator::visitWasmExtendU32Index(LWasmExtendU32Index* lir) {
-+ Register input = ToRegister(lir->input());
-+ Register output = ToRegister(lir->output());
-+ masm.move32To64ZeroExtend(input, Register64(output));
-+}
-+
-+void CodeGenerator::visitWasmWrapU32Index(LWasmWrapU32Index* lir) {
-+ Register input = ToRegister(lir->input());
-+ Register output = ToRegister(lir->output());
-+ masm.move32(input, output);
-+}
-+
-+void CodeGenerator::visitWasmTruncateToInt32(LWasmTruncateToInt32* lir) {
-+ auto input = ToFloatRegister(lir->input());
-+ auto output = ToRegister(lir->output());
-+
-+ MWasmTruncateToInt32* mir = lir->mir();
-+ MIRType fromType = mir->input()->type();
-+
-+ MOZ_ASSERT(fromType == MIRType::Double || fromType == MIRType::Float32);
-+
-+ auto* ool = new (alloc()) OutOfLineWasmTruncateCheck(mir, input, output);
-+ addOutOfLineCode(ool, mir);
-+
-+ Label* oolEntry = ool->entry();
-+ if (mir->isUnsigned()) {
-+ if (fromType == MIRType::Double) {
-+ masm.wasmTruncateDoubleToUInt32(input, output, mir->isSaturating(),
-+ oolEntry);
-+ } else if (fromType == MIRType::Float32) {
-+ masm.wasmTruncateFloat32ToUInt32(input, output, mir->isSaturating(),
-+ oolEntry);
-+ } else {
-+ MOZ_CRASH("unexpected type");
-+ }
-+
-+ masm.bind(ool->rejoin());
-+ return;
-+ }
-+
-+ if (fromType == MIRType::Double) {
-+ masm.wasmTruncateDoubleToInt32(input, output, mir->isSaturating(),
-+ oolEntry);
-+ } else if (fromType == MIRType::Float32) {
-+ masm.wasmTruncateFloat32ToInt32(input, output, mir->isSaturating(),
-+ oolEntry);
-+ } else {
-+ MOZ_CRASH("unexpected type");
-+ }
-+
-+ masm.bind(ool->rejoin());
-+}
-+
-+void CodeGenerator::visitWasmTruncateToInt64(LWasmTruncateToInt64* lir) {
-+ FloatRegister input = ToFloatRegister(lir->input());
-+ Register64 output = ToOutRegister64(lir);
-+
-+ MWasmTruncateToInt64* mir = lir->mir();
-+ MIRType fromType = mir->input()->type();
-+
-+ MOZ_ASSERT(fromType == MIRType::Double || fromType == MIRType::Float32);
-+
-+ auto* ool = new (alloc()) OutOfLineWasmTruncateCheck(mir, input, output);
-+ addOutOfLineCode(ool, mir);
-+
-+ Label* oolEntry = ool->entry();
-+ Label* oolRejoin = ool->rejoin();
-+ bool isSaturating = mir->isSaturating();
-+
-+ if (fromType == MIRType::Double) {
-+ if (mir->isUnsigned()) {
-+ masm.wasmTruncateDoubleToUInt64(input, output, isSaturating, oolEntry,
-+ oolRejoin, InvalidFloatReg);
-+ } else {
-+ masm.wasmTruncateDoubleToInt64(input, output, isSaturating, oolEntry,
-+ oolRejoin, InvalidFloatReg);
-+ }
-+ } else {
-+ if (mir->isUnsigned()) {
-+ masm.wasmTruncateFloat32ToUInt64(input, output, isSaturating, oolEntry,
-+ oolRejoin, InvalidFloatReg);
-+ } else {
-+ masm.wasmTruncateFloat32ToInt64(input, output, isSaturating, oolEntry,
-+ oolRejoin, InvalidFloatReg);
-+ }
-+ }
-+}
-+
-+void CodeGenerator::visitInt64ToFloatingPoint(LInt64ToFloatingPoint* lir) {
-+ Register64 input = ToRegister64(lir->input());
-+ FloatRegister output = ToFloatRegister(lir->output());
-+ MIRType outputType = lir->mir()->type();
-+
-+ if (outputType == MIRType::Double) {
-+ if (lir->mir()->isUnsigned()) {
-+ masm.convertUInt64ToDouble(input, output, Register::Invalid());
-+ } else {
-+ masm.convertInt64ToDouble(input, output);
-+ }
-+ } else {
-+ if (lir->mir()->isUnsigned()) {
-+ masm.convertUInt64ToFloat32(input, output, Register::Invalid());
-+ } else {
-+ masm.convertInt64ToFloat32(input, output);
-+ }
-+ }
-+}
-+
-+void CodeGenerator::visitWasmUint32ToDouble(LWasmUint32ToDouble* lir) {
-+ Register input = ToRegister(lir->input());
-+ FloatRegister output = ToFloatRegister(lir->output());
-+ masm.convertUInt32ToDouble(input, output);
-+}
-+
-+void CodeGenerator::visitWasmUint32ToFloat32(LWasmUint32ToFloat32* lir) {
-+ Register input = ToRegister(lir->input());
-+ FloatRegister output = ToFloatRegister(lir->output());
-+ masm.convertUInt32ToFloat32(input, output);
-+}
-+
-+void CodeGenerator::visitWasmBuiltinTruncateDToInt32(
-+ LWasmBuiltinTruncateDToInt32* lir) {
-+ emitTruncateDouble(ToFloatRegister(lir->getOperand(0)),
-+ ToRegister(lir->getDef(0)), lir->mir());
-+}
-+
-+void CodeGenerator::visitWasmBuiltinTruncateFToInt32(
-+ LWasmBuiltinTruncateFToInt32* lir) {
-+ emitTruncateFloat32(ToFloatRegister(lir->getOperand(0)),
-+ ToRegister(lir->getDef(0)), lir->mir());
-+}
-+
-+// ===============================================================
-+// Visitors: Wasm load/store
-+
-+template <typename T>
-+void CodeGeneratorPPC64::emitWasmLoad(T* lir) {
-+ const MWasmLoad* mir = lir->mir();
-+ UseScratchRegisterScope temps(masm);
-+ Register scratch = temps.Acquire();
-+
-+ Register memoryBase = ToRegister(lir->memoryBase());
-+ Register ptr = ToRegister(lir->ptr());
-+ Register ptrScratch = ToTempRegisterOrInvalid(lir->temp0());
-+
-+ if (mir->base()->type() == MIRType::Int32) {
-+ masm.move32To64ZeroExtend(ptr, Register64(scratch));
-+ ptr = scratch;
-+ ptrScratch = ptrScratch != InvalidReg ? scratch : InvalidReg;
-+ }
-+
-+ masm.wasmLoad(mir->access(), memoryBase, ptr, ptrScratch,
-+ ToAnyRegister(lir->output()));
-+}
-+
-+template <typename T>
-+void CodeGeneratorPPC64::emitWasmStore(T* lir) {
-+ const MWasmStore* mir = lir->mir();
-+ UseScratchRegisterScope temps(masm);
-+ Register scratch = temps.Acquire();
-+
-+ Register memoryBase = ToRegister(lir->memoryBase());
-+ Register ptr = ToRegister(lir->ptr());
-+ Register ptrScratch = ToTempRegisterOrInvalid(lir->temp0());
-+
-+ if (mir->base()->type() == MIRType::Int32) {
-+ masm.move32To64ZeroExtend(ptr, Register64(scratch));
-+ ptr = scratch;
-+ ptrScratch = ptrScratch != InvalidReg ? scratch : InvalidReg;
-+ }
-+
-+ masm.wasmStore(mir->access(), ToAnyRegister(lir->value()), memoryBase, ptr,
-+ ptrScratch);
-+}
-+
-+void CodeGenerator::visitWasmLoad(LWasmLoad* lir) { emitWasmLoad(lir); }
-+
-+void CodeGenerator::visitWasmStore(LWasmStore* lir) { emitWasmStore(lir); }
-+
-+void CodeGenerator::visitWasmLoadI64(LWasmLoadI64* lir) {
-+ const MWasmLoad* mir = lir->mir();
-+
-+ Register memoryBase = ToRegister(lir->memoryBase());
-+ Register ptrScratch = ToTempRegisterOrInvalid(lir->temp0());
-+
-+ Register ptrReg = ToRegister(lir->ptr());
-+ if (mir->base()->type() == MIRType::Int32) {
-+ masm.move32ZeroExtendToPtr(ptrReg, ptrReg);
-+ }
-+
-+ masm.wasmLoadI64(mir->access(), memoryBase, ptrReg, ptrScratch,
-+ ToOutRegister64(lir));
-+}
-+
-+void CodeGenerator::visitWasmStoreI64(LWasmStoreI64* lir) {
-+ const MWasmStore* mir = lir->mir();
-+
-+ Register memoryBase = ToRegister(lir->memoryBase());
-+ Register ptrScratch = ToTempRegisterOrInvalid(lir->temp0());
-+
-+ Register ptrReg = ToRegister(lir->ptr());
-+ if (mir->base()->type() == MIRType::Int32) {
-+ masm.move32ZeroExtendToPtr(ptrReg, ptrReg);
-+ }
-+
-+ masm.wasmStoreI64(mir->access(), ToRegister64(lir->value()), memoryBase,
-+ ptrReg, ptrScratch);
-+}
-+
-+void CodeGenerator::visitAsmJSLoadHeap(LAsmJSLoadHeap* ins) {
-+ const MAsmJSLoadHeap* mir = ins->mir();
-+ MOZ_ASSERT(!mir->hasMemoryBase());
-+
-+ const LAllocation* ptr = ins->ptr();
-+ const LDefinition* output = ins->output();
-+ const LAllocation* boundsCheckLimit = ins->boundsCheckLimit();
-+
-+ Register ptrReg = ToRegister(ptr);
-+ Scalar::Type accessType = mir->accessType();
-+ bool isFloat = accessType == Scalar::Float32 || accessType == Scalar::Float64;
-+ Label done;
-+
-+ if (mir->needsBoundsCheck()) {
-+ Label boundsCheckPassed;
-+ Register boundsCheckLimitReg = ToRegister(boundsCheckLimit);
-+ masm.wasmBoundsCheck32(Assembler::Below, ptrReg, boundsCheckLimitReg,
-+ &boundsCheckPassed);
-+ if (isFloat) {
-+ if (accessType == Scalar::Float32) {
-+ masm.loadConstantFloat32(GenericNaN(), ToFloatRegister(output));
-+ } else {
-+ masm.loadConstantDouble(GenericNaN(), ToFloatRegister(output));
-+ }
-+ } else {
-+ masm.movePtr(ImmWord(0), ToRegister(output));
-+ }
-+ masm.jump(&done);
-+ masm.bind(&boundsCheckPassed);
-+ }
-+
-+ UseScratchRegisterScope temps(masm);
-+ Register scratch = temps.Acquire();
-+ masm.move32To64ZeroExtend(ptrReg, Register64(scratch));
-+
-+ switch (accessType) {
-+ case Scalar::Int8:
-+ masm.as_lbzx(ToRegister(output), HeapReg, scratch);
-+ masm.as_extsb(ToRegister(output), ToRegister(output));
-+ break;
-+ case Scalar::Uint8:
-+ masm.as_lbzx(ToRegister(output), HeapReg, scratch);
-+ break;
-+ case Scalar::Int16:
-+ masm.as_lhax(ToRegister(output), HeapReg, scratch);
-+ break;
-+ case Scalar::Uint16:
-+ masm.as_lhzx(ToRegister(output), HeapReg, scratch);
-+ break;
-+ case Scalar::Int32:
-+ masm.as_lwzx(ToRegister(output), HeapReg, scratch);
-+ masm.as_extsw(ToRegister(output), ToRegister(output));
-+ break;
-+ case Scalar::Uint32:
-+ masm.as_lwzx(ToRegister(output), HeapReg, scratch);
-+ break;
-+ case Scalar::Float64:
-+ masm.as_lfdx(ToFloatRegister(output), HeapReg, scratch);
-+ break;
-+ case Scalar::Float32:
-+ masm.as_lfsx(ToFloatRegister(output), HeapReg, scratch);
-+ break;
-+ default:
-+ MOZ_CRASH("unexpected array type");
-+ }
-+
-+ if (done.used()) {
-+ masm.bind(&done);
-+ }
-+}
-+
-+void CodeGenerator::visitAsmJSStoreHeap(LAsmJSStoreHeap* ins) {
-+ const MAsmJSStoreHeap* mir = ins->mir();
-+ MOZ_ASSERT(!mir->hasMemoryBase());
-+
-+ const LAllocation* value = ins->value();
-+ const LAllocation* ptr = ins->ptr();
-+ const LAllocation* boundsCheckLimit = ins->boundsCheckLimit();
-+
-+ Register ptrReg = ToRegister(ptr);
-+
-+ Label done;
-+ if (mir->needsBoundsCheck()) {
-+ Register boundsCheckLimitReg = ToRegister(boundsCheckLimit);
-+ masm.wasmBoundsCheck32(Assembler::AboveOrEqual, ptrReg, boundsCheckLimitReg,
-+ &done);
-+ }
-+
-+ UseScratchRegisterScope temps(masm);
-+ Register scratch = temps.Acquire();
-+ masm.move32To64ZeroExtend(ptrReg, Register64(scratch));
-+
-+ switch (mir->accessType()) {
-+ case Scalar::Int8:
-+ case Scalar::Uint8:
-+ masm.as_stbx(ToRegister(value), HeapReg, scratch);
-+ break;
-+ case Scalar::Int16:
-+ case Scalar::Uint16:
-+ masm.as_sthx(ToRegister(value), HeapReg, scratch);
-+ break;
-+ case Scalar::Int32:
-+ case Scalar::Uint32:
-+ masm.as_stwx(ToRegister(value), HeapReg, scratch);
-+ break;
-+ case Scalar::Float64:
-+ masm.as_stfdx(ToFloatRegister(value), HeapReg, scratch);
-+ break;
-+ case Scalar::Float32:
-+ masm.as_stfsx(ToFloatRegister(value), HeapReg, scratch);
-+ break;
-+ default:
-+ MOZ_CRASH("unexpected array type");
-+ }
-+
-+ if (done.used()) {
-+ masm.bind(&done);
-+ }
-+}
-+
-+void CodeGenerator::visitWasmStackArg(LWasmStackArg* ins) {
-+ const MWasmStackArg* mir = ins->mir();
-+ if (ins->arg()->isConstant()) {
-+ masm.storePtr(ImmWord(ToInt32(ins->arg())),
-+ Address(StackPointer, mir->spOffset()));
-+ } else {
-+ if (ins->arg()->isGeneralReg()) {
-+ masm.storePtr(ToRegister(ins->arg()),
-+ Address(StackPointer, mir->spOffset()));
-+ } else if (mir->input()->type() == MIRType::Double) {
-+ masm.storeDouble(ToFloatRegister(ins->arg()),
-+ Address(StackPointer, mir->spOffset()));
-+#ifdef ENABLE_WASM_SIMD
-+ } else if (mir->input()->type() == MIRType::Simd128) {
-+ masm.storeUnalignedSimd128(ToFloatRegister(ins->arg()),
-+ Address(StackPointer, mir->spOffset()));
-+#endif
-+ } else {
-+ masm.storeFloat32(ToFloatRegister(ins->arg()),
-+ Address(StackPointer, mir->spOffset()));
-+ }
-+ }
-+}
-+
-+void CodeGenerator::visitWasmStackArgI64(LWasmStackArgI64* ins) {
-+ const MWasmStackArg* mir = ins->mir();
-+ Address dst(StackPointer, mir->spOffset());
-+ if (IsConstant(ins->arg())) {
-+ masm.store64(Imm64(ToInt64(ins->arg())), dst);
-+ } else {
-+ masm.store64(ToRegister64(ins->arg()), dst);
-+ }
-+}
-+
-+void CodeGenerator::visitWasmSelect(LWasmSelect* ins) {
-+ MIRType mirType = ins->mir()->type();
-+
-+ Register cond = ToRegister(ins->condExpr());
-+ const LAllocation* falseExpr = ins->falseExpr();
-+
-+ if (mirType == MIRType::Int32 || mirType == MIRType::WasmAnyRef) {
-+ Register out = ToRegister(ins->output());
-+ MOZ_ASSERT(ToRegister(ins->trueExpr()) == out,
-+ "true expr input is reused for output");
-+ if (falseExpr->isGeneralReg()) {
-+ masm.moveIfZero(out, ToRegister(falseExpr), cond);
-+ } else {
-+ masm.cmp32Load32(Assembler::Zero, cond, cond, ToAddress(falseExpr), out);
-+ }
-+ return;
-+ }
-+
-+ FloatRegister out = ToFloatRegister(ins->output());
-+ MOZ_ASSERT(ToFloatRegister(ins->trueExpr()) == out,
-+ "true expr input is reused for output");
-+
-+ if (falseExpr->isFloatReg()) {
-+ Label done;
-+ // The select condition is a 32-bit value; test 32 bits so high-bit garbage
-+ // does not make a zero condition read as non-zero.
-+ masm.branchTest32(Assembler::NonZero, cond, cond, &done);
-+ if (mirType == MIRType::Float32) {
-+ masm.moveFloat32(ToFloatRegister(falseExpr), out);
-+ } else if (mirType == MIRType::Double) {
-+ masm.moveDouble(ToFloatRegister(falseExpr), out);
-+ } else if (mirType == MIRType::Simd128) {
-+ masm.moveSimd128(ToFloatRegister(falseExpr), out);
-+ } else {
-+ MOZ_CRASH("unhandled type in visitWasmSelect!");
-+ }
-+ masm.bind(&done);
-+ } else {
-+ Label done;
-+ // The select condition is a 32-bit value; test 32 bits so high-bit garbage
-+ // does not make a zero condition read as non-zero.
-+ masm.branchTest32(Assembler::NonZero, cond, cond, &done);
-+
-+ if (mirType == MIRType::Float32) {
-+ masm.loadFloat32(ToAddress(falseExpr), out);
-+ } else if (mirType == MIRType::Double) {
-+ masm.loadDouble(ToAddress(falseExpr), out);
-+ } else if (mirType == MIRType::Simd128) {
-+ masm.loadUnalignedSimd128(ToAddress(falseExpr), out);
-+ } else {
-+ MOZ_CRASH("unhandled type in visitWasmSelect!");
-+ }
-+
-+ masm.bind(&done);
-+ }
-+}
-+
-+void CodeGenerator::visitWasmSelectI64(LWasmSelectI64* lir) {
-+ MOZ_ASSERT(lir->mir()->type() == MIRType::Int64);
-+
-+ Register cond = ToRegister(lir->condExpr());
-+ LInt64Allocation falseExpr = lir->falseExpr();
-+
-+ Register64 out = ToOutRegister64(lir);
-+ MOZ_ASSERT(ToRegister64(lir->trueExpr()) == out,
-+ "true expr is reused for input");
-+
-+ if (falseExpr.value().isGeneralReg()) {
-+ masm.moveIfZero(out.reg, ToRegister(falseExpr.value()), cond);
-+ } else {
-+ Label done;
-+ // The select condition is a 32-bit value; test 32 bits so high-bit garbage
-+ // does not make a zero condition read as non-zero.
-+ masm.branchTest32(Assembler::NonZero, cond, cond, &done);
-+ masm.loadPtr(ToAddress(falseExpr.value()), out.reg);
-+ masm.bind(&done);
-+ }
-+}
-+
-+void CodeGenerator::visitWasmCompareAndSelect(LWasmCompareAndSelect* ins) {
-+ MCompare::CompareType compTy = ins->compareType();
-+ MIRType insTy = ins->mir()->type();
-+ const bool cmpIs32 = compTy == MCompare::Compare_Int32 ||
-+ compTy == MCompare::Compare_UInt32;
-+ const bool cmpIs64 = compTy == MCompare::Compare_Int64 ||
-+ compTy == MCompare::Compare_UInt64;
-+ const bool selIsInt = insTy == MIRType::Int32 || insTy == MIRType::Int64;
-+
-+ MOZ_RELEASE_ASSERT(
-+ (cmpIs32 || cmpIs64) && selIsInt,
-+ "CodeGenerator::visitWasmCompareAndSelect: unexpected types");
-+
-+ Register trueExprAndDest = ToRegister(ins->output());
-+ MOZ_ASSERT(ToRegister(ins->ifTrueExpr()) == trueExprAndDest,
-+ "true expr input is reused for output");
-+
-+ Assembler::Condition cond =
-+ Assembler::InvertCondition(JSOpToCondition(compTy, ins->jsop()));
-+ Register lhs = ToRegister(ins->leftExpr());
-+ Register rhs = ToRegister(ins->rightExpr());
-+ Register falseExpr = ToRegister(ins->ifFalseExpr());
-+
-+ // isel operates on the whole 64-bit GPR regardless of compare width; only
-+ // the compare instruction differs (cmpw/cmplw vs cmpd/cmpld).
-+ if (cmpIs32) {
-+ masm.cmp32Move32(cond, lhs, rhs, falseExpr, trueExprAndDest);
-+ } else {
-+ masm.cmpPtrMovePtr(cond, lhs, rhs, falseExpr, trueExprAndDest);
-+ }
-+}
-+
-+void CodeGenerator::visitWasmAddOffset(LWasmAddOffset* lir) {
-+ MWasmAddOffset* mir = lir->mir();
-+ Register base = ToRegister(lir->base());
-+ Register out = ToRegister(lir->output());
-+
-+ Label ok;
-+ masm.ma_add32TestCarry(Assembler::CarryClear, out, base, Imm32(mir->offset()),
-+ &ok);
-+ masm.wasmTrap(wasm::Trap::OutOfBounds, mir->trapSiteDesc());
-+ masm.bind(&ok);
-+}
-+
-+void CodeGenerator::visitWasmAddOffset64(LWasmAddOffset64* lir) {
-+ MWasmAddOffset* mir = lir->mir();
-+ Register64 base = ToRegister64(lir->base());
-+ Register64 out = ToOutRegister64(lir);
-+
-+ Label ok;
-+ masm.ma_addPtrTestCarry(Assembler::CarryClear, out.reg, base.reg,
-+ ImmWord(mir->offset()), &ok);
-+ masm.wasmTrap(wasm::Trap::OutOfBounds, mir->trapSiteDesc());
-+ masm.bind(&ok);
-+}
-+
-+// ===============================================================
-+// Visitors: Effective Address
-+
-+void CodeGenerator::visitEffectiveAddress2(LEffectiveAddress2* ins) {
-+ const MEffectiveAddress2* mir = ins->mir();
-+ Register output = ToRegister(ins->output());
-+
-+ // EA = index * scale + displacement (no base register)
-+ masm.movePtr(ImmWord(0), output);
-+ BaseIndex addr(output, ToRegister(ins->index()), mir->scale(),
-+ mir->displacement());
-+ masm.computeEffectiveAddress(addr, output);
-+ // Sign-extend to 32-bit
-+ masm.as_extsw(output, output);
-+}
-+
-+void CodeGenerator::visitEffectiveAddress3(LEffectiveAddress3* ins) {
-+ const MEffectiveAddress3* mir = ins->mir();
-+ Register output = ToRegister(ins->output());
-+
-+ BaseIndex addr(ToRegister(ins->base()), ToRegister(ins->index()),
-+ mir->scale(), mir->displacement());
-+ masm.computeEffectiveAddress(addr, output);
-+ // Sign-extend to 32-bit
-+ masm.as_extsw(output, output);
-+}
-+
-+void CodeGenerator::visitWasmMulI64WideHI64(LWasmMulI64WideHI64* ins) {
-+ Register lhs = ToRegister(ins->lhs());
-+ Register rhs = ToRegister(ins->rhs());
-+ Register output = ToRegister(ins->output());
-+
-+ if (ins->isSigned()) {
-+ masm.as_mulhd(output, lhs, rhs);
-+ } else {
-+ masm.as_mulhdu(output, lhs, rhs);
-+ }
-+}
-+
-+// ===============================================================
-+// Visitors: Typed Array Atomics
-+
-+void CodeGenerator::visitCompareExchangeTypedArrayElement(
-+ LCompareExchangeTypedArrayElement* lir) {
-+ Register elements = ToRegister(lir->elements());
-+ AnyRegister output = ToAnyRegister(lir->output());
-+ Register outTemp = ToTempRegisterOrInvalid(lir->temp0());
-+
-+ Register oldval = ToRegister(lir->oldval());
-+ Register newval = ToRegister(lir->newval());
-+ Register valueTemp = ToTempRegisterOrInvalid(lir->temp1());
-+ Register offsetTemp = ToTempRegisterOrInvalid(lir->temp2());
-+ Register maskTemp = ToTempRegisterOrInvalid(lir->temp3());
-+ Scalar::Type arrayType = lir->mir()->arrayType();
-+
-+ auto dest = ToAddressOrBaseIndex(elements, lir->index(), arrayType);
-+
-+ dest.match([&](const auto& dest) {
-+ masm.compareExchangeJS(arrayType, Synchronization::Full(), dest, oldval,
-+ newval, valueTemp, offsetTemp, maskTemp, outTemp,
-+ output);
-+ });
-+}
-+
-+void CodeGenerator::visitAtomicExchangeTypedArrayElement(
-+ LAtomicExchangeTypedArrayElement* lir) {
-+ Register elements = ToRegister(lir->elements());
-+ AnyRegister output = ToAnyRegister(lir->output());
-+ Register outTemp = ToTempRegisterOrInvalid(lir->temp0());
-+
-+ Register value = ToRegister(lir->value());
-+ Register valueTemp = ToTempRegisterOrInvalid(lir->temp1());
-+ Register offsetTemp = ToTempRegisterOrInvalid(lir->temp2());
-+ Register maskTemp = ToTempRegisterOrInvalid(lir->temp3());
-+ Scalar::Type arrayType = lir->mir()->arrayType();
-+
-+ auto dest = ToAddressOrBaseIndex(elements, lir->index(), arrayType);
-+
-+ dest.match([&](const auto& dest) {
-+ masm.atomicExchangeJS(arrayType, Synchronization::Full(), dest, value,
-+ valueTemp, offsetTemp, maskTemp, outTemp, output);
-+ });
-+}
-+
-+void CodeGenerator::visitAtomicTypedArrayElementBinop(
-+ LAtomicTypedArrayElementBinop* lir) {
-+ MOZ_ASSERT(!lir->mir()->isForEffect());
-+
-+ AnyRegister output = ToAnyRegister(lir->output());
-+ Register elements = ToRegister(lir->elements());
-+ Register outTemp = ToTempRegisterOrInvalid(lir->temp0());
-+ Register valueTemp = ToTempRegisterOrInvalid(lir->temp1());
-+ Register offsetTemp = ToTempRegisterOrInvalid(lir->temp2());
-+ Register maskTemp = ToTempRegisterOrInvalid(lir->temp3());
-+ Register value = ToRegister(lir->value());
-+ Scalar::Type arrayType = lir->mir()->arrayType();
-+
-+ auto mem = ToAddressOrBaseIndex(elements, lir->index(), arrayType);
-+
-+ mem.match([&](const auto& mem) {
-+ masm.atomicFetchOpJS(arrayType, Synchronization::Full(),
-+ lir->mir()->operation(), value, mem, valueTemp,
-+ offsetTemp, maskTemp, outTemp, output);
-+ });
-+}
-+
-+void CodeGenerator::visitAtomicTypedArrayElementBinopForEffect(
-+ LAtomicTypedArrayElementBinopForEffect* lir) {
-+ MOZ_ASSERT(lir->mir()->isForEffect());
-+
-+ Register elements = ToRegister(lir->elements());
-+ Register valueTemp = ToTempRegisterOrInvalid(lir->temp0());
-+ Register offsetTemp = ToTempRegisterOrInvalid(lir->temp1());
-+ Register maskTemp = ToTempRegisterOrInvalid(lir->temp2());
-+ Register value = ToRegister(lir->value());
-+ Scalar::Type arrayType = lir->mir()->arrayType();
-+
-+ auto mem = ToAddressOrBaseIndex(elements, lir->index(), arrayType);
-+
-+ mem.match([&](const auto& mem) {
-+ masm.atomicEffectOpJS(arrayType, Synchronization::Full(),
-+ lir->mir()->operation(), value, mem, valueTemp,
-+ offsetTemp, maskTemp);
-+ });
-+}
-+
-+void CodeGenerator::visitCompareExchangeTypedArrayElement64(
-+ LCompareExchangeTypedArrayElement64* lir) {
-+ Register elements = ToRegister(lir->elements());
-+ Register64 oldval = ToRegister64(lir->oldval());
-+ Register64 newval = ToRegister64(lir->newval());
-+ Register64 out = ToOutRegister64(lir);
-+ Scalar::Type arrayType = lir->mir()->arrayType();
-+
-+ auto dest = ToAddressOrBaseIndex(elements, lir->index(), arrayType);
-+
-+ dest.match([&](const auto& dest) {
-+ masm.compareExchange64(Synchronization::Full(), dest, oldval, newval, out);
-+ });
-+}
-+
-+void CodeGenerator::visitAtomicExchangeTypedArrayElement64(
-+ LAtomicExchangeTypedArrayElement64* lir) {
-+ Register elements = ToRegister(lir->elements());
-+ Register64 value = ToRegister64(lir->value());
-+ Register64 out = ToOutRegister64(lir);
-+ Scalar::Type arrayType = lir->mir()->arrayType();
-+
-+ auto dest = ToAddressOrBaseIndex(elements, lir->index(), arrayType);
-+
-+ dest.match([&](const auto& dest) {
-+ masm.atomicExchange64(Synchronization::Full(), dest, value, out);
-+ });
-+}
-+
-+void CodeGenerator::visitAtomicTypedArrayElementBinop64(
-+ LAtomicTypedArrayElementBinop64* lir) {
-+ MOZ_ASSERT(lir->mir()->hasUses());
-+
-+ Register elements = ToRegister(lir->elements());
-+ Register64 value = ToRegister64(lir->value());
-+ Register64 temp = ToRegister64(lir->temp0());
-+ Register64 out = ToOutRegister64(lir);
-+
-+ Scalar::Type arrayType = lir->mir()->arrayType();
-+ AtomicOp atomicOp = lir->mir()->operation();
-+
-+ auto dest = ToAddressOrBaseIndex(elements, lir->index(), arrayType);
-+
-+ dest.match([&](const auto& dest) {
-+ masm.atomicFetchOp64(Synchronization::Full(), atomicOp, value, dest, temp,
-+ out);
-+ });
-+}
-+
-+void CodeGenerator::visitAtomicTypedArrayElementBinopForEffect64(
-+ LAtomicTypedArrayElementBinopForEffect64* lir) {
-+ MOZ_ASSERT(!lir->mir()->hasUses());
-+
-+ Register elements = ToRegister(lir->elements());
-+ Register64 value = ToRegister64(lir->value());
-+ Register64 temp = ToRegister64(lir->temp0());
-+
-+ Scalar::Type arrayType = lir->mir()->arrayType();
-+ AtomicOp atomicOp = lir->mir()->operation();
-+
-+ auto dest = ToAddressOrBaseIndex(elements, lir->index(), arrayType);
-+
-+ dest.match([&](const auto& dest) {
-+ masm.atomicEffectOp64(Synchronization::Full(), atomicOp, value, dest, temp);
-+ });
-+}
-+
-+void CodeGenerator::visitAtomicLoad64(LAtomicLoad64* lir) {
-+ Register elements = ToRegister(lir->elements());
-+ Register64 out = ToOutRegister64(lir);
-+ Scalar::Type storageType = lir->mir()->storageType();
-+
-+ auto source = ToAddressOrBaseIndex(elements, lir->index(), storageType);
-+
-+ auto sync = Synchronization::Load();
-+ masm.memoryBarrierBefore(sync);
-+ source.match([&](const auto& source) { masm.load64(source, out); });
-+ masm.memoryBarrierAfter(sync);
-+}
-+
-+void CodeGenerator::visitAtomicStore64(LAtomicStore64* lir) {
-+ Register elements = ToRegister(lir->elements());
-+ Register64 value = ToRegister64(lir->value());
-+ Scalar::Type writeType = lir->mir()->writeType();
-+
-+ auto dest = ToAddressOrBaseIndex(elements, lir->index(), writeType);
-+
-+ auto sync = Synchronization::Store();
-+ masm.memoryBarrierBefore(sync);
-+ dest.match([&](const auto& dest) { masm.store64(value, dest); });
-+ masm.memoryBarrierAfter(sync);
-+}
-+
-+// Wasm Atomics
-+void CodeGenerator::visitWasmCompareExchangeHeap(
-+ LWasmCompareExchangeHeap* ins) {
-+ MWasmCompareExchangeHeap* mir = ins->mir();
-+ Register memoryBase = ToRegister(ins->memoryBase());
-+ Register ptrReg = ToRegister(ins->ptr());
-+ BaseIndex srcAddr(memoryBase, ptrReg, TimesOne, mir->access().offset32());
-+
-+ Register oldval = ToRegister(ins->oldValue());
-+ Register newval = ToRegister(ins->newValue());
-+ Register valueTemp = ToTempRegisterOrInvalid(ins->temp0());
-+ Register offsetTemp = ToTempRegisterOrInvalid(ins->temp1());
-+ Register maskTemp = ToTempRegisterOrInvalid(ins->temp2());
-+
-+ masm.wasmCompareExchange(mir->access(), srcAddr, oldval, newval, valueTemp,
-+ offsetTemp, maskTemp, ToRegister(ins->output()));
-+}
-+
-+void CodeGenerator::visitWasmAtomicExchangeHeap(LWasmAtomicExchangeHeap* ins) {
-+ MWasmAtomicExchangeHeap* mir = ins->mir();
-+ Register memoryBase = ToRegister(ins->memoryBase());
-+ Register ptrReg = ToRegister(ins->ptr());
-+ Register value = ToRegister(ins->value());
-+ BaseIndex srcAddr(memoryBase, ptrReg, TimesOne, mir->access().offset32());
-+
-+ Register valueTemp = ToTempRegisterOrInvalid(ins->temp0());
-+ Register offsetTemp = ToTempRegisterOrInvalid(ins->temp1());
-+ Register maskTemp = ToTempRegisterOrInvalid(ins->temp2());
-+
-+ masm.wasmAtomicExchange(mir->access(), srcAddr, value, valueTemp, offsetTemp,
-+ maskTemp, ToRegister(ins->output()));
-+}
-+
-+void CodeGenerator::visitWasmAtomicBinopHeap(LWasmAtomicBinopHeap* ins) {
-+ MOZ_ASSERT(ins->mir()->hasUses());
-+
-+ MWasmAtomicBinopHeap* mir = ins->mir();
-+ Register memoryBase = ToRegister(ins->memoryBase());
-+ Register ptrReg = ToRegister(ins->ptr());
-+ Register valueTemp = ToTempRegisterOrInvalid(ins->temp0());
-+ Register offsetTemp = ToTempRegisterOrInvalid(ins->temp1());
-+ Register maskTemp = ToTempRegisterOrInvalid(ins->temp2());
-+
-+ BaseIndex srcAddr(memoryBase, ptrReg, TimesOne, mir->access().offset32());
-+
-+ masm.wasmAtomicFetchOp(mir->access(), mir->operation(),
-+ ToRegister(ins->value()), srcAddr, valueTemp,
-+ offsetTemp, maskTemp, ToRegister(ins->output()));
-+}
-+
-+void CodeGenerator::visitWasmAtomicBinopHeapForEffect(
-+ LWasmAtomicBinopHeapForEffect* ins) {
-+ MOZ_ASSERT(!ins->mir()->hasUses());
-+
-+ MWasmAtomicBinopHeap* mir = ins->mir();
-+ Register memoryBase = ToRegister(ins->memoryBase());
-+ Register ptrReg = ToRegister(ins->ptr());
-+ Register valueTemp = ToTempRegisterOrInvalid(ins->temp0());
-+ Register offsetTemp = ToTempRegisterOrInvalid(ins->temp1());
-+ Register maskTemp = ToTempRegisterOrInvalid(ins->temp2());
-+
-+ BaseIndex srcAddr(memoryBase, ptrReg, TimesOne, mir->access().offset32());
-+ masm.wasmAtomicEffectOp(mir->access(), mir->operation(),
-+ ToRegister(ins->value()), srcAddr, valueTemp,
-+ offsetTemp, maskTemp);
-+}
-+
-+void CodeGenerator::visitWasmCompareExchangeI64(LWasmCompareExchangeI64* lir) {
-+ Register memoryBase = ToRegister(lir->memoryBase());
-+ Register ptr = ToRegister(lir->ptr());
-+ Register64 oldValue = ToRegister64(lir->oldValue());
-+ Register64 newValue = ToRegister64(lir->newValue());
-+ Register64 output = ToOutRegister64(lir);
-+ uint32_t offset = lir->mir()->access().offset32();
-+
-+ BaseIndex addr(memoryBase, ptr, TimesOne, offset);
-+ masm.wasmCompareExchange64(lir->mir()->access(), addr, oldValue, newValue,
-+ output);
-+}
-+
-+void CodeGenerator::visitWasmAtomicExchangeI64(LWasmAtomicExchangeI64* lir) {
-+ Register memoryBase = ToRegister(lir->memoryBase());
-+ Register ptr = ToRegister(lir->ptr());
-+ Register64 value = ToRegister64(lir->value());
-+ Register64 output = ToOutRegister64(lir);
-+ uint32_t offset = lir->mir()->access().offset32();
-+
-+ BaseIndex addr(memoryBase, ptr, TimesOne, offset);
-+ masm.wasmAtomicExchange64(lir->mir()->access(), addr, value, output);
-+}
-+
-+void CodeGenerator::visitWasmAtomicBinopI64(LWasmAtomicBinopI64* lir) {
-+ Register memoryBase = ToRegister(lir->memoryBase());
-+ Register ptr = ToRegister(lir->ptr());
-+ Register64 value = ToRegister64(lir->value());
-+ Register64 output = ToOutRegister64(lir);
-+ Register64 temp = ToRegister64(lir->temp0());
-+ uint32_t offset = lir->mir()->access().offset32();
-+
-+ BaseIndex addr(memoryBase, ptr, TimesOne, offset);
-+
-+ masm.wasmAtomicFetchOp64(lir->mir()->access(), lir->mir()->operation(), value,
-+ addr, temp, output);
-+}
-+
-+// SIMD code generators.
-+void CodeGenerator::visitSimd128(LSimd128* ins) {
-+ FloatRegister dest = ToFloatRegister(ins->output());
-+ masm.loadConstantSimd128(ins->simd128(), dest);
-+}
-+void CodeGenerator::visitWasmTernarySimd128(LWasmTernarySimd128* ins) {  // Emits code for three-operand SIMD ops, dispatching on ins->simdOp().
-+ FloatRegister v0 = ToFloatRegister(ins->v0());
-+ FloatRegister v1 = ToFloatRegister(ins->v1());
-+ FloatRegister v2 = ToFloatRegister(ins->v2());
-+ FloatRegister dest = ToFloatRegister(ins->output());
-+ switch (ins->simdOp()) {
-+ case wasm::SimdOp::V128Bitselect:
-+ // bitselect(v0, v1, v2): result = (v0 & v2) | (v1 & ~v2)
-+ // xxsel: XC=0→XA, XC=1→XB → (XA & ~XC) | (XB & XC)
-+ // Need XA=v1, XB=v0, XC=v2.
-+ masm.as_xxsel(dest, v1, v0, v2);
-+ break;
-+ case wasm::SimdOp::I8x16RelaxedLaneSelect:
-+ case wasm::SimdOp::I16x8RelaxedLaneSelect:
-+ case wasm::SimdOp::I32x4RelaxedLaneSelect:
-+ case wasm::SimdOp::I64x2RelaxedLaneSelect:
-+ // relaxed laneSelect(v0, v1, mask=v2): same as bitselect
-+ masm.as_xxsel(dest, v1, v0, v2);
-+ break;
-+ // Lowering uses defineReuseInput on V2Index for ternary ops — the
-+ // allocator is required to place `dest` in v2's slot. Assert that
-+ // here; the FMA/dot helpers write their result through v2 in-place,
-+ // so dest == v2 makes the trailing moveSimd128 unnecessary.
-+ case wasm::SimdOp::I32x4RelaxedDotI8x16I7x16AddS:
-+ MOZ_ASSERT(dest == v2);
-+ masm.dotInt8x16Int7x16ThenAdd(v0, v1, v2,
-+ ToFloatRegister(ins->temp0()));  // This is the only case here that reads temp0.
-+ break;
-+ case wasm::SimdOp::F32x4RelaxedMadd:
-+ MOZ_ASSERT(dest == v2);
-+ masm.fmaFloat32x4(v0, v1, v2);
-+ break;
-+ case wasm::SimdOp::F64x2RelaxedMadd:
-+ MOZ_ASSERT(dest == v2);
-+ masm.fmaFloat64x2(v0, v1, v2);
-+ break;
-+ case wasm::SimdOp::F32x4RelaxedNmadd:
-+ MOZ_ASSERT(dest == v2);
-+ masm.fnmaFloat32x4(v0, v1, v2);
-+ break;
-+ case wasm::SimdOp::F64x2RelaxedNmadd:
-+ MOZ_ASSERT(dest == v2);
-+ masm.fnmaFloat64x2(v0, v1, v2);
-+ break;
-+ default:
-+ MOZ_CRASH("PPC64: NYI SIMD ternary op");  // Every op not listed above ends up here.
-+ }
-+}
-+void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) {  // Emits code for two-register SIMD ops, dispatching on ins->simdOp().
-+ FloatRegister lhs = ToFloatRegister(ins->lhs());
-+ FloatRegister rhs = ToFloatRegister(ins->rhs());
-+ FloatRegister dest = ToFloatRegister(ins->output());
-+ switch (ins->simdOp()) {
-+ // Bitwise
-+ case wasm::SimdOp::V128And:
-+ masm.bitwiseAndSimd128(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::V128Or:
-+ masm.bitwiseOrSimd128(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::V128Xor:
-+ masm.bitwiseXorSimd128(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::V128AndNot:
-+ masm.bitwiseAndNotSimd128(lhs, rhs, dest);
-+ break;
-+ // Integer add
-+ case wasm::SimdOp::I8x16Add:
-+ masm.addInt8x16(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8Add:
-+ masm.addInt16x8(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4Add:
-+ masm.addInt32x4(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I64x2Add:
-+ masm.addInt64x2(lhs, rhs, dest);
-+ break;
-+ // Integer sub
-+ case wasm::SimdOp::I8x16Sub:
-+ masm.subInt8x16(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8Sub:
-+ masm.subInt16x8(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4Sub:
-+ masm.subInt32x4(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I64x2Sub:
-+ masm.subInt64x2(lhs, rhs, dest);
-+ break;
-+ // Saturating add
-+ case wasm::SimdOp::I8x16AddSatS:
-+ masm.addSatInt8x16(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I8x16AddSatU:
-+ masm.unsignedAddSatInt8x16(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8AddSatS:
-+ masm.addSatInt16x8(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8AddSatU:
-+ masm.unsignedAddSatInt16x8(lhs, rhs, dest);
-+ break;
-+ // Saturating sub
-+ case wasm::SimdOp::I8x16SubSatS:
-+ masm.subSatInt8x16(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I8x16SubSatU:
-+ masm.unsignedSubSatInt8x16(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8SubSatS:
-+ masm.subSatInt16x8(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8SubSatU:
-+ masm.unsignedSubSatInt16x8(lhs, rhs, dest);
-+ break;
-+ // Integer multiply
-+ case wasm::SimdOp::I16x8Mul:
-+ masm.mulInt16x8(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4Mul:
-+ masm.mulInt32x4(lhs, rhs, dest);
-+ break;
-+ // Integer min/max signed
-+ case wasm::SimdOp::I8x16MinS:
-+ masm.minInt8x16(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I8x16MaxS:
-+ masm.maxInt8x16(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8MinS:
-+ masm.minInt16x8(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8MaxS:
-+ masm.maxInt16x8(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4MinS:
-+ masm.minInt32x4(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4MaxS:
-+ masm.maxInt32x4(lhs, rhs, dest);
-+ break;
-+ // Integer min/max unsigned
-+ case wasm::SimdOp::I8x16MinU:
-+ masm.unsignedMinInt8x16(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I8x16MaxU:
-+ masm.unsignedMaxInt8x16(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8MinU:
-+ masm.unsignedMinInt16x8(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8MaxU:
-+ masm.unsignedMaxInt16x8(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4MinU:
-+ masm.unsignedMinInt32x4(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4MaxU:
-+ masm.unsignedMaxInt32x4(lhs, rhs, dest);
-+ break;
-+ // Average unsigned
-+ case wasm::SimdOp::I8x16AvgrU:
-+ masm.unsignedAverageInt8x16(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8AvgrU:
-+ masm.unsignedAverageInt16x8(lhs, rhs, dest);
-+ break;
-+ // Q15 multiply
-+ case wasm::SimdOp::I16x8Q15MulrSatS:
-+ masm.q15MulrSatInt16x8(lhs, rhs, dest);
-+ break;
-+ // Integer compare
-+ case wasm::SimdOp::I8x16Eq:
-+ masm.compareInt8x16(Assembler::Equal, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I8x16Ne:
-+ masm.compareInt8x16(Assembler::NotEqual, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I8x16LtS:
-+ masm.compareInt8x16(Assembler::LessThan, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I8x16GtS:
-+ masm.compareInt8x16(Assembler::GreaterThan, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I8x16LeS:
-+ masm.compareInt8x16(Assembler::LessThanOrEqual, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I8x16GeS:
-+ masm.compareInt8x16(Assembler::GreaterThanOrEqual, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I8x16LtU:
-+ masm.compareInt8x16(Assembler::Below, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I8x16GtU:
-+ masm.compareInt8x16(Assembler::Above, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I8x16LeU:
-+ masm.compareInt8x16(Assembler::BelowOrEqual, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I8x16GeU:
-+ masm.compareInt8x16(Assembler::AboveOrEqual, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8Eq:
-+ masm.compareInt16x8(Assembler::Equal, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8Ne:
-+ masm.compareInt16x8(Assembler::NotEqual, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8LtS:
-+ masm.compareInt16x8(Assembler::LessThan, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8GtS:
-+ masm.compareInt16x8(Assembler::GreaterThan, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8LeS:
-+ masm.compareInt16x8(Assembler::LessThanOrEqual, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8GeS:
-+ masm.compareInt16x8(Assembler::GreaterThanOrEqual, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8LtU:
-+ masm.compareInt16x8(Assembler::Below, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8GtU:
-+ masm.compareInt16x8(Assembler::Above, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8LeU:
-+ masm.compareInt16x8(Assembler::BelowOrEqual, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8GeU:
-+ masm.compareInt16x8(Assembler::AboveOrEqual, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4Eq:
-+ masm.compareInt32x4(Assembler::Equal, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4Ne:
-+ masm.compareInt32x4(Assembler::NotEqual, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4LtS:
-+ masm.compareInt32x4(Assembler::LessThan, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4GtS:
-+ masm.compareInt32x4(Assembler::GreaterThan, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4LeS:
-+ masm.compareInt32x4(Assembler::LessThanOrEqual, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4GeS:
-+ masm.compareInt32x4(Assembler::GreaterThanOrEqual, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4LtU:
-+ masm.compareInt32x4(Assembler::Below, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4GtU:
-+ masm.compareInt32x4(Assembler::Above, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4LeU:
-+ masm.compareInt32x4(Assembler::BelowOrEqual, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4GeU:
-+ masm.compareInt32x4(Assembler::AboveOrEqual, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I64x2Eq:
-+ masm.compareInt64x2(Assembler::Equal, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I64x2Ne:
-+ masm.compareInt64x2(Assembler::NotEqual, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I64x2LtS:
-+ masm.compareInt64x2(Assembler::LessThan, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I64x2GtS:
-+ masm.compareInt64x2(Assembler::GreaterThan, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I64x2LeS:
-+ masm.compareInt64x2(Assembler::LessThanOrEqual, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I64x2GeS:
-+ masm.compareInt64x2(Assembler::GreaterThanOrEqual, lhs, rhs, dest);
-+ break;
-+ // Float compare
-+ case wasm::SimdOp::F32x4Eq:
-+ masm.compareFloat32x4(Assembler::Equal, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::F32x4Ne:
-+ masm.compareFloat32x4(Assembler::NotEqual, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::F32x4Lt:
-+ masm.compareFloat32x4(Assembler::LessThan, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::F32x4Gt:
-+ masm.compareFloat32x4(Assembler::GreaterThan, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::F32x4Le:
-+ masm.compareFloat32x4(Assembler::LessThanOrEqual, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::F32x4Ge:
-+ masm.compareFloat32x4(Assembler::GreaterThanOrEqual, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::F64x2Eq:
-+ masm.compareFloat64x2(Assembler::Equal, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::F64x2Ne:
-+ masm.compareFloat64x2(Assembler::NotEqual, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::F64x2Lt:
-+ masm.compareFloat64x2(Assembler::LessThan, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::F64x2Gt:
-+ masm.compareFloat64x2(Assembler::GreaterThan, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::F64x2Le:
-+ masm.compareFloat64x2(Assembler::LessThanOrEqual, lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::F64x2Ge:
-+ masm.compareFloat64x2(Assembler::GreaterThanOrEqual, lhs, rhs, dest);
-+ break;
-+ // Float arithmetic
-+ case wasm::SimdOp::F32x4Add:
-+ masm.addFloat32x4(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::F32x4Sub:
-+ masm.subFloat32x4(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::F32x4Mul:
-+ masm.mulFloat32x4(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::F32x4Div:
-+ masm.divFloat32x4(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::F32x4Min:
-+ masm.minFloat32x4(lhs, rhs, dest, ToFloatRegister(ins->getTemp(0)),  // Float min/max pass the node's temps 0 and 1 to the helper.
-+ ToFloatRegister(ins->getTemp(1)));
-+ break;
-+ case wasm::SimdOp::F32x4Max:
-+ masm.maxFloat32x4(lhs, rhs, dest, ToFloatRegister(ins->getTemp(0)),
-+ ToFloatRegister(ins->getTemp(1)));
-+ break;
-+ case wasm::SimdOp::F32x4PMin:
-+ masm.pseudoMinFloat32x4(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::F32x4PMax:
-+ masm.pseudoMaxFloat32x4(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::F64x2Add:
-+ masm.addFloat64x2(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::F64x2Sub:
-+ masm.subFloat64x2(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::F64x2Mul:
-+ masm.mulFloat64x2(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::F64x2Div:
-+ masm.divFloat64x2(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::F64x2Min:
-+ masm.minFloat64x2(lhs, rhs, dest, ToFloatRegister(ins->getTemp(0)),
-+ ToFloatRegister(ins->getTemp(1)));
-+ break;
-+ case wasm::SimdOp::F64x2Max:
-+ masm.maxFloat64x2(lhs, rhs, dest, ToFloatRegister(ins->getTemp(0)),
-+ ToFloatRegister(ins->getTemp(1)));
-+ break;
-+ case wasm::SimdOp::F64x2PMin:
-+ masm.pseudoMinFloat64x2(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::F64x2PMax:
-+ masm.pseudoMaxFloat64x2(lhs, rhs, dest);
-+ break;
-+ // Narrow
-+ case wasm::SimdOp::I8x16NarrowI16x8S:
-+ masm.narrowInt16x8(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I8x16NarrowI16x8U:
-+ masm.unsignedNarrowInt16x8(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8NarrowI32x4S:
-+ masm.narrowInt32x4(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8NarrowI32x4U:
-+ masm.unsignedNarrowInt32x4(lhs, rhs, dest);
-+ break;
-+ // i64 multiply
-+ case wasm::SimdOp::I64x2Mul: {
-+ FloatRegister temp0 = ToTempFloatRegisterOrInvalid(ins->temp0());  // Unlike float min/max above, these temps go through the OrInvalid conversion.
-+ FloatRegister temp1f = ToTempFloatRegisterOrInvalid(ins->temp1());
-+ masm.mulInt64x2(lhs, rhs, dest, temp0, temp1f);
-+ break;
-+ }
-+ // Extended multiply
-+ case wasm::SimdOp::I16x8ExtmulLowI8x16S:
-+ masm.extMulLowInt8x16(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8ExtmulHighI8x16S:
-+ masm.extMulHighInt8x16(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8ExtmulLowI8x16U:
-+ masm.unsignedExtMulLowInt8x16(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8ExtmulHighI8x16U:
-+ masm.unsignedExtMulHighInt8x16(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4ExtmulLowI16x8S:
-+ masm.extMulLowInt16x8(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4ExtmulHighI16x8S:
-+ masm.extMulHighInt16x8(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4ExtmulLowI16x8U:
-+ masm.unsignedExtMulLowInt16x8(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4ExtmulHighI16x8U:
-+ masm.unsignedExtMulHighInt16x8(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I64x2ExtmulLowI32x4S:
-+ masm.extMulLowInt32x4(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I64x2ExtmulHighI32x4S:
-+ masm.extMulHighInt32x4(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I64x2ExtmulLowI32x4U:
-+ masm.unsignedExtMulLowInt32x4(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I64x2ExtmulHighI32x4U:
-+ masm.unsignedExtMulHighInt32x4(lhs, rhs, dest);
-+ break;
-+ // Dot product
-+ case wasm::SimdOp::I32x4DotI16x8S:
-+ masm.widenDotInt16x8(lhs, rhs, dest);
-+ break;
-+ // Relaxed binary ops
-+ case wasm::SimdOp::F32x4RelaxedMin:
-+ masm.minFloat32x4Relaxed(rhs, lhs);  // Result is taken from lhs (copied to dest below). NOTE(review): this appears to overwrite lhs when dest != lhs — confirm lowering makes that safe.
-+ if (dest != lhs) masm.moveSimd128(lhs, dest);
-+ break;
-+ case wasm::SimdOp::F32x4RelaxedMax:
-+ masm.maxFloat32x4Relaxed(rhs, lhs);  // Same two-operand pattern as F32x4RelaxedMin.
-+ if (dest != lhs) masm.moveSimd128(lhs, dest);
-+ break;
-+ case wasm::SimdOp::F64x2RelaxedMin:
-+ masm.minFloat64x2Relaxed(rhs, lhs);  // Same two-operand pattern as F32x4RelaxedMin.
-+ if (dest != lhs) masm.moveSimd128(lhs, dest);
-+ break;
-+ case wasm::SimdOp::F64x2RelaxedMax:
-+ masm.maxFloat64x2Relaxed(rhs, lhs);  // Same two-operand pattern as F32x4RelaxedMin.
-+ if (dest != lhs) masm.moveSimd128(lhs, dest);
-+ break;
-+ case wasm::SimdOp::I8x16RelaxedSwizzle:
-+ masm.swizzleInt8x16Relaxed(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8RelaxedQ15MulrS:
-+ masm.q15MulrInt16x8Relaxed(lhs, rhs, dest);
-+ break;
-+ // Swizzle
-+ case wasm::SimdOp::I8x16Swizzle:
-+ masm.swizzleInt8x16(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8RelaxedDotI8x16I7x16S:  // Relaxed dot product; it sits under the "Swizzle" heading but is not a swizzle.
-+ masm.dotInt8x16Int7x16(lhs, rhs, dest);
-+ break;
-+ default:
-+ MOZ_CRASH("PPC64: NYI SIMD binary op");  // Every op not listed above ends up here.
-+ }
-+}
-+void CodeGenerator::visitWasmBinarySimd128WithConstant(
-+ LWasmBinarySimd128WithConstant* ins) {  // Emits binary SIMD ops whose right-hand side is a SimdConstant.
-+ FloatRegister lhs = ToFloatRegister(ins->lhs());
-+ FloatRegister dest = ToFloatRegister(ins->output());
-+ SimdConstant rhs = ins->rhs();
-+ // Load the constant into scratch, then use the binary op.
-+ ScratchSimd128Scope scratch(masm);
-+ masm.loadConstantSimd128(rhs, scratch);  // NOTE(review): scratch stays live across the helpers below — confirm none of them claims the SIMD scratch itself.
-+ switch (ins->mir()->simdOp()) {
-+ // Bitwise
-+ case wasm::SimdOp::V128And:
-+ masm.bitwiseAndSimd128(lhs, scratch, dest);
-+ break;
-+ case wasm::SimdOp::V128Or:
-+ masm.bitwiseOrSimd128(lhs, scratch, dest);
-+ break;
-+ case wasm::SimdOp::V128Xor:
-+ masm.bitwiseXorSimd128(lhs, scratch, dest);
-+ break;
-+ case wasm::SimdOp::V128AndNot:
-+ masm.bitwiseAndNotSimd128(lhs, scratch, dest);
-+ break;
-+ // Integer add
-+ case wasm::SimdOp::I8x16Add:
-+ masm.addInt8x16(lhs, scratch, dest);
-+ break;
-+ case wasm::SimdOp::I16x8Add:
-+ masm.addInt16x8(lhs, scratch, dest);
-+ break;
-+ case wasm::SimdOp::I32x4Add:
-+ masm.addInt32x4(lhs, scratch, dest);
-+ break;
-+ case wasm::SimdOp::I64x2Add:
-+ masm.addInt64x2(lhs, scratch, dest);
-+ break;
-+ // Integer sub
-+ case wasm::SimdOp::I8x16Sub:
-+ masm.subInt8x16(lhs, scratch, dest);
-+ break;
-+ case wasm::SimdOp::I16x8Sub:
-+ masm.subInt16x8(lhs, scratch, dest);
-+ break;
-+ case wasm::SimdOp::I32x4Sub:
-+ masm.subInt32x4(lhs, scratch, dest);
-+ break;
-+ case wasm::SimdOp::I64x2Sub:
-+ masm.subInt64x2(lhs, scratch, dest);
-+ break;
-+ // Integer multiply (16-/32-bit lanes; I64x2 unreachable, see below)
-+ case wasm::SimdOp::I16x8Mul:
-+ masm.mulInt16x8(lhs, scratch, dest);
-+ break;
-+ case wasm::SimdOp::I32x4Mul:
-+ masm.mulInt32x4(lhs, scratch, dest);
-+ break;
-+ case wasm::SimdOp::I64x2Mul:
-+ // Unreachable on PPC64: MWasmBinarySimd128::specializeForConstantRhs
-+ // returns false in Lowering-ppc64.cpp, so MIR with a constant rhs
-+ // to I64x2Mul is never created on this backend.
-+ //
-+ // The previous in-place implementation was broken in three ways:
-+ // hard-coded VR0/VR1 staging assumed an ordering that didn't match
-+ // the surrounding code; a dead `mfvsrd(a, f0)` clobbered `a`
-+ // immediately before the next mfvsrd; and the trailing
-+ // `xxpermdi(dest, scratch, dest, 0)` with DM=0 placed lane-0 in the
-+ // wrong half. Rather than ship dead-but-broken code, crash loudly
-+ // if reachability ever changes — the future enabler must write a
-+ // correct lowering (e.g. via masm.mulInt64x2 with explicit temps).
-+ MOZ_CRASH("PPC64: I64x2Mul with constant rhs unimplemented "
-+ "(specializeForConstantRhs returns false)");  // No break follows; presumably MOZ_CRASH does not return — confirm.
-+ // Compare
-+ case wasm::SimdOp::I8x16Eq:
-+ masm.compareInt8x16(Assembler::Equal, lhs, scratch, dest);
-+ break;
-+ case wasm::SimdOp::I8x16Ne:
-+ masm.compareInt8x16(Assembler::NotEqual, lhs, scratch, dest);
-+ break;
-+ default:
-+ MOZ_CRASH("PPC64: NYI SIMD binary-with-constant op");  // Every op not listed above ends up here.
-+ }
-+}
-+void CodeGenerator::visitWasmVariableShiftSimd128(
-+ LWasmVariableShiftSimd128* ins) {  // Emits SIMD shifts whose count `rhs` is held in a general-purpose Register.
-+ FloatRegister lhs = ToFloatRegister(ins->lhs());
-+ Register rhs = ToRegister(ins->rhs());
-+ FloatRegister dest = ToFloatRegister(ins->output());
-+ switch (ins->mir()->simdOp()) {
-+ case wasm::SimdOp::I8x16Shl:
-+ masm.leftShiftInt8x16(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I8x16ShrS:
-+ masm.rightShiftInt8x16(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I8x16ShrU:
-+ masm.unsignedRightShiftInt8x16(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8Shl:
-+ masm.leftShiftInt16x8(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8ShrS:
-+ masm.rightShiftInt16x8(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I16x8ShrU:
-+ masm.unsignedRightShiftInt16x8(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4Shl:
-+ masm.leftShiftInt32x4(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4ShrS:
-+ masm.rightShiftInt32x4(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I32x4ShrU:
-+ masm.unsignedRightShiftInt32x4(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I64x2Shl:
-+ masm.leftShiftInt64x2(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I64x2ShrS:
-+ masm.rightShiftInt64x2(lhs, rhs, dest);
-+ break;
-+ case wasm::SimdOp::I64x2ShrU:
-+ masm.unsignedRightShiftInt64x2(lhs, rhs, dest);
-+ break;
-+ default:
-+ MOZ_CRASH("PPC64: NYI SIMD variable shift op");  // Every op not listed above ends up here.
-+ }
-+}
-+void CodeGenerator::visitWasmConstantShiftSimd128(
-+ LWasmConstantShiftSimd128* ins) {  // Emits SIMD shifts whose count is the immediate ins->shift().
-+ FloatRegister src = ToFloatRegister(ins->src());
-+ FloatRegister dest = ToFloatRegister(ins->output());
-+ int32_t shift = ins->shift();  // Passed to the helpers unchanged; NOTE(review): presumably already reduced to the lane width upstream — confirm.
-+ switch (ins->mir()->simdOp()) {
-+ case wasm::SimdOp::I8x16Shl:
-+ masm.leftShiftInt8x16(Imm32(shift), src, dest);
-+ break;
-+ case wasm::SimdOp::I8x16ShrS:
-+ masm.rightShiftInt8x16(Imm32(shift), src, dest);
-+ break;
-+ case wasm::SimdOp::I8x16ShrU:
-+ masm.unsignedRightShiftInt8x16(Imm32(shift), src, dest);
-+ break;
-+ case wasm::SimdOp::I16x8Shl:
-+ masm.leftShiftInt16x8(Imm32(shift), src, dest);
-+ break;
-+ case wasm::SimdOp::I16x8ShrS:
-+ masm.rightShiftInt16x8(Imm32(shift), src, dest);
-+ break;
-+ case wasm::SimdOp::I16x8ShrU:
-+ masm.unsignedRightShiftInt16x8(Imm32(shift), src, dest);
-+ break;
-+ case wasm::SimdOp::I32x4Shl:
-+ masm.leftShiftInt32x4(Imm32(shift), src, dest);
-+ break;
-+ case wasm::SimdOp::I32x4ShrS:
-+ masm.rightShiftInt32x4(Imm32(shift), src, dest);
-+ break;
-+ case wasm::SimdOp::I32x4ShrU:
-+ masm.unsignedRightShiftInt32x4(Imm32(shift), src, dest);
-+ break;
-+ case wasm::SimdOp::I64x2Shl:
-+ masm.leftShiftInt64x2(Imm32(shift), src, dest);
-+ break;
-+ case wasm::SimdOp::I64x2ShrS:
-+ masm.rightShiftInt64x2(Imm32(shift), src, dest);
-+ break;
-+ case wasm::SimdOp::I64x2ShrU:
-+ masm.unsignedRightShiftInt64x2(Imm32(shift), src, dest);
-+ break;
-+ default:
-+ MOZ_CRASH("PPC64: NYI SIMD constant shift op");  // Every op not listed above ends up here.
-+ }
-+}
-+void CodeGenerator::visitWasmSignReplicationSimd128(
-+ LWasmSignReplicationSimd128* ins) {  // Emits sign replication as a signed right shift by (lane width - 1).
-+ FloatRegister src = ToFloatRegister(ins->src());
-+ FloatRegister dest = ToFloatRegister(ins->output());
-+ // Sign replication = arithmetic right shift by max amount (all sign bits).
-+ switch (ins->mir()->simdOp()) {
-+ case wasm::SimdOp::I8x16ShrS:
-+ masm.rightShiftInt8x16(Imm32(7), src, dest);  // 8-bit lanes: shift by 7.
-+ break;
-+ case wasm::SimdOp::I16x8ShrS:
-+ masm.rightShiftInt16x8(Imm32(15), src, dest);  // 16-bit lanes: shift by 15.
-+ break;
-+ case wasm::SimdOp::I32x4ShrS:
-+ masm.rightShiftInt32x4(Imm32(31), src, dest);  // 32-bit lanes: shift by 31.
-+ break;
-+ case wasm::SimdOp::I64x2ShrS:
-+ masm.rightShiftInt64x2(Imm32(63), src, dest);  // 64-bit lanes: shift by 63.
-+ break;
-+ default:
-+ MOZ_CRASH("Unexpected sign replication op");  // Only the four signed right-shift ops are handled.
-+ }
-+}
-+void CodeGenerator::visitWasmShuffleSimd128(LWasmShuffleSimd128* ins) {  // Emits a two-input byte shuffle driven by ins->control().
-+ FloatRegister lhs = ToFloatRegister(ins->lhs());
-+ FloatRegister rhs = ToFloatRegister(ins->rhs());
-+ FloatRegister dest = ToFloatRegister(ins->output());
-+ SimdConstant ctrl = ins->control();
-+ const uint8_t* lanes = reinterpret_cast<const uint8_t*>(ctrl.bytes());  // The control bytes are passed to the helper as-is, with no per-op decoding.
-+ masm.shuffleInt8x16(lanes, lhs, rhs, dest);
-+}
-+void CodeGenerator::visitWasmPermuteSimd128(LWasmPermuteSimd128* ins) {  // Emits a single-input permute by rebuilding per-byte indices from ins->control().
-+ FloatRegister src = ToFloatRegister(ins->src());
-+ FloatRegister dest = ToFloatRegister(ins->output());
-+ // PPC64: the shuffle analysis transforms control bytes into specialized
-+ // formats. Reconstruct raw Wasm byte indices for our vperm implementation.
-+ SimdConstant ctrl = ins->control();
-+ uint8_t rawLanes[16];  // One source-byte index per output byte; on the needsZeroRhs path, values >= 16 mean "zero".
-+ switch (ins->op()) {
-+ case SimdPermuteOp::MOVE:
-+ masm.moveSimd128(src, dest);
-+ return;  // A plain move needs no index table.
-+ case SimdPermuteOp::PERMUTE_32x4: {
-+ const int32_t* words = reinterpret_cast<const int32_t*>(ctrl.bytes());
-+ for (int i = 0; i < 4; i++)
-+ for (int j = 0; j < 4; j++)
-+ rawLanes[i * 4 + j] = words[i] * 4 + j;  // Expand each 32-bit lane index into its four byte indices.
-+ break;
-+ }
-+ case SimdPermuteOp::PERMUTE_16x8: {
-+ // control has int16 halfword indices. High byte of halfs[0] may have
-+ // platform-specific flags (Perm16x8Action). Mask to get the index only.
-+ const int16_t* halfs = reinterpret_cast<const int16_t*>(ctrl.bytes());
-+ for (int i = 0; i < 8; i++) {
-+ int hwIdx = halfs[i] & 0x7;
-+ rawLanes[i * 2] = hwIdx * 2;
-+ rawLanes[i * 2 + 1] = hwIdx * 2 + 1;
-+ }
-+ break;
-+ }
-+ case SimdPermuteOp::BROADCAST_8x16: {
-+ uint8_t lane = reinterpret_cast<const int8_t*>(ctrl.bytes())[0];
-+ for (int i = 0; i < 16; i++) rawLanes[i] = lane;
-+ break;
-+ }
-+ case SimdPermuteOp::BROADCAST_16x8: {
-+ uint8_t lane = reinterpret_cast<const int8_t*>(ctrl.bytes())[0];  // Reads only the first control byte as the 16-bit lane index.
-+ for (int i = 0; i < 8; i++) {
-+ rawLanes[i * 2] = lane * 2;
-+ rawLanes[i * 2 + 1] = lane * 2 + 1;
-+ }
-+ break;
-+ }
-+ case SimdPermuteOp::ROTATE_RIGHT_8x16: {
-+ uint8_t shift = reinterpret_cast<const int8_t*>(ctrl.bytes())[0];
-+ for (int i = 0; i < 16; i++) rawLanes[i] = (i + shift) % 16;  // Output byte i takes source byte (i + shift) mod 16.
-+ break;
-+ }
-+ case SimdPermuteOp::SHIFT_LEFT_8x16: {
-+ // Shifted-out positions must be zero. Use index 16+ to pick from zero.
-+ uint8_t shift = reinterpret_cast<const int8_t*>(ctrl.bytes())[0];
-+ for (int i = 0; i < 16; i++)
-+ rawLanes[i] = (i >= shift) ? (i - shift) : (16 + i);
-+ goto needsZeroRhs;
-+ }
-+ case SimdPermuteOp::SHIFT_RIGHT_8x16: {
-+ uint8_t shift = reinterpret_cast<const int8_t*>(ctrl.bytes())[0];
-+ for (int i = 0; i < 16; i++)
-+ rawLanes[i] = (i + shift < 16) ? (i + shift) : (16 + i);  // Positions past the end get an index >= 16, i.e. zero.
-+ goto needsZeroRhs;
-+ }
-+ case SimdPermuteOp::REVERSE_16x8: {
-+ // Reverse bytes within each 16-bit lane: [1,0,3,2,5,4,...]
-+ for (int i = 0; i < 8; i++) {
-+ rawLanes[i * 2] = i * 2 + 1;
-+ rawLanes[i * 2 + 1] = i * 2;
-+ }
-+ break;
-+ }
-+ case SimdPermuteOp::REVERSE_32x4: {
-+ // Reverse bytes within each 32-bit lane: [3,2,1,0,7,6,5,4,...]
-+ for (int i = 0; i < 4; i++)
-+ for (int j = 0; j < 4; j++)
-+ rawLanes[i * 4 + j] = i * 4 + (3 - j);
-+ break;
-+ }
-+ case SimdPermuteOp::REVERSE_64x2: {
-+ // Reverse bytes within each 64-bit lane: [7,6,5,4,3,2,1,0,15,...]
-+ for (int i = 0; i < 2; i++)
-+ for (int j = 0; j < 8; j++)
-+ rawLanes[i * 8 + j] = i * 8 + (7 - j);
-+ break;
-+ }
-+ case SimdPermuteOp::ZERO_EXTEND_8x16_TO_16x8:
-+ case SimdPermuteOp::ZERO_EXTEND_8x16_TO_32x4:
-+ case SimdPermuteOp::ZERO_EXTEND_8x16_TO_64x2:
-+ case SimdPermuteOp::ZERO_EXTEND_16x8_TO_32x4:
-+ case SimdPermuteOp::ZERO_EXTEND_16x8_TO_64x2:
-+ case SimdPermuteOp::ZERO_EXTEND_32x4_TO_64x2: {
-+ const int8_t* bytes = reinterpret_cast<const int8_t*>(ctrl.bytes());
-+ for (int i = 0; i < 16; i++) rawLanes[i] = bytes[i];  // NOTE(review): assumes control holds per-byte indices with >= 16 marking zero bytes — confirm against the shuffle analysis.
-+ goto needsZeroRhs;
-+ }
-+ default: {
-+ // PERMUTE_8x16 and others: control has raw byte indices.
-+ const int8_t* bytes = reinterpret_cast<const int8_t*>(ctrl.bytes());
-+ for (int i = 0; i < 16; i++) rawLanes[i] = bytes[i];
-+ break;
-+ }
-+ }
-+ masm.shuffleInt8x16(rawLanes, src, src, dest);  // Non-zeroing ops: src is passed as both shuffle inputs.
-+ return;
-+
-+ needsZeroRhs: {
-+ // Wasm convention: rawLanes[i] in 0..15 selects src.LE_byte[idx], and
-+ // rawLanes[i] >= 16 means "zero". Without spilling, we can't satisfy
-+ // vperm's three-input constraint AND keep src alive when dest == src.
-+ // Strategy: vperm src with itself (any valid byte for the "zero"
-+ // positions, bytes get masked out below), then AND with a mask that
-+ // zeros those positions.
-+ int8_t ctrl[16], mask[16];  // NOTE: this local `ctrl` shadows the SimdConstant `ctrl` declared at the top of the function.
-+ for (unsigned i = 0; i < 16; i++) {
-+ uint8_t idx = rawLanes[i];
-+ if (idx < 16) {
-+ ctrl[i] = 15 - idx;  // Index is mirrored (15 - idx) for vperm; presumably its byte numbering runs opposite to the LE indices — confirm.
-+ mask[i] = -1;  // All bits set: keep this byte.
-+ } else {
-+ ctrl[i] = 0;  // Placeholder selector; the byte is cleared by the mask.
-+ mask[i] = 0;
-+ }
-+ }
-+ ScratchSimd128Scope scratch(masm);
-+ masm.loadConstantSimd128(SimdConstant::CreateX16(ctrl), scratch);
-+ masm.as_vperm(dest.encoding() & 31,  // "& 31" keeps the low five bits of each encoding; presumably the VR number — confirm.
-+ src.encoding() & 31,
-+ src.encoding() & 31,
-+ scratch.encoding() & 31);
-+ masm.loadConstantSimd128(SimdConstant::CreateX16(mask), scratch);  // scratch is reused: it now holds the byte mask.
-+ masm.as_xxland(dest, dest, scratch);
-+ return;
-+ }
-+}
-+void CodeGenerator::visitWasmReplaceLaneSimd128(LWasmReplaceLaneSimd128* ins) {  // Emits an in-place lane replacement on lhsDest with a scalar rhs.
-+ FloatRegister lhsDest = ToFloatRegister(ins->output());
-+ MOZ_ASSERT(ToFloatRegister(ins->lhs()) == lhsDest);  // The input vector must already be in the output register.
-+ uint32_t lane = ins->mir()->laneIndex();
-+ switch (ins->mir()->simdOp()) {
-+ case wasm::SimdOp::I8x16ReplaceLane:
-+ masm.replaceLaneInt8x16(lane, ToRegister(ins->rhs()), lhsDest);  // Integer lanes read rhs as a general-purpose Register.
-+ break;
-+ case wasm::SimdOp::I16x8ReplaceLane:
-+ masm.replaceLaneInt16x8(lane, ToRegister(ins->rhs()), lhsDest);
-+ break;
-+ case wasm::SimdOp::I32x4ReplaceLane:
-+ masm.replaceLaneInt32x4(lane, ToRegister(ins->rhs()), lhsDest);
-+ break;
-+ case wasm::SimdOp::F32x4ReplaceLane:
-+ masm.replaceLaneFloat32x4(lane, ToFloatRegister(ins->rhs()), lhsDest);  // Float lanes read rhs as a FloatRegister.
-+ break;
-+ case wasm::SimdOp::F64x2ReplaceLane:
-+ masm.replaceLaneFloat64x2(lane, ToFloatRegister(ins->rhs()), lhsDest);
-+ break;
-+ default:
-+ MOZ_CRASH("PPC64: NYI SIMD replace lane op");  // Every op not listed above ends up here.
-+ }
-+}
-+void CodeGenerator::visitWasmReplaceInt64LaneSimd128(
-+ LWasmReplaceInt64LaneSimd128* ins) {  // Emits an in-place I64x2 lane replacement from a Register64 rhs.
-+ MOZ_ASSERT(ins->mir()->simdOp() == wasm::SimdOp::I64x2ReplaceLane);
-+ FloatRegister lhsDest = ToFloatRegister(ins->output());
-+ MOZ_ASSERT(ToFloatRegister(ins->lhs()) == lhsDest);  // The input vector must already be in the output register.
-+ masm.replaceLaneInt64x2(ins->mir()->laneIndex(),
-+ ToRegister64(ins->rhs()), lhsDest);
-+}
-+void CodeGenerator::visitWasmScalarToSimd128(LWasmScalarToSimd128* ins) {  // Emits the splat helper matching ins->mir()->simdOp() into dest.
-+ FloatRegister dest = ToFloatRegister(ins->output());
-+ switch (ins->mir()->simdOp()) {
-+ case wasm::SimdOp::I8x16Splat:
-+ masm.splatX16(ToRegister(ins->src()), dest);  // Integer splats read src as a general-purpose Register.
-+ break;
-+ case wasm::SimdOp::I16x8Splat:
-+ masm.splatX8(ToRegister(ins->src()), dest);
-+ break;
-+ case wasm::SimdOp::I32x4Splat:
-+ masm.splatX4(ToRegister(ins->src()), dest);
-+ break;
-+ case wasm::SimdOp::F32x4Splat:
-+ masm.splatX4(ToFloatRegister(ins->src()), dest);  // Float splats read src as a FloatRegister.
-+ break;
-+ case wasm::SimdOp::F64x2Splat:
-+ masm.splatX2(ToFloatRegister(ins->src()), dest);
-+ break;
-+ default:
-+ MOZ_CRASH("PPC64: NYI SIMD scalar-to-simd op");  // Every op not listed above ends up here.
-+ }
-+}
-+void CodeGenerator::visitWasmInt64ToSimd128(LWasmInt64ToSimd128* ins) {  // Emits I64x2Splat from a Register64 source; any other op crashes.
-+ FloatRegister dest = ToFloatRegister(ins->output());
-+ switch (ins->mir()->simdOp()) {
-+ case wasm::SimdOp::I64x2Splat:
-+ masm.splatX2(ToRegister64(ins->src()), dest);
-+ break;
-+ default:
-+ MOZ_CRASH("PPC64: NYI SIMD int64-to-simd op");
-+ }
-+}
-+void CodeGenerator::visitWasmUnarySimd128(LWasmUnarySimd128* ins) {
-+ FloatRegister src = ToFloatRegister(ins->src());
-+ FloatRegister dest = ToFloatRegister(ins->output());
-+ switch (ins->mir()->simdOp()) {
-+ case wasm::SimdOp::I8x16Neg:
-+ masm.negInt8x16(src, dest);
-+ break;
-+ case wasm::SimdOp::I16x8Neg:
-+ masm.negInt16x8(src, dest);
-+ break;
-+ case wasm::SimdOp::I32x4Neg:
-+ masm.negInt32x4(src, dest);
-+ break;
-+ case wasm::SimdOp::I64x2Neg:
-+ masm.negInt64x2(src, dest);
-+ break;
-+ case wasm::SimdOp::I8x16Abs:
-+ masm.absInt8x16(src, dest);
-+ break;
-+ case wasm::SimdOp::I16x8Abs:
-+ masm.absInt16x8(src, dest);
-+ break;
-+ case wasm::SimdOp::I32x4Abs:
-+ masm.absInt32x4(src, dest);
-+ break;
-+ case wasm::SimdOp::I64x2Abs:
-+ masm.absInt64x2(src, dest);
-+ break;
-+ case wasm::SimdOp::V128Not:
-+ masm.bitwiseNotSimd128(src, dest);
-+ break;
-+ case wasm::SimdOp::F32x4Neg:
-+ masm.negFloat32x4(src, dest);
-+ break;
-+ case wasm::SimdOp::F64x2Neg:
-+ masm.negFloat64x2(src, dest);
-+ break;
-+ case wasm::SimdOp::F32x4Abs:
-+ masm.absFloat32x4(src, dest);
-+ break;
-+ case wasm::SimdOp::F64x2Abs:
-+ masm.absFloat64x2(src, dest);
-+ break;
-+ case wasm::SimdOp::F32x4Sqrt:
-+ masm.sqrtFloat32x4(src, dest);
-+ break;
-+ case wasm::SimdOp::F64x2Sqrt:
-+ masm.sqrtFloat64x2(src, dest);
-+ break;
-+ case wasm::SimdOp::F32x4Ceil:
-+ masm.ceilFloat32x4(src, dest);
-+ break;
-+ case wasm::SimdOp::F64x2Ceil:
-+ masm.ceilFloat64x2(src, dest);
-+ break;
-+ case wasm::SimdOp::F32x4Floor:
-+ masm.floorFloat32x4(src, dest);
-+ break;
-+ case wasm::SimdOp::F64x2Floor:
-+ masm.floorFloat64x2(src, dest);
-+ break;
-+ case wasm::SimdOp::F32x4Trunc:
-+ masm.truncFloat32x4(src, dest);
-+ break;
-+ case wasm::SimdOp::F64x2Trunc:
-+ masm.truncFloat64x2(src, dest);
-+ break;
-+ case wasm::SimdOp::F32x4Nearest:
-+ masm.nearestFloat32x4(src, dest);
-+ break;
-+ case wasm::SimdOp::F64x2Nearest:
-+ masm.nearestFloat64x2(src, dest);
-+ break;
-+ // Conversions
-+ case wasm::SimdOp::F32x4ConvertI32x4S:
-+ masm.convertInt32x4ToFloat32x4(src, dest);
-+ break;
-+ case wasm::SimdOp::F32x4ConvertI32x4U:
-+ masm.unsignedConvertInt32x4ToFloat32x4(src, dest);
-+ break;
-+ case wasm::SimdOp::I32x4TruncSatF32x4S:
-+ masm.truncSatFloat32x4ToInt32x4(src, dest);
-+ break;
-+ case wasm::SimdOp::I32x4TruncSatF32x4U:
-+ masm.unsignedTruncSatFloat32x4ToInt32x4(src, dest);
-+ break;
-+ case wasm::SimdOp::F64x2ConvertLowI32x4S:
-+ masm.convertInt32x4ToFloat64x2(src, dest);
-+ break;
-+ case wasm::SimdOp::F64x2ConvertLowI32x4U:
-+ masm.unsignedConvertInt32x4ToFloat64x2(src, dest);
-+ break;
-+ case wasm::SimdOp::F32x4DemoteF64x2Zero:
-+ masm.convertFloat64x2ToFloat32x4(src, dest);
-+ break;
-+ case wasm::SimdOp::F64x2PromoteLowF32x4:
-+ masm.convertFloat32x4ToFloat64x2(src, dest);
-+ break;
-+ case wasm::SimdOp::I32x4TruncSatF64x2SZero:
-+ masm.truncSatFloat64x2ToInt32x4(src, dest, ScratchSimd128Reg);
-+ break;
-+ case wasm::SimdOp::I32x4TruncSatF64x2UZero:
-+ masm.unsignedTruncSatFloat64x2ToInt32x4(src, dest, ScratchSimd128Reg);
-+ break;
-+ // Widen
-+ case wasm::SimdOp::I16x8ExtendLowI8x16S:
-+ masm.widenLowInt8x16(src, dest);
-+ break;
-+ case wasm::SimdOp::I16x8ExtendHighI8x16S:
-+ masm.widenHighInt8x16(src, dest);
-+ break;
-+ case wasm::SimdOp::I16x8ExtendLowI8x16U:
-+ masm.unsignedWidenLowInt8x16(src, dest);
-+ break;
-+ case wasm::SimdOp::I16x8ExtendHighI8x16U:
-+ masm.unsignedWidenHighInt8x16(src, dest);
-+ break;
-+ case wasm::SimdOp::I32x4ExtendLowI16x8S:
-+ masm.widenLowInt16x8(src, dest);
-+ break;
-+ case wasm::SimdOp::I32x4ExtendHighI16x8S:
-+ masm.widenHighInt16x8(src, dest);
-+ break;
-+ case wasm::SimdOp::I32x4ExtendLowI16x8U:
-+ masm.unsignedWidenLowInt16x8(src, dest);
-+ break;
-+ case wasm::SimdOp::I32x4ExtendHighI16x8U:
-+ masm.unsignedWidenHighInt16x8(src, dest);
-+ break;
-+ case wasm::SimdOp::I64x2ExtendLowI32x4S:
-+ masm.widenLowInt32x4(src, dest);
-+ break;
-+ case wasm::SimdOp::I64x2ExtendHighI32x4S:
-+ masm.widenHighInt32x4(src, dest);
-+ break;
-+ case wasm::SimdOp::I64x2ExtendLowI32x4U:
-+ masm.unsignedWidenLowInt32x4(src, dest);
-+ break;
-+ case wasm::SimdOp::I64x2ExtendHighI32x4U:
-+ masm.unsignedWidenHighInt32x4(src, dest);
-+ break;
-+ // Extended add pairwise
-+ case wasm::SimdOp::I16x8ExtaddPairwiseI8x16S:
-+ masm.extAddPairwiseInt8x16(src, dest);
-+ break;
-+ case wasm::SimdOp::I16x8ExtaddPairwiseI8x16U:
-+ masm.unsignedExtAddPairwiseInt8x16(src, dest);
-+ break;
-+ case wasm::SimdOp::I32x4ExtaddPairwiseI16x8S:
-+ masm.extAddPairwiseInt16x8(src, dest);
-+ break;
-+ case wasm::SimdOp::I32x4ExtaddPairwiseI16x8U:
-+ masm.unsignedExtAddPairwiseInt16x8(src, dest);
-+ break;
-+ // Relaxed truncation
-+ case wasm::SimdOp::I32x4RelaxedTruncF32x4S:
-+ masm.truncFloat32x4ToInt32x4Relaxed(src, dest);
-+ break;
-+ case wasm::SimdOp::I32x4RelaxedTruncF32x4U:
-+ masm.unsignedTruncFloat32x4ToInt32x4Relaxed(src, dest);
-+ break;
-+ case wasm::SimdOp::I32x4RelaxedTruncF64x2SZero:
-+ masm.truncFloat64x2ToInt32x4Relaxed(src, dest);
-+ break;
-+ case wasm::SimdOp::I32x4RelaxedTruncF64x2UZero:
-+ masm.unsignedTruncFloat64x2ToInt32x4Relaxed(src, dest);
-+ break;
-+ // Popcnt
-+ case wasm::SimdOp::I8x16Popcnt:
-+ masm.popcntInt8x16(src, dest);
-+ break;
-+ default:
-+ MOZ_CRASH("PPC64: NYI SIMD unary op");
-+ }
-+}
-+void CodeGenerator::visitWasmReduceSimd128(LWasmReduceSimd128* ins) {
-+  // Lowers a vector-to-scalar SIMD op (lane extraction, any/all-true test or
-+  // bitmask) to the matching MacroAssembler call. Other opcodes crash.
-+  FloatRegister vec = ToFloatRegister(ins->src());
-+  auto* mir = ins->mir();
-+  uint32_t laneImm = mir->imm();
-+  // Only the two floating-point lane extractions write a float register, so
-+  // the output register is resolved per case rather than up front.
-+  auto gprOut = [&]() { return ToRegister(ins->output()); };
-+  auto fprOut = [&]() { return ToFloatRegister(ins->output()); };
-+  switch (mir->simdOp()) {
-+    // Lane extraction
-+    case wasm::SimdOp::I8x16ExtractLaneS:
-+      masm.extractLaneInt8x16(laneImm, vec, gprOut());
-+      break;
-+    case wasm::SimdOp::I8x16ExtractLaneU:
-+      masm.unsignedExtractLaneInt8x16(laneImm, vec, gprOut());
-+      break;
-+    case wasm::SimdOp::I16x8ExtractLaneS:
-+      masm.extractLaneInt16x8(laneImm, vec, gprOut());
-+      break;
-+    case wasm::SimdOp::I16x8ExtractLaneU:
-+      masm.unsignedExtractLaneInt16x8(laneImm, vec, gprOut());
-+      break;
-+    case wasm::SimdOp::I32x4ExtractLane:
-+      masm.extractLaneInt32x4(laneImm, vec, gprOut());
-+      break;
-+    case wasm::SimdOp::F32x4ExtractLane:
-+      masm.extractLaneFloat32x4(laneImm, vec, fprOut());
-+      break;
-+    case wasm::SimdOp::F64x2ExtractLane:
-+      masm.extractLaneFloat64x2(laneImm, vec, fprOut());
-+      break;
-+    // Any-true / all-true tests
-+    case wasm::SimdOp::V128AnyTrue:
-+      masm.anyTrueSimd128(vec, gprOut());
-+      break;
-+    case wasm::SimdOp::I8x16AllTrue:
-+      masm.allTrueInt8x16(vec, gprOut());
-+      break;
-+    case wasm::SimdOp::I16x8AllTrue:
-+      masm.allTrueInt16x8(vec, gprOut());
-+      break;
-+    case wasm::SimdOp::I32x4AllTrue:
-+      masm.allTrueInt32x4(vec, gprOut());
-+      break;
-+    case wasm::SimdOp::I64x2AllTrue:
-+      masm.allTrueInt64x2(vec, gprOut());
-+      break;
-+    // Bitmask extraction; these additionally pass ScratchSimd128Reg.
-+    case wasm::SimdOp::I8x16Bitmask:
-+      masm.bitmaskInt8x16(vec, gprOut(), ScratchSimd128Reg);
-+      break;
-+    case wasm::SimdOp::I16x8Bitmask:
-+      masm.bitmaskInt16x8(vec, gprOut(), ScratchSimd128Reg);
-+      break;
-+    case wasm::SimdOp::I32x4Bitmask:
-+      masm.bitmaskInt32x4(vec, gprOut(), ScratchSimd128Reg);
-+      break;
-+    case wasm::SimdOp::I64x2Bitmask:
-+      masm.bitmaskInt64x2(vec, gprOut(), ScratchSimd128Reg);
-+      break;
-+    default:
-+      MOZ_CRASH("PPC64: NYI SIMD reduce op");
-+  }
-+}
-+void CodeGenerator::visitWasmReduceAndBranchSimd128(
-+    LWasmReduceAndBranchSimd128* ins) {
-+  // Materialises the reduction result in a scratch GPR, then branches on
-+  // whether that value is nonzero.
-+  FloatRegister vec = ToFloatRegister(ins->src());
-+  UseScratchRegisterScope temps(masm);
-+  Register flag = temps.Acquire();
-+  switch (ins->simdOp()) {
-+    case wasm::SimdOp::V128AnyTrue:
-+      masm.anyTrueSimd128(vec, flag);
-+      break;
-+    case wasm::SimdOp::I8x16AllTrue:
-+      masm.allTrueInt8x16(vec, flag);
-+      break;
-+    case wasm::SimdOp::I16x8AllTrue:
-+      masm.allTrueInt16x8(vec, flag);
-+      break;
-+    case wasm::SimdOp::I32x4AllTrue:
-+      masm.allTrueInt32x4(vec, flag);
-+      break;
-+    case wasm::SimdOp::I64x2AllTrue:
-+      masm.allTrueInt64x2(vec, flag);
-+      break;
-+    case wasm::SimdOp::I8x16Bitmask:
-+      masm.bitmaskInt8x16(vec, flag, ScratchSimd128Reg);
-+      break;
-+    case wasm::SimdOp::I16x8Bitmask:
-+      masm.bitmaskInt16x8(vec, flag, ScratchSimd128Reg);
-+      break;
-+    case wasm::SimdOp::I32x4Bitmask:
-+      masm.bitmaskInt32x4(vec, flag, ScratchSimd128Reg);
-+      break;
-+    case wasm::SimdOp::I64x2Bitmask:
-+      masm.bitmaskInt64x2(vec, flag, ScratchSimd128Reg);
-+      break;
-+    default:
-+      MOZ_CRASH("PPC64: NYI SIMD reduce-and-branch op");
-+  }
-+  Label* trueTarget = skipTrivialBlocks(ins->ifTrue())->lir()->label();
-+  Label* falseTarget = skipTrivialBlocks(ins->ifFalse())->lir()->label();
-+  // A nonzero result takes the conditional branch to ifTrue; otherwise the
-+  // unconditional jump that follows transfers control to ifFalse.
-+  masm.as_cmpdi(flag, 0);
-+  masm.ma_b(Assembler::NotEqual, trueTarget);
-+  masm.jump(falseTarget);
-+}
-+void CodeGenerator::visitWasmReduceSimd128ToInt64(
-+    LWasmReduceSimd128ToInt64* ins) {
-+  // I64x2ExtractLane is the only vector-to-int64 reduction handled here;
-+  // every other opcode crashes.
-+  FloatRegister vec = ToFloatRegister(ins->src());
-+  Register64 out = ToOutRegister64(ins);
-+  if (ins->mir()->simdOp() != wasm::SimdOp::I64x2ExtractLane) {
-+    MOZ_CRASH("PPC64: NYI SIMD reduce-to-int64 op");
-+  }
-+  masm.extractLaneInt64x2(ins->mir()->imm(), vec, out);
-+}
-+// Builds a MemoryAccessDesc that takes its memory index, alignment, 32-bit
-+// offset, trap descriptor and huge-memory flag from |access| but uses |type|
-+// as the scalar type. The lane load/store visitors below use it to issue a
-+// scalar access of the lane's width.
-+static inline wasm::MemoryAccessDesc DeriveMemoryAccessDesc(
-+ const wasm::MemoryAccessDesc& access, Scalar::Type type) {
-+ return wasm::MemoryAccessDesc(access.memoryIndex(), type, access.align(),
-+ access.offset32(), access.trapDesc(),
-+ access.isHugeMemory());
-+}
-+
-+void CodeGenerator::visitWasmLoadLaneSimd128(LWasmLoadLaneSimd128* ins) {
-+  // Copies the source vector into the output, loads one scalar of the lane's
-+  // width into a scratch GPR, then writes that scalar into the selected lane.
-+  const MWasmLoadLaneSimd128* mir = ins->mir();
-+  Register memBase = ToRegister(ins->memoryBase());
-+  Register addr = ToRegister(ins->ptr());
-+  FloatRegister input = ToFloatRegister(ins->src());
-+  FloatRegister output = ToFloatRegister(ins->output());
-+  UseScratchRegisterScope temps(masm);
-+  Register scalar = temps.Acquire();
-+  masm.moveSimd128(input, output);
-+  // |addr| is passed for both pointer arguments of wasmLoad/wasmLoadI64.
-+  switch (mir->laneSize()) {
-+    case 1: {
-+      auto desc = DeriveMemoryAccessDesc(mir->access(), Scalar::Int8);
-+      masm.wasmLoad(desc, memBase, addr, addr, AnyRegister(scalar));
-+      masm.replaceLaneInt8x16(mir->laneIndex(), scalar, output);
-+      break;
-+    }
-+    case 2: {
-+      auto desc = DeriveMemoryAccessDesc(mir->access(), Scalar::Int16);
-+      masm.wasmLoad(desc, memBase, addr, addr, AnyRegister(scalar));
-+      masm.replaceLaneInt16x8(mir->laneIndex(), scalar, output);
-+      break;
-+    }
-+    case 4: {
-+      auto desc = DeriveMemoryAccessDesc(mir->access(), Scalar::Int32);
-+      masm.wasmLoad(desc, memBase, addr, addr, AnyRegister(scalar));
-+      masm.replaceLaneInt32x4(mir->laneIndex(), scalar, output);
-+      break;
-+    }
-+    case 8: {
-+      auto desc = DeriveMemoryAccessDesc(mir->access(), Scalar::Int64);
-+      Register64 wide(scalar);
-+      masm.wasmLoadI64(desc, memBase, addr, addr, wide);
-+      masm.replaceLaneInt64x2(mir->laneIndex(), wide, output);
-+      break;
-+    }
-+    default:
-+      MOZ_CRASH("Unexpected lane size");
-+  }
-+}
-+void CodeGenerator::visitWasmStoreLaneSimd128(LWasmStoreLaneSimd128* ins) {
-+  // Extracts the selected lane into a scratch GPR and stores it with a scalar
-+  // access of the lane's width.
-+  const MWasmStoreLaneSimd128* mir = ins->mir();
-+  Register memBase = ToRegister(ins->memoryBase());
-+  Register addr = ToRegister(ins->ptr());
-+  FloatRegister vec = ToFloatRegister(ins->src());
-+  UseScratchRegisterScope temps(masm);
-+  Register scalar = temps.Acquire();
-+  // |addr| is passed for both pointer arguments of wasmStore/wasmStoreI64.
-+  switch (mir->laneSize()) {
-+    case 1: {
-+      masm.unsignedExtractLaneInt8x16(mir->laneIndex(), vec, scalar);
-+      auto desc = DeriveMemoryAccessDesc(mir->access(), Scalar::Int8);
-+      masm.wasmStore(desc, AnyRegister(scalar), memBase, addr, addr);
-+      break;
-+    }
-+    case 2: {
-+      masm.unsignedExtractLaneInt16x8(mir->laneIndex(), vec, scalar);
-+      auto desc = DeriveMemoryAccessDesc(mir->access(), Scalar::Int16);
-+      masm.wasmStore(desc, AnyRegister(scalar), memBase, addr, addr);
-+      break;
-+    }
-+    case 4: {
-+      masm.extractLaneInt32x4(mir->laneIndex(), vec, scalar);
-+      auto desc = DeriveMemoryAccessDesc(mir->access(), Scalar::Int32);
-+      masm.wasmStore(desc, AnyRegister(scalar), memBase, addr, addr);
-+      break;
-+    }
-+    case 8: {
-+      Register64 wide(scalar);
-+      masm.extractLaneInt64x2(mir->laneIndex(), vec, wide);
-+      auto desc = DeriveMemoryAccessDesc(mir->access(), Scalar::Int64);
-+      masm.wasmStoreI64(desc, wide, memBase, addr, addr);
-+      break;
-+    }
-+    default:
-+      MOZ_CRASH("Unexpected lane size");
-+  }
-+}
-+
-+} // namespace jit
-+} // namespace js
-diff --git a/js/src/jit/ppc64/CodeGenerator-ppc64.h b/js/src/jit/ppc64/CodeGenerator-ppc64.h
-new file mode 100644
-index 000000000000..3414eceb5ac4
---- /dev/null
-+++ b/js/src/jit/ppc64/CodeGenerator-ppc64.h
-@@ -0,0 +1,101 @@
-+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
-+ * vim: set ts=8 sts=2 et sw=2 tw=80:
-+ * This Source Code Form is subject to the terms of the Mozilla Public
-+ * License, v. 2.0. If a copy of the MPL was not distributed with this
-+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-+
-+#ifndef jit_ppc64_CodeGenerator_ppc64_h
-+#define jit_ppc64_CodeGenerator_ppc64_h
-+
-+#include "jit/ppc64/Assembler-ppc64.h"
-+#include "jit/shared/CodeGenerator-shared.h"
-+
-+namespace js {
-+namespace jit {
-+
-+class CodeGeneratorPPC64;
-+class OutOfLineTableSwitch;
-+
-+using OutOfLineWasmTruncateCheck =
-+ OutOfLineWasmTruncateCheckBase<CodeGeneratorPPC64>;
-+
-+// PPC64 layer of the code generator, sitting on top of CodeGeneratorShared.
-+class CodeGeneratorPPC64 : public CodeGeneratorShared {
-+  friend class MoveResolverPPC64;
-+
-+ protected:
-+  CodeGeneratorPPC64(MIRGenerator* gen, LIRGraph* graph, MacroAssembler* masm,
-+                     const wasm::CodeMetadata* codeMeta);
-+
-+  NonAssertingLabel deoptLabel_;
-+
-+  // Conversions from LIR allocations to assembler operands.
-+  Operand ToOperand(const LAllocation& a);
-+  Operand ToOperand(const LAllocation* a);
-+  MoveOperand toMoveOperand(LAllocation a) const;
-+
-+  // Each templated bailout helper emits its conditional branch to a local
-+  // label and passes that label, with |snapshot|, to bailoutFrom().
-+  template <typename T1, typename T2>
-+  void bailoutCmp32(Assembler::Condition c, T1 lhs, T2 rhs,
-+                    LSnapshot* snapshot) {
-+    Label onFail;
-+    masm.branch32(c, lhs, rhs, &onFail);
-+    bailoutFrom(&onFail, snapshot);
-+  }
-+  template <typename T1, typename T2>
-+  void bailoutCmpPtr(Assembler::Condition c, T1 lhs, T2 rhs,
-+                     LSnapshot* snapshot) {
-+    Label onFail;
-+    masm.branchPtr(c, lhs, rhs, &onFail);
-+    bailoutFrom(&onFail, snapshot);
-+  }
-+  template <typename T1, typename T2>
-+  void bailoutTest32(Assembler::Condition c, T1 lhs, T2 rhs,
-+                     LSnapshot* snapshot) {
-+    Label onFail;
-+    masm.branchTest32(c, lhs, rhs, &onFail);
-+    bailoutFrom(&onFail, snapshot);
-+  }
-+  void bailoutIfFalseBool(Register lhs, LSnapshot* snapshot);
-+  void bailoutFrom(Label* label, LSnapshot* snapshot);
-+  void bailout(LSnapshot* snapshot);
-+
-+ protected:
-+  bool generateOutOfLineCode();
-+  void branchToBlock(MBasicBlock* block);
-+
-+  // 32-bit compare-and-branch to the label of |mir|, after skipping trivial
-+  // blocks.
-+  template <typename T>
-+  void branchToBlock(Assembler::Condition cond, Register lhs, T rhs,
-+                     MBasicBlock* mir) {
-+    Label* target = skipTrivialBlocks(mir)->lir()->label();
-+    masm.branch32(cond, lhs, rhs, target);
-+  }
-+  void branchToBlock(Assembler::DoubleCondition cond, FloatRegister lhs,
-+                     FloatRegister rhs, MBasicBlock* mir);
-+  void branchToBlock(Assembler::FloatFormat fmt,
-+                     Assembler::DoubleCondition cond, FloatRegister lhs,
-+                     FloatRegister rhs, MBasicBlock* mir);
-+
-+  void emitTableSwitchDispatch(MTableSwitch* mir, Register index,
-+                               Register base);
-+
-+  void emitBigIntPtrDiv(LBigIntPtrDiv* ins, Register dividend, Register divisor,
-+                        Register output);
-+  void emitBigIntPtrMod(LBigIntPtrMod* ins, Register dividend, Register divisor,
-+                        Register output);
-+
-+  void generateInvalidateEpilogue();
-+
-+  template <typename T>
-+  void emitWasmLoad(T* lir);
-+  template <typename T>
-+  void emitWasmStore(T* lir);
-+
-+ public:
-+  // Out-of-line code visitors.
-+  void visitOutOfLineTableSwitch(OutOfLineTableSwitch* ool);
-+  void visitOutOfLineWasmTruncateCheck(OutOfLineWasmTruncateCheck* ool);
-+};
-+
-+// Alias naming CodeGeneratorPPC64 as the platform-specific code generator.
-+// Spelled with `using` to match the OutOfLineWasmTruncateCheck alias declared
-+// earlier in this header.
-+using CodeGeneratorSpecific = CodeGeneratorPPC64;
-+
-+} // namespace jit
-+} // namespace js
-+
-+#endif /* jit_ppc64_CodeGenerator_ppc64_h */
-diff --git a/js/src/jit/ppc64/LIR-ppc64.h b/js/src/jit/ppc64/LIR-ppc64.h
-new file mode 100644
-index 000000000000..686875056127
---- /dev/null
-+++ b/js/src/jit/ppc64/LIR-ppc64.h
-@@ -0,0 +1,135 @@
-+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
-+ * vim: set ts=8 sts=2 et sw=2 tw=80:
-+ * This Source Code Form is subject to the terms of the Mozilla Public
-+ * License, v. 2.0. If a copy of the MPL was not distributed with this
-+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-+
-+#ifndef jit_ppc64_LIR_ppc64_h
-+#define jit_ppc64_LIR_ppc64_h
-+
-+namespace js {
-+namespace jit {
-+
-+// LIR node that unboxes a Value; takes the boxed input as its only operand.
-+class LUnbox : public LInstructionHelper<1, BOX_PIECES, 0> {
-+ public:
-+  LIR_HEADER(Unbox);
-+
-+  // Operand index of the boxed input.
-+  static const size_t Input = 0;
-+
-+  explicit LUnbox(const LAllocation& input) : LInstructionHelper(classOpcode) {
-+    setOperand(0, input);
-+  }
-+
-+  LBoxAllocation input() const { return getBoxOperand(Input); }
-+
-+  MUnbox* mir() const { return mir_->toUnbox(); }
-+
-+  // Names the MIR result type of the unbox.
-+  const char* extraName() const {
-+    MUnbox* unbox = mir();
-+    return StringFromMIRType(unbox->type());
-+  }
-+};
-+
-+// Unsigned 32-bit divide-or-modulo. The accessors dispatch on whether the
-+// attached MIR node is an MMod or an MDiv.
-+class LUDivOrMod : public LBinaryMath<0> {
-+ public:
-+  LIR_HEADER(UDivOrMod);
-+
-+  LUDivOrMod() : LBinaryMath(classOpcode) {}
-+
-+  MBinaryArithInstruction* mir() const {
-+    MOZ_ASSERT(mir_->isDiv() || mir_->isMod());
-+    return static_cast<MBinaryArithInstruction*>(mir_);
-+  }
-+
-+  bool canBeDivideByZero() const {
-+    return mir_->isMod() ? mir_->toMod()->canBeDivideByZero()
-+                         : mir_->toDiv()->canBeDivideByZero();
-+  }
-+
-+  bool trapOnError() const {
-+    return mir_->isMod() ? mir_->toMod()->trapOnError()
-+                         : mir_->toDiv()->trapOnError();
-+  }
-+
-+  wasm::TrapSiteDesc trapSiteDesc() const {
-+    MOZ_ASSERT(mir_->isDiv() || mir_->isMod());
-+    return mir_->isMod() ? mir_->toMod()->trapSiteDesc()
-+                         : mir_->toDiv()->trapSiteDesc();
-+  }
-+};
-+
-+// Signed 64-bit divide-or-modulo. The accessors dispatch on whether the
-+// attached MIR node is an MMod or an MDiv.
-+class LDivOrModI64 : public LBinaryMath<0> {
-+ public:
-+  LIR_HEADER(DivOrModI64);
-+
-+  LDivOrModI64(const LAllocation& lhs, const LAllocation& rhs)
-+      : LBinaryMath(classOpcode) {
-+    setOperand(0, lhs);
-+    setOperand(1, rhs);
-+  }
-+
-+  MBinaryArithInstruction* mir() const {
-+    MOZ_ASSERT(mir_->isDiv() || mir_->isMod());
-+    return static_cast<MBinaryArithInstruction*>(mir_);
-+  }
-+
-+  bool canBeDivideByZero() const {
-+    return mir_->isMod() ? mir_->toMod()->canBeDivideByZero()
-+                         : mir_->toDiv()->canBeDivideByZero();
-+  }
-+  // For a modulo this reports MMod::canBeNegativeDividend(); for a division
-+  // it reports MDiv::canBeNegativeOverflow().
-+  bool canBeNegativeOverflow() const {
-+    return mir_->isMod() ? mir_->toMod()->canBeNegativeDividend()
-+                         : mir_->toDiv()->canBeNegativeOverflow();
-+  }
-+  wasm::TrapSiteDesc trapSiteDesc() const {
-+    MOZ_ASSERT(mir_->isDiv() || mir_->isMod());
-+    return mir_->isMod() ? mir_->toMod()->trapSiteDesc()
-+                         : mir_->toDiv()->trapSiteDesc();
-+  }
-+};
-+
-+// Unsigned 64-bit divide-or-modulo. The accessors dispatch on whether the
-+// attached MIR node is an MMod or an MDiv.
-+class LUDivOrModI64 : public LBinaryMath<0> {
-+ public:
-+  LIR_HEADER(UDivOrModI64);
-+
-+  LUDivOrModI64(const LAllocation& lhs, const LAllocation& rhs)
-+      : LBinaryMath(classOpcode) {
-+    setOperand(0, lhs);
-+    setOperand(1, rhs);
-+  }
-+
-+  const char* extraName() const {
-+    if (mir()->isTruncated()) {
-+      return "Truncated";
-+    }
-+    return nullptr;
-+  }
-+
-+  MBinaryArithInstruction* mir() const {
-+    MOZ_ASSERT(mir_->isDiv() || mir_->isMod());
-+    return static_cast<MBinaryArithInstruction*>(mir_);
-+  }
-+  bool canBeDivideByZero() const {
-+    return mir_->isMod() ? mir_->toMod()->canBeDivideByZero()
-+                         : mir_->toDiv()->canBeDivideByZero();
-+  }
-+  wasm::TrapSiteDesc trapSiteDesc() const {
-+    MOZ_ASSERT(mir_->isDiv() || mir_->isMod());
-+    return mir_->isMod() ? mir_->toMod()->trapSiteDesc()
-+                         : mir_->toDiv()->trapSiteDesc();
-+  }
-+};
-+
-+} // namespace jit
-+} // namespace js
-+
-+#endif /* jit_ppc64_LIR_ppc64_h */
-diff --git a/js/src/jit/ppc64/Lowering-ppc64.cpp b/js/src/jit/ppc64/Lowering-ppc64.cpp
-new file mode 100644
-index 000000000000..be0ead19d273
---- /dev/null
-+++ b/js/src/jit/ppc64/Lowering-ppc64.cpp
-@@ -0,0 +1,1324 @@
-+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
-+ * vim: set ts=8 sts=2 et sw=2 tw=80:
-+ * This Source Code Form is subject to the terms of the Mozilla Public
-+ * License, v. 2.0. If a copy of the MPL was not distributed with this
-+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-+
-+#include "jit/ppc64/Lowering-ppc64.h"
-+
-+#include "mozilla/MathAlgorithms.h"
-+
-+#include "jit/Lowering.h"
-+#include "jit/MIR-wasm.h"
-+#include "jit/MIR.h"
-+#include "jit/ppc64/Assembler-ppc64.h"
-+#include "wasm/WasmFeatures.h" // for wasm::ReportSimdAnalysis
-+
-+#include "jit/shared/Lowering-shared-inl.h"
-+
-+using namespace js;
-+using namespace js::jit;
-+
-+using mozilla::FloorLog2;
-+
-+namespace js {
-+namespace jit {
-+
-+LTableSwitch* LIRGeneratorPPC64::newLTableSwitch(const LAllocation& in,
-+                                                 const LDefinition& inputCopy) {
-+  // Besides the caller-supplied input and input copy, the node is given one
-+  // extra temp.
-+  LDefinition scratch = temp();
-+  auto* lir = new (alloc()) LTableSwitch(in, inputCopy, scratch);
-+  return lir;
-+}
-+
-+LTableSwitchV* LIRGeneratorPPC64::newLTableSwitchV(const LBoxAllocation& in) {
-+  // The boxed-input variant takes a temp, a double temp, and another temp.
-+  LDefinition firstTemp = temp();
-+  LDefinition floatTemp = tempDouble();
-+  LDefinition secondTemp = temp();
-+  return new (alloc()) LTableSwitchV(in, firstTemp, floatTemp, secondTemp);
-+}
-+
-+// Shifts reuse the two-operand ALU lowering: forwards all arguments to
-+// lowerForALU.
-+void LIRGeneratorPPC64::lowerForShift(LInstructionHelper<1, 2, 0>* ins,
-+ MDefinition* mir, MDefinition* lhs,
-+ MDefinition* rhs) {
-+ lowerForALU(ins, mir, lhs, rhs);
-+}
-+
-+// Lowers a 64-bit shift or rotate. LShiftI64 names its operands lhs/rhs,
-+// while the other instantiation (LRotateI64) names them input/count. In both
-+// cases the value is a 64-bit register and the amount is a register or
-+// constant, each used at start.
-+template <class LInstr>
-+void LIRGeneratorPPC64::lowerForShiftInt64(LInstr* ins, MDefinition* mir,
-+                                           MDefinition* lhs, MDefinition* rhs) {
-+  constexpr bool isShift = std::is_same_v<LInstr, LShiftI64>;
-+  const auto value = useInt64RegisterAtStart(lhs);
-+  const auto amount = useRegisterOrConstantAtStart(rhs);
-+  if constexpr (isShift) {
-+    ins->setLhs(value);
-+    ins->setRhs(amount);
-+  } else {
-+    ins->setInput(value);
-+    ins->setCount(amount);
-+  }
-+  defineInt64(ins, mir);
-+}
-+
-+// Explicit instantiations for the two instruction types that use this helper.
-+template void LIRGeneratorPPC64::lowerForShiftInt64<LShiftI64>(
-+    LShiftI64* ins, MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
-+template void LIRGeneratorPPC64::lowerForShiftInt64<LRotateI64>(
-+    LRotateI64* ins, MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
-+
-+void LIRGeneratorPPC64::lowerForALU(LInstructionHelper<1, 1, 0>* ins,
-+                                    MDefinition* mir, MDefinition* input) {
-+  // Single-operand ALU op: the input is a register used at start.
-+  const auto operand = useRegisterAtStart(input);
-+  ins->setOperand(0, operand);
-+  define(ins, mir);
-+}
-+
-+void LIRGeneratorPPC64::lowerForALU(LInstructionHelper<1, 2, 0>* ins,
-+                                    MDefinition* mir, MDefinition* lhs,
-+                                    MDefinition* rhs) {
-+  // Two-operand ALU op: lhs must be a register, rhs may be a register or a
-+  // constant; both are used at start.
-+  const auto left = useRegisterAtStart(lhs);
-+  const auto right = useRegisterOrConstantAtStart(rhs);
-+  ins->setOperand(0, left);
-+  ins->setOperand(1, right);
-+  define(ins, mir);
-+}
-+
-+void LIRGeneratorPPC64::lowerForALUInt64(
-+    LInstructionHelper<INT64_PIECES, INT64_PIECES, 0>* ins, MDefinition* mir,
-+    MDefinition* input) {
-+  // Single-operand Int64 ALU op: the input is a 64-bit register used at start.
-+  const auto operand = useInt64RegisterAtStart(input);
-+  ins->setInt64Operand(0, operand);
-+  defineInt64(ins, mir);
-+}
-+
-+void LIRGeneratorPPC64::lowerForALUInt64(
-+    LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
-+    MDefinition* mir, MDefinition* lhs, MDefinition* rhs) {
-+  // Two-operand Int64 ALU op: lhs is a 64-bit register, rhs a 64-bit register
-+  // or constant; both are used at start. The rhs operand slot begins at
-+  // index INT64_PIECES.
-+  const auto left = useInt64RegisterAtStart(lhs);
-+  const auto right = useInt64RegisterOrConstantAtStart(rhs);
-+  ins->setInt64Operand(0, left);
-+  ins->setInt64Operand(INT64_PIECES, right);
-+  defineInt64(ins, mir);
-+}
-+
-+// 64-bit multiply reuses the two-operand Int64 ALU lowering: forwards all
-+// arguments to lowerForALUInt64.
-+void LIRGeneratorPPC64::lowerForMulInt64(LMulI64* ins, MMul* mir,
-+ MDefinition* lhs, MDefinition* rhs) {
-+ lowerForALUInt64(ins, mir, lhs, rhs);
-+}
-+
-+void LIRGeneratorPPC64::lowerForFPU(LInstructionHelper<1, 1, 0>* ins,
-+                                    MDefinition* mir, MDefinition* input) {
-+  // Single-operand FPU op: the input is a register used at start.
-+  const auto operand = useRegisterAtStart(input);
-+  ins->setOperand(0, operand);
-+  define(ins, mir);
-+}
-+
-+void LIRGeneratorPPC64::lowerForFPU(LInstructionHelper<1, 2, 0>* ins,
-+                                    MDefinition* mir, MDefinition* lhs,
-+                                    MDefinition* rhs) {
-+  // Two-operand FPU op: both inputs are registers used at start. Unlike the
-+  // ALU lowering, rhs may not be a constant.
-+  const auto left = useRegisterAtStart(lhs);
-+  const auto right = useRegisterAtStart(rhs);
-+  ins->setOperand(0, left);
-+  ins->setOperand(1, right);
-+  define(ins, mir);
-+}
-+
-+LBoxAllocation LIRGeneratorPPC64::useBoxFixed(MDefinition* mir, Register reg1,
-+                                              Register reg2, bool useAtStart) {
-+  // Pins the boxed value to |reg1|. |reg2| is accepted but not referenced.
-+  MOZ_ASSERT(mir->type() == MIRType::Value);
-+
-+  ensureDefined(mir);
-+  LUse fixedUse(reg1, mir->virtualRegister(), useAtStart);
-+  return LBoxAllocation(fixedUse);
-+}
-+
-+// Forwards to useRegister; no byte-specific register constraint is applied.
-+LAllocation LIRGeneratorPPC64::useByteOpRegister(MDefinition* mir) {
-+ return useRegister(mir);
-+}
-+
-+// Forwards to useRegisterAtStart; no byte-specific register constraint is
-+// applied.
-+LAllocation LIRGeneratorPPC64::useByteOpRegisterAtStart(MDefinition* mir) {
-+ return useRegisterAtStart(mir);
-+}
-+
-+// Forwards to useRegisterOrNonDoubleConstant; no byte-specific register
-+// constraint is applied.
-+LAllocation LIRGeneratorPPC64::useByteOpRegisterOrNonDoubleConstant(
-+ MDefinition* mir) {
-+ return useRegisterOrNonDoubleConstant(mir);
-+}
-+
-+// A byte-op temp is an ordinary temp() here.
-+LDefinition LIRGeneratorPPC64::tempByteOpRegister() { return temp(); }
-+
-+// The temp used for unboxing is an ordinary temp() here.
-+LDefinition LIRGeneratorPPC64::tempToUnbox() { return temp(); }
-+
-+// Untyped phi inputs are lowered exactly like typed ones: forwards to
-+// lowerTypedPhiInput.
-+void LIRGeneratorPPC64::lowerUntypedPhiInput(MPhi* phi, uint32_t inputPosition,
-+ LBlock* block, size_t lirIndex) {
-+ lowerTypedPhiInput(phi, inputPosition, block, lirIndex);
-+}
-+
-+// Int64 phi inputs are lowered exactly like typed ones: forwards to
-+// lowerTypedPhiInput.
-+void LIRGeneratorPPC64::lowerInt64PhiInput(MPhi* phi, uint32_t inputPosition,
-+ LBlock* block, size_t lirIndex) {
-+ lowerTypedPhiInput(phi, inputPosition, block, lirIndex);
-+}
-+
-+// Int64 phis are defined exactly like typed ones: forwards to defineTypedPhi.
-+void LIRGeneratorPPC64::defineInt64Phi(MPhi* phi, size_t lirIndex) {
-+ defineTypedPhi(phi, lirIndex);
-+}
-+
-+void LIRGeneratorPPC64::lowerMulI(MMul* mul, MDefinition* lhs,
-+                                  MDefinition* rhs) {
-+  auto* lir = new (alloc()) LMulI;
-+  if (mul->fallible()) {
-+    assignSnapshot(lir, mul->bailoutKind());
-+  }
-+
-+  // When a negative-zero result is possible and rhs is not a constant, both
-+  // operands use plain useRegister instead of the at-start uses that
-+  // lowerForALU would assign.
-+  const bool plainRegisterUses =
-+      mul->canBeNegativeZero() && !rhs->isConstant();
-+  if (!plainRegisterUses) {
-+    lowerForALU(lir, mul, lhs, rhs);
-+    return;
-+  }
-+
-+  lir->setOperand(0, useRegister(lhs));
-+  lir->setOperand(1, useRegister(rhs));
-+  define(lir, mul);
-+}
-+
-+void LIRGeneratorPPC64::lowerDivI(MDiv* div) {
-+  // Shared tail for both LIR shapes: attach a snapshot if the division can
-+  // bail out, then define the result.
-+  auto finish = [&](auto* lir) {
-+    if (div->fallible()) {
-+      assignSnapshot(lir, div->bailoutKind());
-+    }
-+    define(lir, div);
-+  };
-+
-+  // A positive power-of-two constant divisor is lowered to LDivPowTwoI with
-+  // the shift amount.
-+  if (div->rhs()->isConstant()) {
-+    int32_t divisor = div->rhs()->toConstant()->toInt32();
-+    int32_t shift = FloorLog2(uint32_t(divisor));
-+    if (divisor > 0 && (1 << shift) == divisor) {
-+      finish(new (alloc()) LDivPowTwoI(useRegister(div->lhs()), shift));
-+      return;
-+    }
-+  }
-+
-+  // General case: register/register division with one temp.
-+  finish(new (alloc())
-+             LDivI(useRegister(div->lhs()), useRegister(div->rhs()), temp()));
-+}
-+
-+void LIRGeneratorPPC64::lowerDivI64(MDiv* div) {
-+  // Signed 64-bit division shares LDivOrModI64 with the modulo lowering.
-+  const auto dividend = useRegister(div->lhs());
-+  const auto divisor = useRegister(div->rhs());
-+  auto* lir = new (alloc()) LDivOrModI64(dividend, divisor);
-+  defineInt64(lir, div);
-+}
-+
-+void LIRGeneratorPPC64::lowerModI(MMod* mod) {
-+  // Shared tail for all three LIR shapes: attach a snapshot if the modulo can
-+  // bail out, then define the result.
-+  auto finish = [&](auto* lir) {
-+    if (mod->fallible()) {
-+      assignSnapshot(lir, mod->bailoutKind());
-+    }
-+    define(lir, mod);
-+  };
-+
-+  if (mod->rhs()->isConstant()) {
-+    int32_t divisor = mod->rhs()->toConstant()->toInt32();
-+    int32_t shift = FloorLog2(uint32_t(divisor));
-+    // Positive power of two: 2^shift.
-+    if (divisor > 0 && (1 << shift) == divisor) {
-+      finish(new (alloc()) LModPowTwoI(useRegister(mod->lhs()), shift));
-+      return;
-+    }
-+    // Divisor of the form 2^(shift+1) - 1: uses LModMaskI with two temps.
-+    if (shift < 31 && ((1 << (shift + 1)) - 1) == divisor) {
-+      finish(new (alloc()) LModMaskI(useRegister(mod->lhs()), temp(), temp(),
-+                                     shift + 1));
-+      return;
-+    }
-+  }
-+
-+  // General case: register/register modulo.
-+  finish(new (alloc())
-+             LModI(useRegister(mod->lhs()), useRegister(mod->rhs())));
-+}
-+
-+void LIRGeneratorPPC64::lowerModI64(MMod* mod) {
-+  // Signed 64-bit modulo shares LDivOrModI64 with the division lowering.
-+  const auto dividend = useRegister(mod->lhs());
-+  const auto divisor = useRegister(mod->rhs());
-+  auto* lir = new (alloc()) LDivOrModI64(dividend, divisor);
-+  defineInt64(lir, mod);
-+}
-+
-+void LIRGeneratorPPC64::lowerUDiv(MDiv* div) {
-+  MDefinition* dividend = div->getOperand(0);
-+  MDefinition* divisor = div->getOperand(1);
-+  auto* lir = new (alloc()) LUDivOrMod;
-+  // Both inputs are used at start: visitUDivOrMod in CodeGenerator-ppc64
-+  // zero-extends lhs/rhs in place in their own registers before the 32-bit
-+  // divwu, so they must not be required to stay live once the LIR op begins.
-+  lir->setOperand(0, useRegisterAtStart(dividend));
-+  lir->setOperand(1, useRegisterAtStart(divisor));
-+  if (div->fallible()) {
-+    assignSnapshot(lir, div->bailoutKind());
-+  }
-+  define(lir, div);
-+}
-+
-+void LIRGeneratorPPC64::lowerUDivI64(MDiv* div) {
-+  // Unsigned 64-bit division shares LUDivOrModI64 with the modulo lowering.
-+  const auto dividend = useRegister(div->lhs());
-+  const auto divisor = useRegister(div->rhs());
-+  auto* lir = new (alloc()) LUDivOrModI64(dividend, divisor);
-+  defineInt64(lir, div);
-+}
-+
-+void LIRGeneratorPPC64::lowerUMod(MMod* mod) {
-+  MDefinition* dividend = mod->getOperand(0);
-+  MDefinition* divisor = mod->getOperand(1);
-+  auto* lir = new (alloc()) LUDivOrMod;
-+  // Used at start for the same reason as in lowerUDiv: the code generator
-+  // zero-extends both inputs in place.
-+  lir->setOperand(0, useRegisterAtStart(dividend));
-+  lir->setOperand(1, useRegisterAtStart(divisor));
-+  if (mod->fallible()) {
-+    assignSnapshot(lir, mod->bailoutKind());
-+  }
-+  define(lir, mod);
-+}
-+
-+void LIRGeneratorPPC64::lowerUModI64(MMod* mod) {
-+  // Unsigned 64-bit modulo shares LUDivOrModI64 with the division lowering.
-+  const auto dividend = useRegister(mod->lhs());
-+  const auto divisor = useRegister(mod->rhs());
-+  auto* lir = new (alloc()) LUDivOrModI64(dividend, divisor);
-+  defineInt64(lir, mod);
-+}
-+
-+void LIRGeneratorPPC64::lowerUrshD(MUrsh* mir) {
-+  // Unsigned right shift of two Int32 inputs lowered to LUrshD: the value is
-+  // a register, the count a register or constant (both at start), plus one
-+  // temp.
-+  MDefinition* value = mir->lhs();
-+  MDefinition* count = mir->rhs();
-+  MOZ_ASSERT(value->type() == MIRType::Int32);
-+  MOZ_ASSERT(count->type() == MIRType::Int32);
-+
-+  const auto valueUse = useRegisterAtStart(value);
-+  const auto countUse = useRegisterOrConstantAtStart(count);
-+  const auto scratch = temp();
-+  auto* lir = new (alloc()) LUrshD(valueUse, countUse, scratch);
-+  define(lir, mir);
-+}
-+
-+void LIRGeneratorPPC64::lowerPowOfTwoI(MPow* mir) {
-+  // The base is read as an Int32 constant from the MIR input; only the
-+  // exponent needs a register. A snapshot is always attached.
-+  const int32_t constantBase = mir->input()->toConstant()->toInt32();
-+  const auto exponent = useRegister(mir->power());
-+  auto* lir = new (alloc()) LPowOfTwoI(exponent, constantBase);
-+  assignSnapshot(lir, mir->bailoutKind());
-+  define(lir, mir);
-+}
-+
-+void LIRGeneratorPPC64::lowerBigIntPtrDiv(MBigIntPtrDiv* ins) {
-+  // Both temp slots are filled with bogus temps. A snapshot is always
-+  // attached.
-+  const auto dividend = useRegister(ins->lhs());
-+  const auto divisor = useRegister(ins->rhs());
-+  auto* lir = new (alloc()) LBigIntPtrDiv(
-+      dividend, divisor, LDefinition::BogusTemp(), LDefinition::BogusTemp());
-+  assignSnapshot(lir, ins->bailoutKind());
-+  define(lir, ins);
-+}
-+
-+void LIRGeneratorPPC64::lowerBigIntPtrMod(MBigIntPtrMod* ins) {
-+  // One real temp and one bogus temp. A snapshot is attached only when the
-+  // divisor may be zero.
-+  const auto dividend = useRegister(ins->lhs());
-+  const auto divisor = useRegister(ins->rhs());
-+  const auto scratch = temp();
-+  auto* lir = new (alloc())
-+      LBigIntPtrMod(dividend, divisor, scratch, LDefinition::BogusTemp());
-+  if (ins->canBeDivideByZero()) {
-+    assignSnapshot(lir, ins->bailoutKind());
-+  }
-+  define(lir, ins);
-+}
-+
-+void LIRGeneratorPPC64::lowerBigIntPtrLsh(MBigIntPtrLsh* ins) {
-+  // Register operands plus two temps. A snapshot is always attached.
-+  const auto value = useRegister(ins->lhs());
-+  const auto amount = useRegister(ins->rhs());
-+  const auto firstTemp = temp();
-+  const auto secondTemp = temp();
-+  auto* lir =
-+      new (alloc()) LBigIntPtrLsh(value, amount, firstTemp, secondTemp);
-+  assignSnapshot(lir, ins->bailoutKind());
-+  define(lir, ins);
-+}
-+
-+void LIRGeneratorPPC64::lowerBigIntPtrRsh(MBigIntPtrRsh* ins) {
-+  // Register operands plus two temps. A snapshot is always attached.
-+  const auto value = useRegister(ins->lhs());
-+  const auto amount = useRegister(ins->rhs());
-+  const auto firstTemp = temp();
-+  const auto secondTemp = temp();
-+  auto* lir =
-+      new (alloc()) LBigIntPtrRsh(value, amount, firstTemp, secondTemp);
-+  assignSnapshot(lir, ins->bailoutKind());
-+  define(lir, ins);
-+}
-+
-+void LIRGeneratorPPC64::lowerTruncateDToInt32(MTruncateToInt32* ins) {
-+  // Double -> Int32 truncation: register input plus one double temp.
-+  MDefinition* input = ins->input();
-+  MOZ_ASSERT(input->type() == MIRType::Double);
-+  const auto inputUse = useRegister(input);
-+  const auto scratch = tempDouble();
-+  auto* lir = new (alloc()) LTruncateDToInt32(inputUse, scratch);
-+  define(lir, ins);
-+}
-+
-+void LIRGeneratorPPC64::lowerTruncateFToInt32(MTruncateToInt32* ins) {
-+  // Float32 -> Int32 truncation: register input plus one float32 temp.
-+  MDefinition* input = ins->input();
-+  MOZ_ASSERT(input->type() == MIRType::Float32);
-+  const auto inputUse = useRegister(input);
-+  const auto scratch = tempFloat32();
-+  auto* lir = new (alloc()) LTruncateFToInt32(inputUse, scratch);
-+  define(lir, ins);
-+}
-+
-+// Not expected to be reached on this architecture; crashes if it is.
-+void LIRGeneratorPPC64::lowerBuiltinInt64ToFloatingPoint(
-+ MBuiltinInt64ToFloatingPoint* ins) {
-+ MOZ_CRASH("We don't use it for this architecture");
-+}
-+
-+void LIRGeneratorPPC64::lowerWasmSelectI(MWasmSelect* select) {
-+  // The output reuses the true-expression input. The false expression may
-+  // live anywhere and the condition must be in a register.
-+  const auto whenTrue = useRegisterAtStart(select->trueExpr());
-+  const auto whenFalse = useAny(select->falseExpr());
-+  const auto condition = useRegister(select->condExpr());
-+  auto* lir = new (alloc()) LWasmSelect(whenTrue, whenFalse, condition);
-+  defineReuseInput(lir, select, LWasmSelect::TrueExprIndex);
-+}
-+
-+void LIRGeneratorPPC64::lowerWasmSelectI64(MWasmSelect* select) {
-+  // 64-bit select: the output reuses the true-expression input, and the
-+  // condition must be in a register.
-+  const auto whenTrue = useInt64RegisterAtStart(select->trueExpr());
-+  const auto whenFalse = useInt64(select->falseExpr());
-+  const auto condition = useRegister(select->condExpr());
-+  auto* lir = new (alloc()) LWasmSelectI64(whenTrue, whenFalse, condition);
-+  defineInt64ReuseInput(lir, select, LWasmSelectI64::TrueExprIndex);
-+}
-+
-+void LIRGeneratorPPC64::lowerWasmBuiltinTruncateToInt32(
-+    MWasmBuiltinTruncateToInt32* ins) {
-+  // Picks the Double or Float32 LIR node by input type. Both take the input
-+  // in a register, the instance pinned to InstanceReg, and a bogus temp.
-+  MDefinition* input = ins->input();
-+  const bool isDouble = input->type() == MIRType::Double;
-+  MOZ_ASSERT(isDouble || input->type() == MIRType::Float32);
-+
-+  const auto inputUse = useRegister(input);
-+  const auto instanceUse = useFixed(ins->instance(), InstanceReg);
-+
-+  if (isDouble) {
-+    auto* lir = new (alloc()) LWasmBuiltinTruncateDToInt32(
-+        inputUse, instanceUse, LDefinition::BogusTemp());
-+    define(lir, ins);
-+    return;
-+  }
-+
-+  auto* lir = new (alloc()) LWasmBuiltinTruncateFToInt32(
-+      inputUse, instanceUse, LDefinition::BogusTemp());
-+  define(lir, ins);
-+}
-+
-+// Not expected to be reached on this architecture; crashes if it is.
-+void LIRGeneratorPPC64::lowerWasmBuiltinTruncateToInt64(
-+ MWasmBuiltinTruncateToInt64* ins) {
-+ MOZ_CRASH("We don't use it for this architecture");
-+}
-+
-+// The runtime-call 64-bit division path is not used on this architecture;
-+// crashes if reached.
-+void LIRGeneratorPPC64::lowerWasmBuiltinDivI64(MWasmBuiltinDivI64* div) {
-+ MOZ_CRASH("We don't use runtime div for this architecture");
-+}
-+
-+// The runtime-call 64-bit modulo path is not used on this architecture;
-+// crashes if reached.
-+void LIRGeneratorPPC64::lowerWasmBuiltinModI64(MWasmBuiltinModI64* mod) {
-+ MOZ_CRASH("We don't use runtime mod for this architecture");
-+}
-+
-+void LIRGeneratorPPC64::lowerAtomicLoad64(MLoadUnboxedScalar* ins) {
-+  // 64-bit atomic load: elements in a register, index as a register or an
-+  // index constant scaled for the storage type; defines an Int64 result.
-+  const LUse base = useRegister(ins->elements());
-+  const LAllocation idx =
-+      useRegisterOrIndexConstant(ins->index(), ins->storageType());
-+  defineInt64(new (alloc()) LAtomicLoad64(base, idx), ins);
-+}
-+
-+void LIRGeneratorPPC64::lowerAtomicStore64(MStoreUnboxedScalar* ins) {
-+  // 64-bit atomic store: elements in a register, index as a register or an
-+  // index constant scaled for the write type, value in a 64-bit register.
-+  // The node produces no result, so it is only added.
-+  LUse base = useRegister(ins->elements());
-+  LAllocation idx = useRegisterOrIndexConstant(ins->index(), ins->writeType());
-+  LInt64Allocation stored = useInt64Register(ins->value());
-+  auto* lir = new (alloc()) LAtomicStore64(base, idx, stored);
-+  add(lir, ins);
-+}
-+
-+// ===============================================================
-+// LIRGenerator::visit* implementations
-+
-+void LIRGenerator::visitBox(MBox* box) {
-+  MDefinition* payload = box->getOperand(0);
-+  const bool isConstant = payload->isConstant();
-+
-+  // A constant payload whose box can be emitted at its uses needs no LIR
-+  // node of its own.
-+  if (isConstant && box->canEmitAtUses()) {
-+    emitAtUses(box);
-+    return;
-+  }
-+
-+  const LDefinition boxDef(LDefinition::BOX);
-+  if (!isConstant) {
-+    // Non-constant payload: box it from a register used at start.
-+    auto* lir =
-+        new (alloc()) LBox(useRegisterAtStart(payload), payload->type());
-+    define(lir, box, boxDef);
-+    return;
-+  }
-+
-+  // Constant payload: materialise the boxed JS value directly.
-+  define(new (alloc()) LValue(payload->toConstant()->toJSValue()), box,
-+         boxDef);
-+}
-+
-+void LIRGenerator::visitUnbox(MUnbox* unbox) {
-+  MDefinition* boxed = unbox->getOperand(0);
-+  MOZ_ASSERT(boxed->type() == MIRType::Value);
-+
-+  const bool canFail = unbox->fallible();
-+
-+  // Floating-point results (Double only) use LUnboxFloatingPoint. Otherwise
-+  // LUnbox is used, requiring a register for the input only when the unbox
-+  // is fallible.
-+  LInstructionHelper<1, BOX_PIECES, 0>* lir;
-+  if (IsFloatingPointType(unbox->type())) {
-+    MOZ_ASSERT(unbox->type() == MIRType::Double);
-+    lir = new (alloc()) LUnboxFloatingPoint(useBoxAtStart(boxed));
-+  } else {
-+    if (canFail) {
-+      lir = new (alloc()) LUnbox(useRegisterAtStart(boxed));
-+    } else {
-+      lir = new (alloc()) LUnbox(useAtStart(boxed));
-+    }
-+  }
-+
-+  if (canFail) {
-+    assignSnapshot(lir, unbox->bailoutKind());
-+  }
-+
-+  define(lir, unbox);
-+}
-+
-+void LIRGenerator::visitCopySign(MCopySign* ins) {
-+  MDefinition* magnitude = ins->lhs();
-+  MDefinition* sign = ins->rhs();
-+
-+  // Both operands and the result share one floating-point type.
-+  MOZ_ASSERT(IsFloatingPointType(magnitude->type()));
-+  MOZ_ASSERT(magnitude->type() == sign->type());
-+  MOZ_ASSERT(magnitude->type() == ins->type());
-+
-+  // Double operands use LCopySignD; anything else uses LCopySignF.
-+  LInstructionHelper<1, 2, 0>* lir;
-+  if (magnitude->type() != MIRType::Double) {
-+    lir = new (alloc()) LCopySignF();
-+  } else {
-+    lir = new (alloc()) LCopySignD();
-+  }
-+
-+  lowerForFPU(lir, ins, magnitude, sign);
-+}
-+
-+void LIRGenerator::visitExtendInt32ToInt64(MExtendInt32ToInt64* ins) {
-+  // The 32-bit input is a register used at start; the result is an Int64.
-+  const auto inputUse = useRegisterAtStart(ins->input());
-+  auto* lir = new (alloc()) LExtendInt32ToInt64(inputUse);
-+  defineInt64(lir, ins);
-+}
-+
-+void LIRGenerator::visitSignExtendInt64(MSignExtendInt64* ins) {
-+  // The 64-bit input is a register used at start; the result is an Int64.
-+  const auto inputUse = useInt64RegisterAtStart(ins->input());
-+  auto* lir = new (alloc()) LSignExtendInt64(inputUse);
-+  defineInt64(lir, ins);
-+}
-+
-+void LIRGenerator::visitInt64ToFloatingPoint(MInt64ToFloatingPoint* ins) {
-+  // Int64 input in a 64-bit register; the result is a floating-point value.
-+  MDefinition* input = ins->input();
-+  MOZ_ASSERT(input->type() == MIRType::Int64);
-+  MOZ_ASSERT(IsFloatingPointType(ins->type()));
-+  const auto inputUse = useInt64Register(input);
-+  auto* lir = new (alloc()) LInt64ToFloatingPoint(inputUse);
-+  define(lir, ins);
-+}
-+
-+void LIRGenerator::visitSubstr(MSubstr* ins) {
-+  // String, begin and length are register uses, and three temps are
-+  // reserved. The node is defined first and then given a safepoint.
-+  const auto str = useRegister(ins->string());
-+  const auto begin = useRegister(ins->begin());
-+  const auto length = useRegister(ins->length());
-+  const auto temp0 = temp();
-+  const auto temp1 = temp();
-+  const auto temp2 = temp();
-+  auto* lir = new (alloc()) LSubstr(str, begin, length, temp0, temp1, temp2);
-+  define(lir, ins);
-+  assignSafepoint(lir, ins);
-+}
-+
-+void LIRGenerator::visitReturnImpl(MDefinition* opd, bool isGenerator) {
-+ MOZ_ASSERT(opd->type() == MIRType::Value);
-+ LReturn* ins = new (alloc()) LReturn(isGenerator);
-+ ins->setOperand(0, useFixed(opd, JSReturnReg));
-+ add(ins);
-+}
-+void LIRGenerator::visitCompareExchangeTypedArrayElement(
-+ MCompareExchangeTypedArrayElement* ins) {
-+ MOZ_ASSERT(!Scalar::isFloatingType(ins->arrayType()));
-+ MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
-+ MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
-+
-+ const LUse elements = useRegister(ins->elements());
-+ const LAllocation index =
-+ useRegisterOrIndexConstant(ins->index(), ins->arrayType());
-+
-+ if (Scalar::isBigIntType(ins->arrayType())) {
-+ LInt64Allocation oldval = useInt64Register(ins->oldval());
-+ LInt64Allocation newval = useInt64Register(ins->newval());
-+
-+ auto* lir = new (alloc())
-+ LCompareExchangeTypedArrayElement64(elements, index, oldval, newval);
-+ defineInt64(lir, ins);
-+ return;
-+ }
-+
-+ const LAllocation oldval = useRegister(ins->oldval());
-+ const LAllocation newval = useRegister(ins->newval());
-+
-+ LDefinition outTemp = LDefinition::BogusTemp();
-+ LDefinition valueTemp = LDefinition::BogusTemp();
-+ LDefinition offsetTemp = LDefinition::BogusTemp();
-+ LDefinition maskTemp = LDefinition::BogusTemp();
-+
-+ if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
-+ outTemp = temp();
-+ }
-+
-+ if (Scalar::byteSize(ins->arrayType()) < 4) {
-+ // PPC64 sub-word CAS uses lbarx/lharx + stbcx./sthcx. (POWER7+); only
-+ // valueTemp is needed, to hold the extsb/extsh-canonicalised oldval
-+ // for the 32-bit cmpw. offsetTemp/maskTemp are unused (no round-down
-+ // + bit-isolate dance), and remain BogusTemp.
-+ valueTemp = temp();
-+ }
-+
-+ LCompareExchangeTypedArrayElement* lir = new (alloc())
-+ LCompareExchangeTypedArrayElement(elements, index, oldval, newval,
-+ outTemp, valueTemp, offsetTemp,
-+ maskTemp);
-+
-+ define(lir, ins);
-+}
-+
-+void LIRGenerator::visitAtomicExchangeTypedArrayElement(
-+ MAtomicExchangeTypedArrayElement* ins) {
-+ MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
-+ MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
-+
-+ const LUse elements = useRegister(ins->elements());
-+ const LAllocation index =
-+ useRegisterOrIndexConstant(ins->index(), ins->arrayType());
-+
-+ if (Scalar::isBigIntType(ins->arrayType())) {
-+ LInt64Allocation value = useInt64Register(ins->value());
-+
-+ auto* lir = new (alloc())
-+ LAtomicExchangeTypedArrayElement64(elements, index, value);
-+ defineInt64(lir, ins);
-+ return;
-+ }
-+
-+ MOZ_ASSERT(ins->arrayType() <= Scalar::Uint32);
-+
-+ const LAllocation value = useRegister(ins->value());
-+
-+ LDefinition outTemp = LDefinition::BogusTemp();
-+ LDefinition valueTemp = LDefinition::BogusTemp();
-+ LDefinition offsetTemp = LDefinition::BogusTemp();
-+ LDefinition maskTemp = LDefinition::BogusTemp();
-+
-+ if (ins->arrayType() == Scalar::Uint32) {
-+ MOZ_ASSERT(ins->type() == MIRType::Double);
-+ outTemp = temp();
-+ }
-+
-+ // PPC64 sub-word atomic exchange uses lbarx/lharx + stbcx./sthcx. directly
-+ // (POWER7+); valueTemp/offsetTemp/maskTemp are never read by the
-+ // implementation (see MacroAssembler-ppc64.cpp's AtomicExchange template).
-+ // Leave them as BogusTemp.
-+
-+ LAtomicExchangeTypedArrayElement* lir =
-+ new (alloc()) LAtomicExchangeTypedArrayElement(
-+ elements, index, value, outTemp, valueTemp, offsetTemp, maskTemp);
-+
-+ define(lir, ins);
-+}
-+
-+void LIRGenerator::visitAtomicTypedArrayElementBinop(
-+ MAtomicTypedArrayElementBinop* ins) {
-+ MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
-+ MOZ_ASSERT(!Scalar::isFloatingType(ins->arrayType()));
-+ MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
-+ MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);
-+
-+ const LUse elements = useRegister(ins->elements());
-+ const LAllocation index =
-+ useRegisterOrIndexConstant(ins->index(), ins->arrayType());
-+
-+ if (Scalar::isBigIntType(ins->arrayType())) {
-+ LInt64Allocation value = useInt64Register(ins->value());
-+ LInt64Definition temp = tempInt64();
-+
-+ if (ins->isForEffect()) {
-+ auto* lir = new (alloc()) LAtomicTypedArrayElementBinopForEffect64(
-+ elements, index, value, temp);
-+ add(lir, ins);
-+ return;
-+ }
-+
-+ auto* lir = new (alloc())
-+ LAtomicTypedArrayElementBinop64(elements, index, value, temp);
-+ defineInt64(lir, ins);
-+ return;
-+ }
-+
-+ LAllocation value = useRegister(ins->value());
-+ LDefinition valueTemp = LDefinition::BogusTemp();
-+ LDefinition offsetTemp = LDefinition::BogusTemp();
-+ LDefinition maskTemp = LDefinition::BogusTemp();
-+
-+ // PPC64 sub-word atomic-binop uses lbarx/lharx + stbcx./sthcx. (POWER7+).
-+ // The fetch-op variant needs valueTemp to hold the post-op value being
-+ // condition-stored (MacroAssembler-ppc64.cpp's AtomicFetchOp); the
-+ // for-effect variant uses an internal scratch and needs no temps at
-+ // all. offsetTemp/maskTemp are unused in either path.
-+ if (Scalar::byteSize(ins->arrayType()) < 4 && !ins->isForEffect()) {
-+ valueTemp = temp();
-+ }
-+
-+ if (ins->isForEffect()) {
-+ LAtomicTypedArrayElementBinopForEffect* lir =
-+ new (alloc()) LAtomicTypedArrayElementBinopForEffect(
-+ elements, index, value, valueTemp, offsetTemp, maskTemp);
-+ add(lir, ins);
-+ return;
-+ }
-+
-+ LDefinition outTemp = LDefinition::BogusTemp();
-+
-+ if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
-+ outTemp = temp();
-+ }
-+
-+ LAtomicTypedArrayElementBinop* lir =
-+ new (alloc()) LAtomicTypedArrayElementBinop(
-+ elements, index, value, outTemp, valueTemp, offsetTemp, maskTemp);
-+ define(lir, ins);
-+}
-+void LIRGenerator::visitAsmJSLoadHeap(MAsmJSLoadHeap* ins) {
-+ MDefinition* base = ins->base();
-+ MOZ_ASSERT(base->type() == MIRType::Int32);
-+
-+ MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
-+ MOZ_ASSERT_IF(ins->needsBoundsCheck(),
-+ boundsCheckLimit->type() == MIRType::Int32);
-+
-+ LAllocation baseAlloc = useRegisterAtStart(base);
-+
-+ LAllocation limitAlloc = ins->needsBoundsCheck()
-+ ? useRegisterAtStart(boundsCheckLimit)
-+ : LAllocation();
-+
-+ MOZ_ASSERT(!ins->hasMemoryBase());
-+ auto* lir =
-+ new (alloc()) LAsmJSLoadHeap(baseAlloc, limitAlloc, LAllocation());
-+ define(lir, ins);
-+}
-+void LIRGenerator::visitAsmJSStoreHeap(MAsmJSStoreHeap* ins) {
-+ MDefinition* base = ins->base();
-+ MOZ_ASSERT(base->type() == MIRType::Int32);
-+
-+ MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
-+ MOZ_ASSERT_IF(ins->needsBoundsCheck(),
-+ boundsCheckLimit->type() == MIRType::Int32);
-+
-+ LAllocation baseAlloc = useRegisterAtStart(base);
-+
-+ LAllocation limitAlloc = ins->needsBoundsCheck()
-+ ? useRegisterAtStart(boundsCheckLimit)
-+ : LAllocation();
-+
-+ MOZ_ASSERT(!ins->hasMemoryBase());
-+ add(new (alloc()) LAsmJSStoreHeap(baseAlloc, useRegisterAtStart(ins->value()),
-+ limitAlloc, LAllocation()),
-+ ins);
-+}
-+void LIRGenerator::visitWasmLoad(MWasmLoad* ins) {
-+ MDefinition* base = ins->base();
-+ MOZ_ASSERT(base->type() == MIRType::Int32 || base->type() == MIRType::Int64);
-+
-+ LAllocation memoryBase =
-+ ins->hasMemoryBase() ? LAllocation(useRegisterAtStart(ins->memoryBase()))
-+ : LGeneralReg(HeapReg);
-+
-+ LAllocation ptr = useRegisterAtStart(base);
-+
-+ LDefinition ptrCopy = LDefinition::BogusTemp();
-+ if (ins->access().offset32()) {
-+ ptrCopy = tempCopy(base, 0);
-+ }
-+
-+ if (ins->type() == MIRType::Int64) {
-+ auto* lir = new (alloc()) LWasmLoadI64(ptr, memoryBase, ptrCopy);
-+ defineInt64(lir, ins);
-+ return;
-+ }
-+
-+ auto* lir = new (alloc()) LWasmLoad(ptr, memoryBase, ptrCopy);
-+ define(lir, ins);
-+}
-+void LIRGenerator::visitWasmStore(MWasmStore* ins) {
-+ MDefinition* base = ins->base();
-+ MOZ_ASSERT(base->type() == MIRType::Int32 || base->type() == MIRType::Int64);
-+
-+ MDefinition* value = ins->value();
-+ LAllocation memoryBase =
-+ ins->hasMemoryBase() ? LAllocation(useRegisterAtStart(ins->memoryBase()))
-+ : LGeneralReg(HeapReg);
-+
-+ LAllocation baseAlloc = useRegisterAtStart(base);
-+
-+ LDefinition ptrCopy = LDefinition::BogusTemp();
-+ if (ins->access().offset32()) {
-+ ptrCopy = tempCopy(base, 0);
-+ }
-+
-+ if (ins->access().type() == Scalar::Int64) {
-+ LInt64Allocation valueAlloc = useInt64RegisterAtStart(value);
-+ auto* lir =
-+ new (alloc()) LWasmStoreI64(baseAlloc, valueAlloc, memoryBase, ptrCopy);
-+ add(lir, ins);
-+ return;
-+ }
-+
-+ LAllocation valueAlloc = useRegisterAtStart(value);
-+ auto* lir =
-+ new (alloc()) LWasmStore(baseAlloc, valueAlloc, memoryBase, ptrCopy);
-+ add(lir, ins);
-+}
-+void LIRGenerator::visitWasmTruncateToInt64(MWasmTruncateToInt64* ins) {
-+ MDefinition* opd = ins->input();
-+ MOZ_ASSERT(opd->type() == MIRType::Double || opd->type() == MIRType::Float32);
-+
-+ defineInt64(new (alloc()) LWasmTruncateToInt64(useRegister(opd)), ins);
-+}
-+void LIRGenerator::visitWasmUnsignedToDouble(MWasmUnsignedToDouble* ins) {
-+ MOZ_ASSERT(ins->input()->type() == MIRType::Int32);
-+ LWasmUint32ToDouble* lir =
-+ new (alloc()) LWasmUint32ToDouble(useRegisterAtStart(ins->input()));
-+ define(lir, ins);
-+}
-+void LIRGenerator::visitWasmUnsignedToFloat32(MWasmUnsignedToFloat32* ins) {
-+ MOZ_ASSERT(ins->input()->type() == MIRType::Int32);
-+ LWasmUint32ToFloat32* lir =
-+ new (alloc()) LWasmUint32ToFloat32(useRegisterAtStart(ins->input()));
-+ define(lir, ins);
-+}
-+void LIRGenerator::visitWasmCompareExchangeHeap(MWasmCompareExchangeHeap* ins) {
-+ MDefinition* base = ins->base();
-+ MOZ_ASSERT(base->type() == MIRType::Int32 || base->type() == MIRType::Int64);
-+ LAllocation memoryBase = ins->hasMemoryBase()
-+ ? LAllocation(useRegister(ins->memoryBase()))
-+ : LGeneralReg(HeapReg);
-+
-+ if (ins->access().type() == Scalar::Int64) {
-+ auto* lir = new (alloc()) LWasmCompareExchangeI64(
-+ useRegister(base), useInt64Register(ins->oldValue()),
-+ useInt64Register(ins->newValue()), memoryBase);
-+ defineInt64(lir, ins);
-+ return;
-+ }
-+
-+ LDefinition valueTemp = LDefinition::BogusTemp();
-+ LDefinition offsetTemp = LDefinition::BogusTemp();
-+ LDefinition maskTemp = LDefinition::BogusTemp();
-+
-+ // PPC64 sub-word wasm CAS uses lbarx/lharx + stbcx./sthcx. (POWER7+);
-+ // valueTemp holds the extsb/extsh-canonicalised oldval for cmpw, while
-+ // offsetTemp/maskTemp are unused (no round-down + bit-isolate dance).
-+ if (ins->access().byteSize() < 4) {
-+ valueTemp = temp();
-+ }
-+
-+ auto* lir = new (alloc())
-+ LWasmCompareExchangeHeap(useRegister(base), useRegister(ins->oldValue()),
-+ useRegister(ins->newValue()), memoryBase,
-+ valueTemp, offsetTemp, maskTemp);
-+
-+ define(lir, ins);
-+}
-+void LIRGenerator::visitWasmAtomicExchangeHeap(MWasmAtomicExchangeHeap* ins) {
-+ MDefinition* base = ins->base();
-+ MOZ_ASSERT(base->type() == MIRType::Int32 || base->type() == MIRType::Int64);
-+ LAllocation memoryBase = ins->hasMemoryBase()
-+ ? LAllocation(useRegister(ins->memoryBase()))
-+ : LGeneralReg(HeapReg);
-+
-+ if (ins->access().type() == Scalar::Int64) {
-+ auto* lir = new (alloc()) LWasmAtomicExchangeI64(
-+ useRegister(base), useInt64Register(ins->value()), memoryBase);
-+ defineInt64(lir, ins);
-+ return;
-+ }
-+
-+ // PPC64 sub-word wasm atomic exchange uses lbarx/lharx + stbcx./sthcx.
-+ // (POWER7+); valueTemp/offsetTemp/maskTemp are never read by the
-+ // implementation (see MacroAssembler-ppc64.cpp's AtomicExchange template).
-+ // Pass BogusTemp for all three.
-+ LDefinition valueTemp = LDefinition::BogusTemp();
-+ LDefinition offsetTemp = LDefinition::BogusTemp();
-+ LDefinition maskTemp = LDefinition::BogusTemp();
-+
-+ auto* lir = new (alloc())
-+ LWasmAtomicExchangeHeap(useRegister(base), useRegister(ins->value()),
-+ memoryBase, valueTemp, offsetTemp, maskTemp);
-+ define(lir, ins);
-+}
-+void LIRGenerator::visitWasmAtomicBinopHeap(MWasmAtomicBinopHeap* ins) {
-+ MDefinition* base = ins->base();
-+ MOZ_ASSERT(base->type() == MIRType::Int32 || base->type() == MIRType::Int64);
-+ LAllocation memoryBase = ins->hasMemoryBase()
-+ ? LAllocation(useRegister(ins->memoryBase()))
-+ : LGeneralReg(HeapReg);
-+
-+ if (ins->access().type() == Scalar::Int64) {
-+ auto* lir = new (alloc())
-+ LWasmAtomicBinopI64(useRegister(base), useInt64Register(ins->value()),
-+ memoryBase, tempInt64());
-+ defineInt64(lir, ins);
-+ return;
-+ }
-+
-+ LDefinition valueTemp = LDefinition::BogusTemp();
-+ LDefinition offsetTemp = LDefinition::BogusTemp();
-+ LDefinition maskTemp = LDefinition::BogusTemp();
-+
-+ // PPC64 sub-word wasm atomic-binop uses lbarx/lharx + stbcx./sthcx.
-+ // (POWER7+). The fetch-op variant needs valueTemp for the post-op value
-+ // being condition-stored; the for-effect variant uses an internal
-+ // scratch and needs no temps at all. offsetTemp/maskTemp are unused
-+ // in either path.
-+ if (ins->access().byteSize() < 4 && ins->hasUses()) {
-+ valueTemp = temp();
-+ }
-+
-+ if (!ins->hasUses()) {
-+ LWasmAtomicBinopHeapForEffect* lir = new (alloc())
-+ LWasmAtomicBinopHeapForEffect(useRegister(base),
-+ useRegister(ins->value()), memoryBase,
-+ valueTemp, offsetTemp, maskTemp);
-+ add(lir, ins);
-+ return;
-+ }
-+
-+ auto* lir = new (alloc())
-+ LWasmAtomicBinopHeap(useRegister(base), useRegister(ins->value()),
-+ memoryBase, valueTemp, offsetTemp, maskTemp);
-+
-+ define(lir, ins);
-+}
-+
-+// SIMD lowering
-+void LIRGenerator::visitWasmTernarySimd128(MWasmTernarySimd128* ins) {
-+#ifdef ENABLE_WASM_SIMD
-+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
-+ // useRegister for v0/v1 and useRegisterAtStart only for v2 — matches
-+ // ARM64's V128Bitselect policy. defineReuseInput requires the reused
-+ // input to be useRegisterAtStart and the others to remain alive
-+ // (useRegister); reusing all three policies as useRegisterAtStart
-+ // trips the allocator's "*def->output() != alloc" assertion because
-+ // v0/v1 may then share the slot with the output.
-+ LDefinition temp0 = LDefinition::BogusTemp();
-+ if (ins->simdOp() == wasm::SimdOp::I32x4RelaxedDotI8x16I7x16AddS) {
-+ temp0 = tempSimd128();
-+ }
-+ auto* lir = new (alloc()) LWasmTernarySimd128(
-+ useRegister(ins->v0()), useRegister(ins->v1()),
-+ useRegisterAtStart(ins->v2()), temp0,
-+ ins->simdOp());
-+ // The PPC64 visitor (CodeGenerator-ppc64.cpp:visitWasmTernarySimd128)
-+ // emits the FMA / DOT_THEN_ADD chain with v2 as the implicit
-+ // accumulator. defineReuseInput tells the allocator to put `dest`
-+ // in v2's slot, eliminating the previous conditional moveSimd128.
-+ defineReuseInput(lir, ins, LWasmTernarySimd128::V2Index);
-+#else
-+ MOZ_CRASH("No SIMD");
-+#endif
-+}
-+void LIRGenerator::visitWasmBinarySimd128(MWasmBinarySimd128* ins) {
-+#ifdef ENABLE_WASM_SIMD
-+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
-+ LDefinition temp0 = LDefinition::BogusTemp();
-+ LDefinition temp1 = LDefinition::BogusTemp();
-+ // mulInt64x2 (i64x2.mul) routes through GPRs (mfvsrd/mulld/mtvsrd) and
-+ // uses an internal ScratchSimd128 + GPR scratches; its FloatRegister
-+ // temp1/temp2 parameters are inherited from the shared ARM64+PPC64
-+ // signature but unused on PPC64. Only FP min/max need SIMD temps for
-+ // the wasm NaN-canonicalisation dance.
-+ if (ins->simdOp() == wasm::SimdOp::F32x4Min ||
-+ ins->simdOp() == wasm::SimdOp::F32x4Max ||
-+ ins->simdOp() == wasm::SimdOp::F64x2Min ||
-+ ins->simdOp() == wasm::SimdOp::F64x2Max) {
-+ temp0 = tempSimd128();
-+ temp1 = tempSimd128();
-+ }
-+ auto* lir = new (alloc()) LWasmBinarySimd128(
-+ useRegisterAtStart(ins->lhs()), useRegisterAtStart(ins->rhs()),
-+ temp0, temp1, ins->simdOp());
-+ define(lir, ins);
-+#else
-+ MOZ_CRASH("No SIMD");
-+#endif
-+}
-+void LIRGenerator::visitWasmBinarySimd128WithConstant(
-+ MWasmBinarySimd128WithConstant* ins) {
-+#ifdef ENABLE_WASM_SIMD
-+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
-+ auto* lir = new (alloc()) LWasmBinarySimd128WithConstant(
-+ useRegisterAtStart(ins->lhs()), LDefinition::BogusTemp(), ins->rhs());
-+ define(lir, ins);
-+#else
-+ MOZ_CRASH("No SIMD");
-+#endif
-+}
-+void LIRGenerator::visitWasmShiftSimd128(MWasmShiftSimd128* ins) {
-+#ifdef ENABLE_WASM_SIMD
-+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
-+ MOZ_ASSERT(ins->rhs()->type() == MIRType::Int32);
-+
-+ if (ins->rhs()->isConstant()) {
-+ int32_t shiftCountMask;
-+ switch (ins->simdOp()) {
-+ case wasm::SimdOp::I8x16Shl:
-+ case wasm::SimdOp::I8x16ShrU:
-+ case wasm::SimdOp::I8x16ShrS:
-+ shiftCountMask = 7;
-+ break;
-+ case wasm::SimdOp::I16x8Shl:
-+ case wasm::SimdOp::I16x8ShrU:
-+ case wasm::SimdOp::I16x8ShrS:
-+ shiftCountMask = 15;
-+ break;
-+ case wasm::SimdOp::I32x4Shl:
-+ case wasm::SimdOp::I32x4ShrU:
-+ case wasm::SimdOp::I32x4ShrS:
-+ shiftCountMask = 31;
-+ break;
-+ case wasm::SimdOp::I64x2Shl:
-+ case wasm::SimdOp::I64x2ShrU:
-+ case wasm::SimdOp::I64x2ShrS:
-+ shiftCountMask = 63;
-+ break;
-+ default:
-+ MOZ_CRASH("Unexpected shift operation");
-+ }
-+ int32_t shiftCount = ins->rhs()->toConstant()->toInt32() & shiftCountMask;
-+#ifdef DEBUG
-+ js::wasm::ReportSimdAnalysis("shift -> constant shift");
-+#endif
-+ auto* lir = new (alloc())
-+ LWasmConstantShiftSimd128(useRegisterAtStart(ins->lhs()), shiftCount);
-+ define(lir, ins);
-+ } else {
-+#ifdef DEBUG
-+ js::wasm::ReportSimdAnalysis("shift -> variable shift");
-+#endif
-+ auto* lir = new (alloc()) LWasmVariableShiftSimd128(
-+ useRegisterAtStart(ins->lhs()), useRegisterAtStart(ins->rhs()));
-+ define(lir, ins);
-+ }
-+#else
-+ MOZ_CRASH("No SIMD");
-+#endif
-+}
-+#ifdef ENABLE_WASM_SIMD
-+// Helper: reconstruct raw Wasm byte lane indices from analyzed SimdShuffle.
-+static SimdConstant ReconstructShuffleBytes(const SimdShuffle& s) {
-+ int8_t bytes[16];
-+ if (s.permuteOp) {
-+ switch (*s.permuteOp) {
-+ case SimdPermuteOp::MOVE:
-+ for (int i = 0; i < 16; i++) bytes[i] = i;
-+ return SimdConstant::CreateX16(bytes);
-+ case SimdPermuteOp::PERMUTE_32x4: {
-+ const int32_t* w = reinterpret_cast<const int32_t*>(s.control.bytes());
-+ for (int i = 0; i < 4; i++)
-+ for (int j = 0; j < 4; j++) bytes[i*4+j] = w[i]*4+j;
-+ return SimdConstant::CreateX16(bytes);
-+ }
-+ case SimdPermuteOp::PERMUTE_16x8: {
-+ const int16_t* h = reinterpret_cast<const int16_t*>(s.control.bytes());
-+ for (int i = 0; i < 8; i++) {
-+ int idx = h[i] & 0x7;
-+ bytes[i*2] = idx*2;
-+ bytes[i*2+1] = idx*2+1;
-+ }
-+ return SimdConstant::CreateX16(bytes);
-+ }
-+ case SimdPermuteOp::BROADCAST_8x16: {
-+ int8_t lane = reinterpret_cast<const int8_t*>(s.control.bytes())[0];
-+ for (int i = 0; i < 16; i++) bytes[i] = lane;
-+ return SimdConstant::CreateX16(bytes);
-+ }
-+ case SimdPermuteOp::BROADCAST_16x8: {
-+ int8_t lane = reinterpret_cast<const int8_t*>(s.control.bytes())[0];
-+ for (int i = 0; i < 8; i++) {
-+ bytes[i*2] = lane*2; bytes[i*2+1] = lane*2+1;
-+ }
-+ return SimdConstant::CreateX16(bytes);
-+ }
-+ case SimdPermuteOp::ROTATE_RIGHT_8x16: {
-+ uint8_t shift = reinterpret_cast<const int8_t*>(s.control.bytes())[0];
-+ for (int i = 0; i < 16; i++) bytes[i] = (i + shift) % 16;
-+ return SimdConstant::CreateX16(bytes);
-+ }
-+ case SimdPermuteOp::SHIFT_RIGHT_8x16: {
-+ uint8_t shift = reinterpret_cast<const int8_t*>(s.control.bytes())[0];
-+ for (int i = 0; i < 16; i++) bytes[i] = (i+shift < 16) ? (i+shift) : 0;
-+ return SimdConstant::CreateX16(bytes);
-+ }
-+ case SimdPermuteOp::SHIFT_LEFT_8x16: {
-+ uint8_t shift = reinterpret_cast<const int8_t*>(s.control.bytes())[0];
-+ for (int i = 0; i < 16; i++) bytes[i] = (i >= shift) ? (i-shift) : 0;
-+ return SimdConstant::CreateX16(bytes);
-+ }
-+ case SimdPermuteOp::REVERSE_16x8:
-+ // Reverse bytes within each 16-bit lane: [1,0,3,2,5,4,...]
-+ for (int i = 0; i < 8; i++) {
-+ bytes[i*2] = i*2+1; bytes[i*2+1] = i*2;
-+ }
-+ return SimdConstant::CreateX16(bytes);
-+ case SimdPermuteOp::REVERSE_32x4:
-+ // Reverse bytes within each 32-bit lane: [3,2,1,0,7,6,5,4,...]
-+ for (int i = 0; i < 4; i++)
-+ for (int j = 0; j < 4; j++) bytes[i*4+j] = i*4+(3-j);
-+ return SimdConstant::CreateX16(bytes);
-+ case SimdPermuteOp::REVERSE_64x2:
-+ // Reverse bytes within each 64-bit lane: [7,6,5,4,3,2,1,0,15,...]
-+ for (int i = 0; i < 2; i++)
-+ for (int j = 0; j < 8; j++) bytes[i*8+j] = i*8+(7-j);
-+ return SimdConstant::CreateX16(bytes);
-+ default:
-+ break;
-+ }
-+ }
-+ // Handle SimdShuffleOp (two-operand patterns).
-+ if (s.shuffleOp) {
-+ switch (*s.shuffleOp) {
-+ case SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16: {
-+ // control[0] = suffix length. ARM64 uses 16-count as the EXT shift.
-+ // Reconstruct raw byte indices: EXT(rhs, lhs, 16-count) =
-+ // take (16-count) bytes from rhs end, then count bytes from lhs start.
-+ uint8_t count = 16 - reinterpret_cast<const int8_t*>(s.control.bytes())[0];
-+ for (int i = 0; i < 16; i++) {
-+ int idx = i + count;
-+ bytes[i] = (idx < 16) ? (idx + 16) : (idx - 16);
-+ }
-+ return SimdConstant::CreateX16(bytes);
-+ }
-+ case SimdShuffleOp::BLEND_8x16: {
-+ // control has 0 (lhs) or -1 (rhs) per byte.
-+ const int8_t* mask = reinterpret_cast<const int8_t*>(s.control.bytes());
-+ for (int i = 0; i < 16; i++)
-+ bytes[i] = mask[i] ? (i + 16) : i;
-+ return SimdConstant::CreateX16(bytes);
-+ }
-+ case SimdShuffleOp::BLEND_16x8: {
-+ const int16_t* mask = reinterpret_cast<const int16_t*>(s.control.bytes());
-+ for (int i = 0; i < 8; i++) {
-+ int base = mask[i] ? (i * 2 + 16) : (i * 2);
-+ bytes[i * 2] = base;
-+ bytes[i * 2 + 1] = base + 1;
-+ }
-+ return SimdConstant::CreateX16(bytes);
-+ }
-+#define INTERLEAVE(name, width, low_start, count) \
-+ case SimdShuffleOp::name: { \
-+ for (int i = 0; i < count; i++) { \
-+ int lhsIdx = low_start + i * width; \
-+ int rhsIdx = lhsIdx + 16; \
-+ for (int j = 0; j < width; j++) { \
-+ bytes[(i * 2) * width + j] = lhsIdx + j; \
-+ bytes[(i * 2 + 1) * width + j] = rhsIdx + j; \
-+ } \
-+ } \
-+ return SimdConstant::CreateX16(bytes); \
-+ }
-+ INTERLEAVE(INTERLEAVE_LOW_8x16, 1, 0, 8)
-+ INTERLEAVE(INTERLEAVE_HIGH_8x16, 1, 8, 8)
-+ INTERLEAVE(INTERLEAVE_LOW_16x8, 2, 0, 4)
-+ INTERLEAVE(INTERLEAVE_HIGH_16x8, 2, 8, 4)
-+ INTERLEAVE(INTERLEAVE_LOW_32x4, 4, 0, 2)
-+ INTERLEAVE(INTERLEAVE_HIGH_32x4, 4, 8, 2)
-+ INTERLEAVE(INTERLEAVE_LOW_64x2, 8, 0, 1)
-+ INTERLEAVE(INTERLEAVE_HIGH_64x2, 8, 8, 1)
-+#undef INTERLEAVE
-+ default:
-+ break;
-+ }
-+ }
-+ // PERMUTE_8x16, SHUFFLE_BLEND_8x16, etc: control should have raw byte indices.
-+ // Force to Int8x16 type to avoid assertions from mismatched types.
-+ if (s.control.type() == SimdConstant::Int8x16) {
-+ return s.control;
-+ }
-+ // Fallback: re-create as Int8x16 from raw bytes.
-+ memcpy(bytes, s.control.bytes(), 16);
-+ return SimdConstant::CreateX16(bytes);
-+}
-+
-+#endif // ENABLE_WASM_SIMD
-+
-+void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) {
-+#ifdef ENABLE_WASM_SIMD
-+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
-+ SimdShuffle s = ins->shuffle();
-+ switch (s.opd) {
-+ case SimdShuffle::Operand::LEFT:
-+ case SimdShuffle::Operand::RIGHT: {
-+ // Single-operand permute: the analysis has identified that only one
-+ // input matters (the other is zero or unused).
-+ LAllocation src;
-+ if (s.opd == SimdShuffle::Operand::LEFT) {
-+ src = useRegisterAtStart(ins->lhs());
-+ } else {
-+ src = useRegisterAtStart(ins->rhs());
-+ }
-+ auto* lir =
-+ new (alloc()) LWasmPermuteSimd128(src, *s.permuteOp, s.control);
-+ define(lir, ins);
-+ break;
-+ }
-+ case SimdShuffle::Operand::BOTH:
-+ case SimdShuffle::Operand::BOTH_SWAPPED: {
-+ SimdConstant ctrl = ReconstructShuffleBytes(s);
-+ LAllocation lhs, rhs;
-+ if (s.opd == SimdShuffle::Operand::BOTH_SWAPPED) {
-+ lhs = useRegisterAtStart(ins->rhs());
-+ rhs = useRegisterAtStart(ins->lhs());
-+ } else {
-+ lhs = useRegisterAtStart(ins->lhs());
-+ rhs = useRegisterAtStart(ins->rhs());
-+ }
-+ auto* lir = new (alloc()) LWasmShuffleSimd128(
-+ lhs, rhs, *s.shuffleOp, ctrl);
-+ define(lir, ins);
-+ break;
-+ }
-+ }
-+#else
-+ MOZ_CRASH("No SIMD");
-+#endif
-+}
-+void LIRGenerator::visitWasmReplaceLaneSimd128(MWasmReplaceLaneSimd128* ins) {
-+#ifdef ENABLE_WASM_SIMD
-+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
-+ if (ins->rhs()->type() == MIRType::Int64) {
-+ auto* lir = new (alloc()) LWasmReplaceInt64LaneSimd128(
-+ useRegisterAtStart(ins->lhs()), useInt64Register(ins->rhs()));
-+ defineReuseInput(lir, ins, LWasmReplaceInt64LaneSimd128::LhsIndex);
-+ } else {
-+ auto* lir = new (alloc()) LWasmReplaceLaneSimd128(
-+ useRegisterAtStart(ins->lhs()), useRegister(ins->rhs()));
-+ defineReuseInput(lir, ins, LWasmReplaceLaneSimd128::LhsIndex);
-+ }
-+#else
-+ MOZ_CRASH("No SIMD");
-+#endif
-+}
-+void LIRGenerator::visitWasmScalarToSimd128(MWasmScalarToSimd128* ins) {
-+#ifdef ENABLE_WASM_SIMD
-+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
-+ if (ins->input()->type() == MIRType::Int64) {
-+ auto* lir =
-+ new (alloc()) LWasmInt64ToSimd128(useInt64RegisterAtStart(ins->input()));
-+ define(lir, ins);
-+ } else {
-+ auto* lir =
-+ new (alloc()) LWasmScalarToSimd128(useRegisterAtStart(ins->input()));
-+ define(lir, ins);
-+ }
-+#else
-+ MOZ_CRASH("No SIMD");
-+#endif
-+}
-+void LIRGenerator::visitWasmUnarySimd128(MWasmUnarySimd128* ins) {
-+#ifdef ENABLE_WASM_SIMD
-+ MOZ_ASSERT(ins->type() == MIRType::Simd128);
-+ auto* lir = new (alloc())
-+ LWasmUnarySimd128(useRegisterAtStart(ins->input()),
-+ LDefinition::BogusTemp());
-+ define(lir, ins);
-+#else
-+ MOZ_CRASH("No SIMD");
-+#endif
-+}
-+#ifdef ENABLE_WASM_SIMD
-+bool LIRGeneratorPPC64::canFoldReduceSimd128AndBranch(wasm::SimdOp op) {
-+ switch (op) {
-+ case wasm::SimdOp::V128AnyTrue:
-+ case wasm::SimdOp::I8x16AllTrue:
-+ case wasm::SimdOp::I16x8AllTrue:
-+ case wasm::SimdOp::I32x4AllTrue:
-+ case wasm::SimdOp::I64x2AllTrue:
-+ return true;
-+ default:
-+ return false;
-+ }
-+}
-+
-+bool LIRGeneratorPPC64::canEmitWasmReduceSimd128AtUses(
-+ MWasmReduceSimd128* ins) {
-+ if (!ins->canEmitAtUses()) {
-+ return false;
-+ }
-+ if (ins->type() != MIRType::Int32) {
-+ return false;
-+ }
-+ if (!canFoldReduceSimd128AndBranch(ins->simdOp())) {
-+ return false;
-+ }
-+ MUseIterator iter(ins->usesBegin());
-+ if (iter == ins->usesEnd()) {
-+ return true;
-+ }
-+ MNode* node = iter->consumer();
-+ if (!node->isDefinition() || !node->toDefinition()->isTest()) {
-+ return false;
-+ }
-+ iter++;
-+ return iter == ins->usesEnd();
-+}
-+#endif // ENABLE_WASM_SIMD
-+
-+void LIRGenerator::visitWasmReduceSimd128(MWasmReduceSimd128* ins) {
-+#ifdef ENABLE_WASM_SIMD
-+ if (canEmitWasmReduceSimd128AtUses(ins)) {
-+ emitAtUses(ins);
-+ return;
-+ }
-+ if (ins->type() == MIRType::Int64) {
-+ auto* lir = new (alloc())
-+ LWasmReduceSimd128ToInt64(useRegisterAtStart(ins->input()));
-+ defineInt64(lir, ins);
-+ } else {
-+ auto* lir =
-+ new (alloc()) LWasmReduceSimd128(useRegisterAtStart(ins->input()));
-+ define(lir, ins);
-+ }
-+#else
-+ MOZ_CRASH("No SIMD");
-+#endif
-+}
-+void LIRGenerator::visitWasmLoadLaneSimd128(MWasmLoadLaneSimd128* ins) {
-+#ifdef ENABLE_WASM_SIMD
-+ LUse base = useRegisterAtStart(ins->base());
-+ LUse inputUse = useRegisterAtStart(ins->value());
-+ LAllocation memoryBase =
-+ ins->hasMemoryBase() ? LAllocation(useRegisterAtStart(ins->memoryBase()))
-+ : LGeneralReg(HeapReg);
-+ auto* lir = new (alloc()) LWasmLoadLaneSimd128(base, inputUse, memoryBase);
-+ define(lir, ins);
-+#else
-+ MOZ_CRASH("No SIMD");
-+#endif
-+}
-+void LIRGenerator::visitWasmStoreLaneSimd128(MWasmStoreLaneSimd128* ins) {
-+#ifdef ENABLE_WASM_SIMD
-+ LUse base = useRegisterAtStart(ins->base());
-+ LUse input = useRegisterAtStart(ins->value());
-+ LAllocation memoryBase =
-+ ins->hasMemoryBase() ? LAllocation(useRegisterAtStart(ins->memoryBase()))
-+ : LGeneralReg(HeapReg);
-+ auto* lir = new (alloc()) LWasmStoreLaneSimd128(base, input, memoryBase);
-+ add(lir, ins);
-+#else
-+ MOZ_CRASH("No SIMD");
-+#endif
-+}
-+
-+// PPC64 specializes compare+select for {U,}Int32 / {U,}Int64 compare with
-+// Int32 / Int64 result. The CodeGen visitor
-+// (CodeGenerator-ppc64.cpp:visitWasmCompareAndSelect) emits
-+// cmpw/cmplw/cmpd/cmpld + isel = 2 insns, replacing the ~5-7 insns the
-+// generic path would emit (boolean materialization + test + isel). FP
-+// specialization is not worthwhile — the generic FP select path already
-+// runs faster than the specialized integer one and PPC64 lacks a true
-+// fcsel equivalent (fsel only compares against zero).
-+bool LIRGeneratorShared::canSpecializeWasmCompareAndSelect(
-+ MCompare::CompareType compTy, MIRType insTy) {
-+ const bool insOk = insTy == MIRType::Int32 || insTy == MIRType::Int64;
-+ const bool cmpOk = compTy == MCompare::Compare_Int32 ||
-+ compTy == MCompare::Compare_UInt32 ||
-+ compTy == MCompare::Compare_Int64 ||
-+ compTy == MCompare::Compare_UInt64;
-+ return insOk && cmpOk;
-+}
-+
-+void LIRGeneratorShared::lowerWasmCompareAndSelect(MWasmSelect* ins,
-+ MDefinition* lhs,
-+ MDefinition* rhs,
-+ MCompare::CompareType compTy,
-+ JSOp jsop) {
-+ MOZ_ASSERT(canSpecializeWasmCompareAndSelect(compTy, ins->type()));
-+ auto* lir = new (alloc()) LWasmCompareAndSelect(
-+ useRegister(lhs), useRegister(rhs), useRegisterAtStart(ins->trueExpr()),
-+ useRegister(ins->falseExpr()), compTy, jsop);
-+ defineReuseInput(lir, ins, LWasmCompareAndSelect::IfTrueExprIndex);
-+}
-+
-+// MIR helpers needed by the linker
-+#ifdef ENABLE_WASM_SIMD
-+bool MWasmTernarySimd128::specializeBitselectConstantMaskAsShuffle(
-+ int8_t shuffle[16]) {
-+ return false;
-+}
-+#endif
-+
-+bool MWasmBinarySimd128::specializeForConstantRhs() { return false; }
-+
-+#ifdef ENABLE_WASM_SIMD
-+bool MWasmTernarySimd128::canRelaxBitselect() { return false; }
-+#endif
-+
-+#ifdef ENABLE_WASM_SIMD
-+bool MWasmBinarySimd128::canPmaddubsw() { return false; }
-+#endif
-+
-+} // namespace jit
-+} // namespace js
-diff --git a/js/src/jit/ppc64/Lowering-ppc64.h b/js/src/jit/ppc64/Lowering-ppc64.h
-new file mode 100644
-index 000000000000..9c3519a7bb23
---- /dev/null
-+++ b/js/src/jit/ppc64/Lowering-ppc64.h
-@@ -0,0 +1,105 @@
-+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
-+ * vim: set ts=8 sts=2 et sw=2 tw=80:
-+ * This Source Code Form is subject to the terms of the Mozilla Public
-+ * License, v. 2.0. If a copy of the MPL was not distributed with this
-+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-+
-+#ifndef jit_ppc64_Lowering_ppc64_h
-+#define jit_ppc64_Lowering_ppc64_h
-+
-+#include "jit/shared/Lowering-shared.h"
-+
-+namespace js {
-+namespace jit {
-+
-+class LIRGeneratorPPC64 : public LIRGeneratorShared {  // PPC64 lowering layer
-+ protected:
-+ LIRGeneratorPPC64(MIRGenerator* gen, MIRGraph& graph, LIRGraph& lirGraph)
-+ : LIRGeneratorShared(gen, graph, lirGraph) {}  // pure pass-through to base
-+
-+ LTableSwitch* newLTableSwitch(const LAllocation& in,
-+ const LDefinition& inputCopy);
-+ LTableSwitchV* newLTableSwitchV(const LBoxAllocation& in);
-+
-+ void lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
-+ MDefinition* lhs, MDefinition* rhs);
-+ template <class LInstr>
-+ void lowerForShiftInt64(LInstr* ins, MDefinition* mir, MDefinition* lhs,
-+ MDefinition* rhs);
-+ void lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
-+ MDefinition* input);  // one-input form
-+ void lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
-+ MDefinition* lhs, MDefinition* rhs);  // two-input form
-+ void lowerForALUInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES, 0>* ins,
-+ MDefinition* mir, MDefinition* input);
-+ void lowerForALUInt64(
-+ LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
-+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
-+ void lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs,
-+ MDefinition* rhs);
-+ void lowerForFPU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
-+ MDefinition* input);  // one-input form
-+ void lowerForFPU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
-+ MDefinition* lhs, MDefinition* rhs);  // two-input form
-+
-+ template <size_t Temps>
-+ void lowerForCompareI64(LInstructionHelper<1, 2 * INT64_PIECES, Temps>* lir,
-+ MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
-+
-+ LBoxAllocation useBoxFixed(MDefinition* mir, Register reg1, Register reg2,
-+ bool useAtStart = false);
-+
-+ LAllocation useByteOpRegister(MDefinition* mir);
-+ LAllocation useByteOpRegisterAtStart(MDefinition* mir);
-+ LAllocation useByteOpRegisterOrNonDoubleConstant(MDefinition* mir);
-+ LDefinition tempByteOpRegister();
-+
-+ LDefinition tempToUnbox();
-+
-+ bool needTempForPostBarrier() { return true; }  // unconditionally true here
-+
-+ void lowerUntypedPhiInput(MPhi* phi, uint32_t inputPosition, LBlock* block,
-+ size_t lirIndex);
-+ void lowerInt64PhiInput(MPhi* phi, uint32_t inputPosition, LBlock* block,
-+ size_t lirIndex);
-+ void defineInt64Phi(MPhi* phi, size_t lirIndex);
-+
-+ void lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs);
-+ void lowerDivI(MDiv* div);
-+ void lowerDivI64(MDiv* div);
-+ void lowerModI(MMod* mod);
-+ void lowerModI64(MMod* mod);
-+ void lowerUDiv(MDiv* div);
-+ void lowerUDivI64(MDiv* div);
-+ void lowerUMod(MMod* mod);
-+ void lowerUModI64(MMod* mod);
-+ void lowerUrshD(MUrsh* mir);
-+ void lowerPowOfTwoI(MPow* mir);
-+ void lowerBigIntPtrDiv(MBigIntPtrDiv* ins);
-+ void lowerBigIntPtrMod(MBigIntPtrMod* ins);
-+ void lowerBigIntPtrLsh(MBigIntPtrLsh* ins);
-+ void lowerBigIntPtrRsh(MBigIntPtrRsh* ins);
-+ void lowerTruncateDToInt32(MTruncateToInt32* ins);
-+ void lowerTruncateFToInt32(MTruncateToInt32* ins);
-+ void lowerBuiltinInt64ToFloatingPoint(MBuiltinInt64ToFloatingPoint* ins);
-+ void lowerWasmSelectI(MWasmSelect* select);
-+ void lowerWasmSelectI64(MWasmSelect* select);
-+ void lowerWasmBuiltinTruncateToInt64(MWasmBuiltinTruncateToInt64* ins);
-+ void lowerWasmBuiltinTruncateToInt32(MWasmBuiltinTruncateToInt32* ins);
-+ void lowerWasmBuiltinDivI64(MWasmBuiltinDivI64* div);
-+ void lowerWasmBuiltinModI64(MWasmBuiltinModI64* mod);
-+ void lowerAtomicLoad64(MLoadUnboxedScalar* ins);
-+ void lowerAtomicStore64(MStoreUnboxedScalar* ins);
-+
-+#ifdef ENABLE_WASM_SIMD  // the two hooks below exist only in SIMD builds
-+ bool canFoldReduceSimd128AndBranch(wasm::SimdOp op);
-+ bool canEmitWasmReduceSimd128AtUses(MWasmReduceSimd128* ins);
-+#endif
-+};  // class LIRGeneratorPPC64
-+
-+typedef LIRGeneratorPPC64 LIRGeneratorSpecific;
-+
-+} // namespace jit
-+} // namespace js
-+
-+#endif /* jit_ppc64_Lowering_ppc64_h */
-diff --git a/js/src/jit/ppc64/MacroAssembler-ppc64-inl.h b/js/src/jit/ppc64/MacroAssembler-ppc64-inl.h
-new file mode 100644
-index 000000000000..f82ca36b4e40
---- /dev/null
-+++ b/js/src/jit/ppc64/MacroAssembler-ppc64-inl.h
-@@ -0,0 +1,6142 @@
-+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
-+ * vim: set ts=8 sts=2 et sw=2 tw=80:
-+ * This Source Code Form is subject to the terms of the Mozilla Public
-+ * License, v. 2.0. If a copy of the MPL was not distributed with this
-+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-+
-+#ifndef jit_ppc64_MacroAssembler_ppc64_inl_h
-+#define jit_ppc64_MacroAssembler_ppc64_inl_h
-+
-+#include "jit/ppc64/MacroAssembler-ppc64.h"
-+
-+namespace js {
-+namespace jit {
-+
-+//{{{ check_macroassembler_style
-+
-+// ===============================================================
-+// Move instructions
-+
-+void MacroAssembler::move64(Register64 src, Register64 dest) {
-+ movePtr(src.reg, dest.reg);  // each Register64 is accessed via its .reg GPR
-+}
-+
-+void MacroAssembler::move64(Imm64 imm, Register64 dest) {
-+ movePtr(ImmWord(imm.value), dest.reg);  // materialize the 64-bit constant
-+}
-+
-+void MacroAssembler::moveDoubleToGPR64(FloatRegister src, Register64 dest) {
-+ as_mfvsrd(dest.reg, src);  // mfvsrd: raw doubleword copy VSR -> GPR
-+}
-+
-+void MacroAssembler::moveGPR64ToDouble(Register64 src, FloatRegister dest) {
-+ as_mtvsrd(dest, src.reg);  // mtvsrd: raw doubleword copy GPR -> VSR
-+}
-+
-+void MacroAssembler::moveLowDoubleToGPR(FloatRegister src, Register dest) {
-+ MOZ_CRASH("Not supported for this target");  // intentionally unimplemented
-+}
-+
-+void MacroAssembler::move64To32(Register64 src, Register dest) {
-+ as_extsw(dest, src.reg);  // low word of src, sign-extended to 64 bits
-+}
-+
-+void MacroAssembler::move32To64ZeroExtend(Register src, Register64 dest) {
-+ // Equivalent to `clrldi dest, src, 32`: clears the upper 32 bits.
-+ as_rldicl(dest.reg, src, 0, 32);
-+}
-+
-+void MacroAssembler::move8To64SignExtend(Register src, Register64 dest) {
-+ as_extsb(dest.reg, src);  // extsb: sign-extend the low byte
-+}
-+
-+void MacroAssembler::move16To64SignExtend(Register src, Register64 dest) {
-+ as_extsh(dest.reg, src);  // extsh: sign-extend the low halfword
-+}
-+
-+void MacroAssembler::move32To64SignExtend(Register src, Register64 dest) {
-+ as_extsw(dest.reg, src);  // extsw: sign-extend the low word
-+}
-+
-+void MacroAssembler::moveFloat32ToGPR(FloatRegister src, Register dest) {
-+ // FPR holds double-format value (PPC convention). Convert to
-+ // single-precision bits in bits 0:31 of the VSR, then extract.
-+ as_xscvdpspn(ScratchDoubleReg, src);  // clobbers ScratchDoubleReg
-+ as_mfvsrd(dest, ScratchDoubleReg);
-+ x_srdi(dest, dest, 32);  // bring the float bits down to the low word
-+}
-+
-+void MacroAssembler::moveGPRToFloat32(Register src, FloatRegister dest) {
-+ // Place raw single-precision bits in VSR bits 0:31, then convert
-+ // to double-precision format (matching PPC's FPR convention, like lfs).
-+ if (HasPOWER9()) {
-+ // mtvsrws splats the 32-bit word to both halves of the VSR.
-+ as_mtvsrws(dest, src);
-+ } else {
-+ // POWER8: shift GPR left 32 bits to place float bits in upper word,
-+ // then move to VSR. xscvspdpn reads from bits 0:31.
-+ UseScratchRegisterScope temps(*this);
-+ Register tmp = temps.Acquire();
-+ x_sldi(tmp, src, 32);  // shift into tmp so src itself is left untouched
-+ as_mtvsrd(dest, tmp);
-+ }
-+ as_xscvspdpn(dest, dest);  // common tail for both paths
-+}
-+
-+void MacroAssembler::moveFloat16ToGPR(FloatRegister src, Register dest) {
-+ MOZ_ASSERT(HasPOWER9());  // this helper is only expected on POWER9+
-+ // src has FP16 in dw0 bits 48:63; rest of dw0 is 0 (per xscvdphp /
-+ // lxsihzx / mtvsrwz contract). mfvsrd reads dw0 → dest = 0x...0000_HHHH.
-+ // Mask defensively in case a future caller hands us a non-canonical FP16.
-+ as_mfvsrd(dest, src);
-+ as_rldicl(dest, dest, 0, 48); // clrldi 48: keep low 16 bits
-+}
-+
-+void MacroAssembler::moveGPRToFloat16(Register src, FloatRegister dest) {
-+ MOZ_ASSERT(HasPOWER9());  // this helper is only expected on POWER9+
-+ // mtvsrwz zeros dw0 word 0 and copies src's low 32 to dw0 word 1; mask
-+ // src to its low 16 first so dw0 bits 32:47 stay zero (canonical FP16).
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ as_rldicl(scratch, src, 0, 48); // clrldi 48: keep only low 16
-+ as_mtvsrwz(dest, scratch);  // src is not modified; the mask lives in scratch
-+}
-+
-+void MacroAssembler::move8ZeroExtend(Register src, Register dest) {
-+ // rlwinm dest, src, 0, 24, 31 keeps only the low 8 bits.
-+ as_rlwinm(dest, src, 0, 24, 31);
-+}
-+
-+void MacroAssembler::move8SignExtend(Register src, Register dest) {
-+ as_extsb(dest, src);  // sign-extend the low byte
-+}
-+
-+void MacroAssembler::move16SignExtend(Register src, Register dest) {
-+ as_extsh(dest, src);  // sign-extend the low halfword
-+}
-+
-+void MacroAssembler::move8SignExtendToPtr(Register src, Register dest) {
-+ as_extsb(dest, src);  // same instruction as move8SignExtend
-+}
-+
-+void MacroAssembler::move16SignExtendToPtr(Register src, Register dest) {
-+ as_extsh(dest, src);  // same instruction as move16SignExtend
-+}
-+
-+void MacroAssembler::move32SignExtendToPtr(Register src, Register dest) {
-+ as_extsw(dest, src);  // sign-extend the low word
-+}
-+
-+void MacroAssembler::move32ZeroExtendToPtr(Register src, Register dest) {
-+ as_rldicl(dest, src, 0, 32);  // clrldi 32: clear the upper 32 bits
-+}
-+
-+// ===============================================================
-+// Load instructions
-+
-+void MacroAssembler::load32SignExtendToPtr(const Address& src, Register dest) {
-+ load32(src, dest);
-+ as_extsw(dest, dest);  // explicit sign-extension of the loaded word
-+}
-+
-+void MacroAssembler::loadAbiReturnAddress(Register dest) { xs_mflr(dest); }  // reads LR
-+
-+// ===============================================================
-+// Logical instructions
-+
-+void MacroAssembler::not32(Register reg) {
-+ x_not(reg, reg);
-+ as_extsw(reg, reg);  // 32-bit results are kept sign-extended in this file
-+}
-+
-+void MacroAssembler::notPtr(Register reg) { x_not(reg, reg); }  // full-width NOT
-+
-+void MacroAssembler::andPtr(Register src, Register dest) {
-+ as_and_(dest, dest, src);  // dest &= src, full register width
-+}
-+
-+// If `mask` is a non-zero, non-all-ones contiguous run of 1-bits in a
-+// 32-bit value (LSB-numbering), set MB/ME to the BE bit positions
-+// (PPC convention: bit 0 = MSB) needed by `rlwinm SH=0` and return true.
-+// Otherwise return false (mb/me untouched). Cost is at JIT emit time only.
-+static inline bool IsContigMask32(uint32_t mask, unsigned& mb, unsigned& me) {
-+ if (mask == 0 || mask == 0xFFFFFFFFu) return false;
-+ unsigned tz = (unsigned)__builtin_ctz(mask);  // index of the lowest set bit
-+ uint32_t shifted = mask >> tz;  // run now starts at bit 0
-+ if ((shifted & (shifted + 1)) != 0) return false; // Has a 0 between 1s.
-+ unsigned width = 32 - (unsigned)__builtin_clz(shifted);  // length of the run
-+ // LSB bits set: [tz, tz+width-1]. BE bits: [32-tz-width, 31-tz].
-+ mb = 32 - tz - width;
-+ me = 31 - tz;
-+ return true;
-+}
-+
-+// 64-bit contiguous-mask classification for AND-with-imm via PPC's
-+// rotate-and-mask family (SH=0). On success, sets `lsb` (LSB-numbering
-+// of lowest set bit) and `width` (number of contiguous 1-bits).
-+// Caller picks the encoding:
-+// - lsb == 0: low `width` bits set → rldicl with
-+// MB = 64 - width.
-+// - lsb + width == 64: high `width` bits set → rldicr with
-+// ME = width - 1.
-+// - lsb + width <= 32: run lies within the low word → rlwinm (which also
-+// zeros the high 32 bits).
-+// - otherwise (run straddles bit 32 with lsb > 0): this helper still returns
-+// true, but no SH=0 single insn fits; the caller falls back to scratch+and.
-+static inline bool IsContigMask64(uint64_t mask, unsigned& lsb,
-+ unsigned& width) {
-+ if (mask == 0 || mask == ~uint64_t(0)) return false;
-+ unsigned tz = (unsigned)__builtin_ctzll(mask);  // index of the lowest set bit
-+ uint64_t shifted = mask >> tz;  // run now starts at bit 0
-+ if ((shifted & (shifted + 1)) != 0) return false; // Has a 0 between 1s.
-+ width = 64 - (unsigned)__builtin_clzll(shifted);
-+ lsb = tz;
-+ return true;
-+}
-+
-+void MacroAssembler::andPtr(Imm32 imm, Register dest) {
-+ // andi. handles 16-bit unsigned immediates in 1 insn (sets CR0).
-+ // For wider positive immediates, IsContigMask32 → rlwinm (1 insn,
-+ // also sets CR0). NOTE: andPtr sign-extends Imm32 to 64-bit before
-+ // ANDing, so contig-mask is only safe when the immediate is
-+ // non-negative (high bit clear) — rlwinm always zeros the high 32.
-+ uint32_t uimm = uint32_t(imm.value);
-+ if (is_uintN(uimm, 16)) {
-+ as_andi_rc(dest, dest, uimm);
-+ return;
-+ }
-+ unsigned mb, me;  // written only when IsContigMask32 returns true
-+ if (imm.value >= 0 && IsContigMask32(uimm, mb, me)) {
-+ as_rlwinm_rc(dest, dest, 0, mb, me);
-+ return;
-+ }
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ movePtr(ImmWord(uintptr_t(intptr_t(imm.value))), scratch);  // sign-extended imm
-+ as_and_(dest, dest, scratch);
-+}
-+
-+void MacroAssembler::andPtr(Imm32 imm, Register src, Register dest) {
-+ if (src != dest) {
-+ xs_mr(dest, src);  // copy first, then AND in place
-+ }
-+ andPtr(imm, dest);
-+}
-+
-+void MacroAssembler::and64(Imm64 imm, Register64 dest) {
-+ uint64_t u = imm.value;
-+ // 16-bit unsigned → andi. (1 insn).
-+ if (u <= 0xFFFFu) {
-+ as_andi_rc(dest.reg, dest.reg, uint16_t(u));
-+ return;
-+ }
-+ unsigned lsb, width;  // written only when IsContigMask64 returns true
-+ if (IsContigMask64(u, lsb, width)) {
-+ if (lsb == 0) {
-+ // low `width` bits set: rldicl SH=0 MB=64-width.
-+ as_rldicl_rc(dest.reg, dest.reg, 0, 64 - width);
-+ return;
-+ }
-+ if (lsb + width == 64) {
-+ // high `width` bits set: rldicr SH=0 ME=width-1.
-+ as_rldicr_rc(dest.reg, dest.reg, 0, width - 1);
-+ return;
-+ }
-+ if (lsb + width <= 32) {
-+ // contig mask within low 32: rlwinm SH=0 also zeros the high 32 bits.
-+ // BE positions: mb = 32 - lsb - width, me = 31 - lsb.
-+ as_rlwinm_rc(dest.reg, dest.reg, 0, 32 - lsb - width, 31 - lsb);
-+ return;
-+ }
-+ // mid-run mask straddling bit 32 (lsb>0, lsb+width>32, lsb+width<64):
-+ // not encodable as SH=0 mask. Fall through to scratch+and.
-+ }
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ movePtr(ImmWord(u), scratch);  // generic path: load the mask, then AND
-+ as_and_(dest.reg, dest.reg, scratch);
-+}
-+
-+void MacroAssembler::and64(Register64 src, Register64 dest) {
-+ as_and_(dest.reg, dest.reg, src.reg);  // dest &= src
-+}
-+
-+void MacroAssembler::and32(Register src, Register dest) {
-+ as_and_(dest, dest, src);
-+ as_extsw(dest, dest);  // re-establish the sign-extended 32-bit form
-+}
-+
-+void MacroAssembler::and32(Imm32 imm, Register dest) {
-+ uint32_t uimm = uint32_t(imm.value);
-+ if (is_uintN(uimm, 16)) {
-+ as_andi_rc(dest, dest, uimm);  // 16-bit unsigned immediate: one andi.
-+ } else {
-+ unsigned mb, me;
-+ if (IsContigMask32(uimm, mb, me)) {
-+ // rlwinm.SH=0 ANDs with the contiguous mask; record form sets CR0
-+ // to match the side-effect of the andi. fast path above.
-+ as_rlwinm_rc(dest, dest, 0, mb, me);
-+ } else {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ move32(imm, scratch);  // generic path: load the mask, then AND
-+ as_and_(dest, dest, scratch);
-+ }
-+ }
-+ as_extsw(dest, dest);  // shared tail for all three paths
-+}
-+
-+void MacroAssembler::and32(Imm32 imm, Register src, Register dest) {
-+ if (src != dest) {
-+ xs_mr(dest, src);  // copy first, then AND in place
-+ }
-+ and32(imm, dest);
-+}
-+
-+void MacroAssembler::and32(Imm32 imm, const Address& dest) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ load32(dest, scratch);  // read-modify-write through scratch
-+ and32(imm, scratch);  // may acquire a further scratch on its generic path
-+ store32(scratch, dest);
-+}
-+
-+void MacroAssembler::and32(const Address& src, Register dest) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ load32(src, scratch);
-+ as_and_(dest, dest, scratch);
-+ as_extsw(dest, dest);  // re-establish the sign-extended 32-bit form
-+}
-+
-+void MacroAssembler::or64(Imm64 imm, Register64 dest) {
-+ uint64_t u = imm.value;
-+ // ori/oris zero-extend their immediates and don't touch other bits, so
-+ // when imm fits in unsigned 32 (high 32 == 0) the pair handles it in
-+ // 1-2 insns with no scratch.
-+ if (u <= 0xFFFFFFFFu) {
-+ uint16_t lo = uint16_t(u);  // bits 0..15 of the immediate
-+ uint16_t hi = uint16_t(u >> 16);  // bits 16..31 of the immediate
-+ if (hi == 0) {
-+ as_ori(dest.reg, dest.reg, lo);  // also reached for u == 0
-+ } else if (lo == 0) {
-+ as_oris(dest.reg, dest.reg, hi);
-+ } else {
-+ as_ori(dest.reg, dest.reg, lo);
-+ as_oris(dest.reg, dest.reg, hi);
-+ }
-+ return;
-+ }
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ movePtr(ImmWord(u), scratch);  // high 32 bits non-zero: load, then OR
-+ as_or_(dest.reg, dest.reg, scratch);
-+}
-+
-+void MacroAssembler::or32(Register src, Register dest) {
-+ as_or_(dest, dest, src);
-+ as_extsw(dest, dest);  // re-establish the sign-extended 32-bit form
-+}
-+
-+void MacroAssembler::or32(Imm32 imm, Register dest) {
-+ uint32_t uimm = uint32_t(imm.value);
-+ uint16_t lo = uimm & 0xFFFF;  // low halfword, for ori
-+ uint16_t hi = (uimm >> 16) & 0xFFFF;  // high halfword, for oris
-+ if (hi == 0) {
-+ as_ori(dest, dest, lo);
-+ } else if (lo == 0) {
-+ as_oris(dest, dest, hi);
-+ } else {
-+ // ori + oris pair handles arbitrary 32-bit unsigned imm in 2 insns
-+ // without a scratch GPR. ori/oris are non-record forms (don't touch
-+ // CR0), matching the behavior of the previous scratch+or_ path
-+ // (or_ is the record form, but the value-only result is what callers
-+ // observe through dest).
-+ as_ori(dest, dest, lo);
-+ as_oris(dest, dest, hi);
-+ }
-+ as_extsw(dest, dest);  // shared tail for all three paths
-+}
-+
-+void MacroAssembler::or32(Imm32 imm, Register src, Register dest) {
-+ if (src != dest) {
-+ xs_mr(dest, src);  // copy first, then OR in place
-+ }
-+ or32(imm, dest);
-+}
-+
-+void MacroAssembler::or32(Imm32 imm, const Address& dest) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ load32(dest, scratch);  // read-modify-write through scratch
-+ or32(imm, scratch);
-+ store32(scratch, dest);
-+}
-+
-+void MacroAssembler::xor64(Imm64 imm, Register64 dest) {
-+ uint64_t u = imm.value;
-+ // xori/xoris zero-extend their immediates; for unsigned-32-fit values
-+ // they replace the scratch+xor sequence with 1-2 insns.
-+ if (u <= 0xFFFFFFFFu) {
-+ uint16_t lo = uint16_t(u);  // bits 0..15 of the immediate
-+ uint16_t hi = uint16_t(u >> 16);  // bits 16..31 of the immediate
-+ if (hi == 0) {
-+ as_xori(dest.reg, dest.reg, lo);  // also reached for u == 0
-+ } else if (lo == 0) {
-+ as_xoris(dest.reg, dest.reg, hi);
-+ } else {
-+ as_xori(dest.reg, dest.reg, lo);
-+ as_xoris(dest.reg, dest.reg, hi);
-+ }
-+ return;
-+ }
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ movePtr(ImmWord(u), scratch);  // high 32 bits non-zero: load, then XOR
-+ as_xor_(dest.reg, dest.reg, scratch);
-+}
-+
-+void MacroAssembler::orPtr(Register src, Register dest) {
-+ as_or_(dest, dest, src);  // dest |= src, full register width
-+}
-+
-+void MacroAssembler::orPtr(Imm32 imm, Register dest) {
-+ uint32_t uimm = uint32_t(imm.value);
-+ uint16_t lo = uimm & 0xFFFF;
-+ uint16_t hi = (uimm >> 16) & 0xFFFF;
-+ // ori/oris zero-extend their immediates, so for non-negative Imm32 (high
-+ // 32 of sign-extended value = 0) we can use ori+oris to OR the full
-+ // 32-bit pattern in 1-2 insns. Negative Imm32 sign-extends to set high
-+ // bits 32..63 in the OR — those bits would be lost with ori+oris alone.
-+ if (imm.value >= 0) {
-+ if (hi == 0) {
-+ as_ori(dest, dest, lo);
-+ } else if (lo == 0) {
-+ as_oris(dest, dest, hi);
-+ } else {
-+ as_ori(dest, dest, lo);
-+ as_oris(dest, dest, hi);
-+ }
-+ return;
-+ }
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ movePtr(ImmWord(uintptr_t(intptr_t(imm.value))), scratch);  // sign-extended imm
-+ as_or_(dest, dest, scratch);
-+}
-+
-+void MacroAssembler::orPtr(Imm32 imm, Register src, Register dest) {
-+ if (src != dest) {
-+ xs_mr(dest, src);  // copy first, then OR in place
-+ }
-+ orPtr(imm, dest);
-+}
-+
-+void MacroAssembler::or64(Register64 src, Register64 dest) {
-+ as_or_(dest.reg, dest.reg, src.reg);  // dest |= src
-+}
-+
-+void MacroAssembler::xor64(Register64 src, Register64 dest) {
-+ as_xor_(dest.reg, dest.reg, src.reg);  // dest ^= src
-+}
-+
-+void MacroAssembler::xorPtr(Register src, Register dest) {
-+ as_xor_(dest, dest, src);  // dest ^= src, full register width
-+}
-+
-+void MacroAssembler::xorPtr(Imm32 imm, Register dest) {
-+ uint32_t uimm = uint32_t(imm.value);
-+ uint16_t lo = uimm & 0xFFFF;
-+ uint16_t hi = (uimm >> 16) & 0xFFFF;
-+ if (imm.value >= 0) {  // same non-negative restriction as orPtr(Imm32)
-+ if (hi == 0) {
-+ as_xori(dest, dest, lo);
-+ } else if (lo == 0) {
-+ as_xoris(dest, dest, hi);
-+ } else {
-+ as_xori(dest, dest, lo);
-+ as_xoris(dest, dest, hi);
-+ }
-+ return;
-+ }
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ movePtr(ImmWord(uintptr_t(intptr_t(imm.value))), scratch);  // sign-extended imm
-+ as_xor_(dest, dest, scratch);
-+}
-+
-+void MacroAssembler::xorPtr(Imm32 imm, Register src, Register dest) {
-+ if (src != dest) {
-+ xs_mr(dest, src);  // copy first, then XOR in place
-+ }
-+ xorPtr(imm, dest);
-+}
-+
-+void MacroAssembler::xor32(Register src, Register dest) {
-+ as_xor_(dest, dest, src);
-+ as_extsw(dest, dest);  // re-establish the sign-extended 32-bit form
-+}
-+
-+void MacroAssembler::xor32(Imm32 imm, Register dest) {
-+ uint32_t uimm = uint32_t(imm.value);
-+ uint16_t lo = uimm & 0xFFFF;  // low halfword, for xori
-+ uint16_t hi = (uimm >> 16) & 0xFFFF;  // high halfword, for xoris
-+ if (hi == 0) {
-+ as_xori(dest, dest, lo);
-+ } else if (lo == 0) {
-+ as_xoris(dest, dest, hi);
-+ } else {
-+ // xori + xoris pair — 2 insns, no scratch GPR.
-+ as_xori(dest, dest, lo);
-+ as_xoris(dest, dest, hi);
-+ }
-+ as_extsw(dest, dest);  // shared tail for all three paths
-+}
-+
-+void MacroAssembler::xor32(Imm32 imm, Register src, Register dest) {
-+ if (src != dest) {
-+ xs_mr(dest, src);  // copy first, then XOR in place
-+ }
-+ xor32(imm, dest);
-+}
-+
-+void MacroAssembler::xor32(Imm32 imm, const Address& dest) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ load32(dest, scratch);  // read-modify-write through scratch
-+ xor32(imm, scratch);
-+ store32(scratch, dest);
-+}
-+
-+void MacroAssembler::xor32(const Address& src, Register dest) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ load32(src, scratch);
-+ xor32(scratch, dest);  // register form does the XOR and the extsw
-+}
-+
-+// ===============================================================
-+// Swap instructions
-+
-+void MacroAssembler::byteSwap16SignExtend(Register reg) {
-+ if (HasPOWER10()) {
-+ // brh byte-reverses every halfword in reg; extsh keeps just the
-+ // low halfword's byte-reversed value, sign-extended to 64 bits.
-+ as_brh(reg, reg);
-+ as_extsh(reg, reg);
-+ return;
-+ }
-+ // POWER8/9: rotate-and-mask synthesis. Swap bytes in low halfword via
-+ // (reg<<8)&0xFF00 | (reg>>8)&0xFF, then sign-extend.
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ as_rlwinm(scratch, reg, 8, 16, 23); // scratch = (reg<<8) & 0xFF00
-+ as_rlwinm(reg, reg, 24, 24, 31); // reg = (reg>>8) & 0xFF
-+ as_or_(reg, reg, scratch);  // combine the two swapped bytes
-+ as_extsh(reg, reg);
-+}
-+
-+void MacroAssembler::byteSwap16ZeroExtend(Register reg) {
-+ if (HasPOWER10()) {
-+ // brh byte-reverses every halfword; rldicl with sh=0,mb=48 zeroes
-+ // the upper 48 bits — no CR0 side effect (vs andi.).
-+ as_brh(reg, reg);
-+ as_rldicl(reg, reg, 0, 48);
-+ return;
-+ }
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ // Both rlwinm forms zero-extend the 64-bit destination per ISA v3.0B
-+ // (mask M = MASK(MB+32, ME+32) is 0 above bit 31), so after the OR the
-+ // upper 48 bits are already zero — no follow-up clearing needed.
-+ as_rlwinm(scratch, reg, 8, 16, 23);  // scratch = (reg<<8) & 0xFF00
-+ as_rlwinm(reg, reg, 24, 24, 31);  // reg = (reg>>8) & 0xFF
-+ as_or_(reg, reg, scratch);
-+}
-+
-+void MacroAssembler::byteSwap32(Register reg) {
-+ if (HasPOWER10()) {
-+ // brw byte-reverses both 32-bit halves; extsw drops the upper half
-+ // and sign-extends the byte-reversed low word to 64 bits.
-+ as_brw(reg, reg);
-+ as_extsw(reg, reg);
-+ return;
-+ }
-+ // POWER8/9: rotate-with-insert synthesis (4 insns).
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ // Build the swapped word in scratch; reg stays intact until the final extsw.
-+ as_rlwinm(scratch, reg, 8, 0, 31); // rotl32 by 8
-+ as_rlwimi(scratch, reg, 24, 0, 7); // insert byte 0
-+ as_rlwimi(scratch, reg, 24, 16, 23); // insert byte 2
-+ // Sign-extend to 64 bits (as 32-bit value).
-+ as_extsw(reg, scratch);
-+}
-+
-+void MacroAssembler::byteSwap64(Register64 reg64) {
-+ if (HasPOWER10()) {
-+ // 1 insn, no FPR round-trip.
-+ as_brd(reg64.reg, reg64.reg);
-+ } else if (HasPOWER9()) {
-+ as_mtvsrd(ScratchDoubleReg, reg64.reg);  // clobbers ScratchDoubleReg
-+ as_xxbrd(ScratchDoubleReg, ScratchDoubleReg);
-+ as_mfvsrd(reg64.reg, ScratchDoubleReg);
-+ } else {
-+ // POWER8: byte-swap via stack using stwbrx (word byte-reverse store).
-+ // stwbrx RS,RA,RB stores RS byte-reversed at RA+RB.
-+ // For 64-bit swap: store high word reversed at addr+0, low word at addr+4.
-+ Register r = reg64.reg;
-+ UseScratchRegisterScope temps(*this);
-+ Register tmp = temps.Acquire();
-+ as_stdu(StackPointer, StackPointer, -16);  // open a 16-byte temporary slot
-+ // Store low 32 bits byte-reversed at SP+12.
-+ as_addi(tmp, StackPointer, 12);
-+ as_stwbrx(r, r0, tmp); // r0 as RA = 0, so addr = tmp
-+ // Store high 32 bits byte-reversed at SP+8.
-+ x_srdi(r, r, 32);  // destroys r, which is reloaded below
-+ as_addi(tmp, StackPointer, 8);
-+ as_stwbrx(r, r0, tmp); // addr = tmp
-+ // Load reversed 64-bit value from SP+8.
-+ as_ld(r, StackPointer, 8);
-+ as_addi(StackPointer, StackPointer, 16);  // release the temporary slot
-+ }
-+}
-+
-+// ===============================================================
-+// Arithmetic functions
-+
-+void MacroAssembler::addPtr(Register src, Register dest) {
-+ as_add(dest, dest, src);  // dest += src, full register width
-+}
-+
-+void MacroAssembler::addPtr(Imm32 imm, Register dest) {
-+ int32_t val = imm.value;
-+ if (is_intN(val, 16)) {
-+ as_addi(dest, dest, val);  // fits addi's signed 16-bit immediate
-+ return;
-+ }
-+ if (HasPOWER10()) {
-+ // Imm32 always fits 34-bit signed; paddi does dest = dest + imm in one
-+ // prefixed instruction with no scratch.
-+ as_paddi(dest, dest, int64_t(val), /*R=*/false);
-+ return;
-+ }
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ movePtr(ImmWord(int64_t(val)), scratch);  // sign-extended imm
-+ as_add(dest, dest, scratch);
-+}
-+
-+void MacroAssembler::addPtr(ImmWord imm, Register dest) {
-+ if (is_intN(int64_t(imm.value), 16)) {
-+ as_addi(dest, dest, int16_t(imm.value));  // fits addi's signed 16-bit immediate
-+ return;
-+ }
-+ if (HasPOWER10() && is_intN((intptr_t)imm.value, 34)) {
-+ as_paddi(dest, dest, (int64_t)imm.value, /*R=*/false);  // 34-bit prefixed form
-+ return;
-+ }
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ movePtr(imm, scratch);  // generic path: load the constant, then add
-+ as_add(dest, dest, scratch);
-+}
-+
-+void MacroAssembler::add64(Register64 src, Register64 dest) {
-+ as_add(dest.reg, dest.reg, src.reg);  // dest += src
-+}
-+
-+void MacroAssembler::add64(Imm32 imm, Register64 dest) {
-+ addPtr(Imm32(imm.value), dest.reg);  // same as the pointer-width add
-+}
-+
-+void MacroAssembler::add64(Imm64 imm, Register64 dest) {
-+ if (is_intN(int64_t(imm.value), 16)) {
-+ as_addi(dest.reg, dest.reg, int16_t(imm.value));  // signed 16-bit immediate
-+ return;
-+ }
-+ if (HasPOWER10() && is_intN((int64_t)imm.value, 34)) {
-+ as_paddi(dest.reg, dest.reg, (int64_t)imm.value, /*R=*/false);  // 34-bit form
-+ return;
-+ }
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ MOZ_ASSERT(dest.reg != scratch);  // dest must survive loading the constant
-+ movePtr(ImmWord(imm.value), scratch);
-+ as_add(dest.reg, dest.reg, scratch);
-+}
-+
-+void MacroAssembler::add32(Register src, Register dest) {
-+ as_add(dest, dest, src);
-+ as_extsw(dest, dest);  // re-establish the sign-extended 32-bit form
-+}
-+
-+void MacroAssembler::add32(Imm32 imm, Register dest) {
-+ if (is_intN(imm.value, 16)) {
-+ as_addi(dest, dest, imm.value);  // fits addi's signed 16-bit immediate
-+ } else {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ move32(imm, scratch);
-+ as_add(dest, dest, scratch);
-+ }
-+ as_extsw(dest, dest);  // shared tail for both paths
-+}
-+
-+void MacroAssembler::add32(Imm32 imm, Register src, Register dest) {
-+ move32(src, dest);  // copy first, then add in place
-+ add32(imm, dest);
-+}
-+
-+void MacroAssembler::add32(Imm32 imm, const Address& dest) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ load32(dest, scratch);  // read-modify-write through scratch
-+ add32(imm, scratch);  // may acquire a further scratch for wide immediates
-+ store32(scratch, dest);
-+}
-+
-+void MacroAssembler::add32(const Address& src, Register dest) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ load32(src, scratch);
-+ as_add(dest, dest, scratch);
-+ as_extsw(dest, dest);  // re-establish the sign-extended 32-bit form
-+}
-+
-+void MacroAssembler::addPtr(Imm32 imm, const Address& dest) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ loadPtr(dest, scratch);  // read-modify-write through scratch
-+ addPtr(imm, scratch);  // may acquire a further scratch for wide immediates
-+ storePtr(scratch, dest);
-+}
-+
-+void MacroAssembler::addPtr(const Address& src, Register dest) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ loadPtr(src, scratch);
-+ addPtr(scratch, dest);  // dest += *src
-+}
-+
-+void MacroAssembler::addDouble(FloatRegister src, FloatRegister dest) {
-+ as_fadd(dest, dest, src);  // dest += src (double precision)
-+}
-+
-+void MacroAssembler::addFloat32(FloatRegister src, FloatRegister dest) {
-+ as_fadds(dest, dest, src);  // dest += src (single-precision form)
-+}
-+
-+CodeOffset MacroAssembler::sub32FromStackPtrWithPatch(Register dest) {
-+ CodeOffset offset = CodeOffset(currentOffset());  // start of the load stanza
-+ emitLoad64Stanza(dest, 0);  // placeholder 0; see patchSub32FromStackPtr
-+ as_subf(dest, dest, StackPointer);  // dest = StackPointer - dest
-+ return offset;
-+}
-+
-+void MacroAssembler::patchSub32FromStackPtr(CodeOffset offset, Imm32 imm) {
-+ Instruction* inst = (Instruction*)editSrc(BufferOffset(offset.offset()));
-+ UpdateLoad64Value(inst, uint64_t(int64_t(imm.value)));  // sign-extended imm
-+}
-+
-+void MacroAssembler::subPtr(Register src, Register dest) {
-+ as_subf(dest, src, dest);  // subf computes RB - RA, i.e. dest - src
-+}
-+
-+void MacroAssembler::subPtr(Imm32 imm, Register dest) {
-+ if (is_intN(-int64_t(imm.value), 16)) {  // negated in 64 bits: no overflow
-+ as_addi(dest, dest, -imm.value);  // subtract by adding the negation
-+ return;
-+ }
-+ if (HasPOWER10()) {
-+ // -Imm32 fits 34-bit signed (worst case -INT32_MIN = +2^31, well within
-+ // ±2^33). paddi with the negated immediate does the subtract in 1 prefixed
-+ // insn with no scratch.
-+ as_paddi(dest, dest, -int64_t(imm.value), /*R=*/false);
-+ return;
-+ }
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ movePtr(ImmWord(int64_t(imm.value)), scratch);  // un-negated, sign-extended
-+ as_subf(dest, scratch, dest);  // dest = dest - scratch
-+}
-+
-+void MacroAssembler::sub64(Register64 src, Register64 dest) {
-+ as_subf(dest.reg, src.reg, dest.reg);  // subf computes RB - RA: dest - src
-+}
-+
-+void MacroAssembler::sub64(Imm64 imm, Register64 dest) {  // dest -= imm
-+ if (is_intN(int64_t(0 - uint64_t(imm.value)), 16)) {  // unsigned negate: no UB
-+ as_addi(dest.reg, dest.reg, int16_t(0 - uint64_t(imm.value)));
-+ return;
-+ }
-+ if (HasPOWER10() && is_intN(int64_t(0 - uint64_t(imm.value)), 34)) {
-+ as_paddi(dest.reg, dest.reg, int64_t(0 - uint64_t(imm.value)), /*R=*/false);
-+ return;
-+ }
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ MOZ_ASSERT(dest.reg != scratch);  // dest must survive loading the constant
-+ movePtr(ImmWord(imm.value), scratch);  // un-negated value (covers INT64_MIN)
-+ as_subf(dest.reg, scratch, dest.reg);  // subf: dest = dest - scratch
-+}
-+
-+void MacroAssembler::sub32(Register src, Register dest) {
-+ as_subf(dest, src, dest);  // dest = dest - src
-+ as_extsw(dest, dest);  // re-establish the sign-extended 32-bit form
-+}
-+
-+void MacroAssembler::sub32(Imm32 imm, Register dest) {
-+ if (is_intN(-int64_t(imm.value), 16)) {  // negated in 64 bits: no overflow
-+ as_addi(dest, dest, -imm.value);  // subtract by adding the negation
-+ } else {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ move32(imm, scratch);
-+ as_subf(dest, scratch, dest);  // dest = dest - scratch
-+ }
-+ as_extsw(dest, dest);  // shared tail for both paths
-+}
-+
-+void MacroAssembler::sub32(const Address& src, Register dest) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ load32(src, scratch);
-+ as_subf(dest, scratch, dest);  // dest = dest - *src
-+ as_extsw(dest, dest);
-+}
-+
-+void MacroAssembler::subPtr(Register src, const Address& dest) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ loadPtr(dest, scratch);  // read-modify-write through scratch
-+ as_subf(scratch, src, scratch);  // scratch = scratch - src
-+ storePtr(scratch, dest);
-+}
-+
-+void MacroAssembler::subPtr(const Address& addr, Register dest) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ loadPtr(addr, scratch);
-+ as_subf(dest, scratch, dest);  // dest = dest - *addr
-+}
-+
-+void MacroAssembler::subDouble(FloatRegister src, FloatRegister dest) {
-+ as_fsub(dest, dest, src);  // dest -= src (double precision)
-+}
-+
-+void MacroAssembler::subFloat32(FloatRegister src, FloatRegister dest) {
-+ as_fsubs(dest, dest, src);  // dest -= src (single-precision form)
-+}
-+
-+void MacroAssembler::mul64(const Register64& rhs, const Register64& srcDest) {
-+ as_mulld(srcDest.reg, srcDest.reg, rhs.reg);  // srcDest *= rhs
-+}
-+
-+void MacroAssembler::mul64(Imm64 imm, const Register64& dest) {
-+ if (is_intN(int64_t(imm.value), 16)) {
-+ as_mulli(dest.reg, dest.reg, int16_t(imm.value));  // signed 16-bit immediate
-+ } else {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ MOZ_ASSERT(dest.reg != scratch);  // dest must survive loading the constant
-+ movePtr(ImmWord(imm.value), scratch);
-+ as_mulld(dest.reg, dest.reg, scratch);
-+ }
-+}
-+
-+void MacroAssembler::mul64(Imm64 imm, const Register64& dest,
-+ const Register temp) {
-+ MOZ_ASSERT(temp == Register::Invalid());  // no caller-supplied temp is used
-+ mul64(imm, dest);
-+}
-+
-+void MacroAssembler::mul64(const Register64& src, const Register64& dest,
-+ const Register temp) {
-+ MOZ_ASSERT(temp == Register::Invalid());  // no caller-supplied temp is used
-+ as_mulld(dest.reg, dest.reg, src.reg);
-+}
-+
-+void MacroAssembler::mulPtr(Register rhs, Register srcDest) {
-+ as_mulld(srcDest, srcDest, rhs);  // srcDest *= rhs, full register width
-+}
-+
-+void MacroAssembler::mulPtr(ImmWord rhs, Register srcDest) {
-+ if (is_intN(int64_t(rhs.value), 16)) {
-+ as_mulli(srcDest, srcDest, int16_t(rhs.value));  // signed 16-bit immediate
-+ return;
-+ }
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ MOZ_ASSERT(srcDest != scratch);  // srcDest must survive loading the constant
-+ movePtr(rhs, scratch);
-+ mulPtr(scratch, srcDest);
-+}
-+
-+void MacroAssembler::mulBy3(Register src, Register dest) {
-+ // mulli is the 16-bit-immediate form of mulld. 1 insn, no scratch,
-+ // src==dest aliasing safe (RA read before RT write).
-+ as_mulli(dest, src, 3);
-+}
-+
-+void MacroAssembler::mul32(Register rhs, Register srcDest) {
-+ as_mullw(srcDest, srcDest, rhs);
-+ as_extsw(srcDest, srcDest);  // re-establish the sign-extended 32-bit form
-+}
-+
-+void MacroAssembler::mul32(Imm32 imm, Register srcDest) {
-+ if (is_intN(imm.value, 16)) {
-+ as_mulli(srcDest, srcDest, imm.value);  // signed 16-bit immediate
-+ } else {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ move32(imm, scratch);
-+ as_mullw(srcDest, srcDest, scratch);
-+ }
-+ as_extsw(srcDest, srcDest);  // shared tail for both paths
-+}
-+
-+void MacroAssembler::mulHighUnsigned32(Imm32 imm, Register src, Register dest) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ MOZ_ASSERT(src != scratch);  // src must survive loading the constant
-+ move32(imm, scratch);
-+ as_mulhwu(dest, src, scratch);  // high word of the unsigned 32x32 product
-+ as_extsw(dest, dest);
-+}
-+
-+void MacroAssembler::mulFloat32(FloatRegister src, FloatRegister dest) {
-+ as_fmuls(dest, dest, src);  // dest *= src (single-precision form)
-+}
-+
-+void MacroAssembler::mulDouble(FloatRegister src, FloatRegister dest) {
-+ as_fmul(dest, dest, src);  // dest *= src (double precision)
-+}
-+
-+void MacroAssembler::mulDoublePtr(ImmPtr imm, Register temp,
-+ FloatRegister dest) {  // `temp` is not used by this implementation
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ movePtr(imm, scratch);  // address of the double operand
-+ as_lfd(ScratchDoubleReg, scratch, 0);  // clobbers ScratchDoubleReg
-+ as_fmul(dest, dest, ScratchDoubleReg);
-+}
-+
-+void MacroAssembler::inc64(AbsoluteAddress dest) {  // 64-bit load/add 1/store
-+ UseScratchRegisterScope temps(asMasm());
-+ Register addrReg = temps.Acquire();
-+ movePtr(ImmWord(uintptr_t(dest.addr)), addrReg);
-+ Register scratch = SecondScratchReg;  // NOTE(review): not acquired via temps; confirm it cannot alias addrReg
-+ as_ld(scratch, addrReg, 0);
-+ as_addi(scratch, scratch, 1);
-+ as_std(scratch, addrReg, 0);  // plain ld/std pair, not an atomic sequence
-+}
-+
-+// dest = dest / src, single-precision divide.
-+void MacroAssembler::divFloat32(FloatRegister src, FloatRegister dest) {
-+  as_fdivs(dest, dest, src);
-+}
-+
-+// dest = dest / src, double-precision divide.
-+void MacroAssembler::divDouble(FloatRegister src, FloatRegister dest) {
-+  as_fdiv(dest, dest, src);
-+}
-+
-+// dest = lhs / rhs as a 32-bit division; isUnsigned selects the unsigned
-+// word divide. The result is sign-extended in both cases.
-+void MacroAssembler::quotient32(Register lhs, Register rhs, Register dest,
-+                                bool isUnsigned) {
-+  if (!isUnsigned) {
-+    as_divw(dest, lhs, rhs);
-+  } else {
-+    as_divwu(dest, lhs, rhs);
-+  }
-+  as_extsw(dest, dest);
-+}
-+
-+// dest = lhs / rhs as a 64-bit division; isUnsigned selects the unsigned
-+// doubleword divide.
-+void MacroAssembler::quotient64(Register lhs, Register rhs, Register dest,
-+                                bool isUnsigned) {
-+  if (!isUnsigned) {
-+    as_divd(dest, lhs, rhs);
-+    return;
-+  }
-+  as_divdu(dest, lhs, rhs);
-+}
-+
-+// dest = lhs % rhs as a 32-bit remainder, sign-extended. Uses the modulo
-+// instructions when HasPOWER9() is true, otherwise divide / multiply /
-+// subtract through a scratch register.
-+void MacroAssembler::remainder32(Register lhs, Register rhs, Register dest,
-+                                 bool isUnsigned) {
-+  if (!HasPOWER9()) {
-+    UseScratchRegisterScope temps(asMasm());
-+    Register product = temps.Acquire();
-+    // product = (lhs / rhs) * rhs
-+    if (isUnsigned) {
-+      as_divwu(product, lhs, rhs);
-+    } else {
-+      as_divw(product, lhs, rhs);
-+    }
-+    as_mullw(product, product, rhs);
-+    // dest = lhs - product
-+    as_subf(dest, product, lhs);
-+  } else if (isUnsigned) {
-+    as_moduw(dest, lhs, rhs);
-+  } else {
-+    as_modsw(dest, lhs, rhs);
-+  }
-+  as_extsw(dest, dest);
-+}
-+
-+// dest = lhs % rhs as a 64-bit remainder. Uses the modulo instructions when
-+// HasPOWER9() is true, otherwise divide / multiply / subtract through a
-+// scratch register.
-+void MacroAssembler::remainder64(Register lhs, Register rhs, Register dest,
-+                                 bool isUnsigned) {
-+  if (!HasPOWER9()) {
-+    UseScratchRegisterScope temps(asMasm());
-+    Register product = temps.Acquire();
-+    // product = (lhs / rhs) * rhs
-+    if (isUnsigned) {
-+      as_divdu(product, lhs, rhs);
-+    } else {
-+      as_divd(product, lhs, rhs);
-+    }
-+    as_mulld(product, product, rhs);
-+    // dest = lhs - product
-+    as_subf(dest, product, lhs);
-+    return;
-+  }
-+  if (isUnsigned) {
-+    as_modud(dest, lhs, rhs);
-+  } else {
-+    as_modsd(dest, lhs, rhs);
-+  }
-+}
-+
-+// Negates the 64-bit value in place.
-+void MacroAssembler::neg64(Register64 reg) {
-+  as_neg(reg.reg, reg.reg);
-+}
-+
-+// Negates the pointer-sized value in place.
-+void MacroAssembler::negPtr(Register reg) {
-+  as_neg(reg, reg);
-+}
-+
-+// Negates the 32-bit value in place and sign-extends the low word.
-+void MacroAssembler::neg32(Register reg) {
-+  as_neg(reg, reg);
-+  as_extsw(reg, reg);
-+}
-+
-+// Flips the sign of the double in reg, in place.
-+void MacroAssembler::negateDouble(FloatRegister reg) {
-+  as_fneg(reg, reg);
-+}
-+
-+// Flips the sign of the float32 in reg, in place (same instruction as the
-+// double variant).
-+void MacroAssembler::negateFloat(FloatRegister reg) {
-+  as_fneg(reg, reg);
-+}
-+
-+// dest = |src| for a 32-bit value, computed branch-free as
-+// (src ^ mask) - mask where mask is src shifted right arithmetically by 31.
-+void MacroAssembler::abs32(Register src, Register dest) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register signMask = temps.Acquire();
-+  as_srawi(signMask, src, 31);
-+  as_xor_(dest, src, signMask);
-+  // subf computes dest - signMask.
-+  as_subf(dest, signMask, dest);
-+  as_extsw(dest, dest);
-+}
-+
-+// dest = |src| for a float32 operand.
-+void MacroAssembler::absFloat32(FloatRegister src, FloatRegister dest) {
-+  as_fabs(dest, src);
-+}
-+
-+// dest = |src| for a double operand.
-+void MacroAssembler::absDouble(FloatRegister src, FloatRegister dest) {
-+  as_fabs(dest, src);
-+}
-+
-+// dest = sqrt(src), single precision.
-+void MacroAssembler::sqrtFloat32(FloatRegister src, FloatRegister dest) {
-+  as_fsqrts(dest, src);
-+}
-+
-+// dest = sqrt(src), double precision.
-+void MacroAssembler::sqrtDouble(FloatRegister src, FloatRegister dest) {
-+  as_fsqrt(dest, src);
-+}
-+
-+// dest = min(lhs, rhs) using a signed 32-bit compare and a conditional
-+// select, with no branch.
-+void MacroAssembler::min32(Register lhs, Register rhs, Register dest) {
-+  as_cmpw(lhs, rhs);
-+  // isel rt, ra, rb, cond: rt = (CR[cond] set) ? ra : rb.
-+  // LessThan is set when lhs < rhs, so lhs is selected; otherwise rhs.
-+  as_isel(dest, lhs, rhs, LessThan, cr0);
-+}
-+
-+// dest = min(lhs, rhs) with an immediate right-hand side, which is first
-+// loaded into a scratch register.
-+void MacroAssembler::min32(Register lhs, Imm32 rhs, Register dest) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register rhsReg = temps.Acquire();
-+  move32(rhs, rhsReg);
-+  min32(lhs, rhsReg, dest);
-+}
-+
-+// dest = max(lhs, rhs) using a signed 32-bit compare and a conditional
-+// select, with no branch.
-+void MacroAssembler::max32(Register lhs, Register rhs, Register dest) {
-+  as_cmpw(lhs, rhs);
-+  // GreaterThan is set when lhs > rhs, so lhs is selected; otherwise rhs.
-+  as_isel(dest, lhs, rhs, GreaterThan, cr0);
-+}
-+
-+// dest = max(lhs, rhs) with an immediate right-hand side, which is first
-+// loaded into a scratch register.
-+void MacroAssembler::max32(Register lhs, Imm32 rhs, Register dest) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register rhsReg = temps.Acquire();
-+  move32(rhs, rhsReg);
-+  max32(lhs, rhsReg, dest);
-+}
-+
-+// dest = min(lhs, rhs) using a signed 64-bit compare and a conditional
-+// select.
-+void MacroAssembler::minPtr(Register lhs, Register rhs, Register dest) {
-+  as_cmpd(lhs, rhs);
-+  // lhs is selected when LessThan is set, rhs otherwise.
-+  as_isel(dest, lhs, rhs, LessThan, cr0);
-+}
-+
-+// dest = min(lhs, rhs) with an immediate right-hand side, which is first
-+// loaded into a scratch register.
-+void MacroAssembler::minPtr(Register lhs, ImmWord rhs, Register dest) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register rhsReg = temps.Acquire();
-+  movePtr(rhs, rhsReg);
-+  minPtr(lhs, rhsReg, dest);
-+}
-+
-+// dest = max(lhs, rhs) using a signed 64-bit compare and a conditional
-+// select.
-+void MacroAssembler::maxPtr(Register lhs, Register rhs, Register dest) {
-+  as_cmpd(lhs, rhs);
-+  // lhs is selected when GreaterThan is set, rhs otherwise.
-+  as_isel(dest, lhs, rhs, GreaterThan, cr0);
-+}
-+
-+// dest = max(lhs, rhs) with an immediate right-hand side, which is first
-+// loaded into a scratch register.
-+void MacroAssembler::maxPtr(Register lhs, ImmWord rhs, Register dest) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register rhsReg = temps.Acquire();
-+  movePtr(rhs, rhsReg);
-+  maxPtr(lhs, rhsReg, dest);
-+}
-+
-+// srcDest = min(srcDest, other) for float32 operands.
-+//
-+// handleNaN only affects the pre-POWER9 path: when true, an unordered
-+// compare is routed to a tail that adds the two operands; when false, no
-+// unordered branch and no tail are emitted.
-+void MacroAssembler::minFloat32(FloatRegister other, FloatRegister srcDest,
-+                                bool handleNaN) {
-+  if (HasPOWER9()) {
-+    // xsminjdp matches ECMA-262 Math.min semantics for ±0 and NaN.
-+    // Float32 values are stored as doubles in PPC FPRs; the J-form
-+    // result is bit-exact for values representable in float32 (which
-+    // includes every NaN/±0/±Inf corner case JS observes). 1 insn.
-+    as_xsminjdp(srcDest, srcDest, other);
-+    return;
-+  }
-+  Label done, nan, equal;
-+  as_fcmpu(srcDest, other);
-+  if (handleNaN) {
-+    ma_b(Assembler::DoubleUnordered, &nan);
-+  }
-+  // Equal operands may be +0 vs -0 and need the special case below.
-+  ma_b(Assembler::DoubleEqual, &equal);
-+  // srcDest is already the smaller value.
-+  ma_b(Assembler::DoubleLessThan, &done);
-+  as_fmr(srcDest, other);
-+  jump(&done);
-+
-+  bind(&equal);
-+  // Both operands are equal. Check if they're zero.
-+  loadConstantFloat32(0.0f, ScratchFloat32Reg);
-+  as_fcmpu(srcDest, ScratchFloat32Reg);
-+  // If not zero, they're identical; keep srcDest.
-+  ma_b(Assembler::DoubleNotEqual, &done);
-+  // Both are some combination of +0/-0. For min, result should be -0
-+  // if either is -0: -((-srcDest) - other) gives -0 when either is -0.
-+  as_fneg(ScratchFloat32Reg, srcDest);
-+  as_fsubs(ScratchFloat32Reg, ScratchFloat32Reg, other);
-+  as_fneg(srcDest, ScratchFloat32Reg);
-+
-+  if (handleNaN) {
-+    // Only jump over the NaN tail when that tail is emitted; without it
-+    // the zero path simply falls through to `done`.
-+    jump(&done);
-+    bind(&nan);
-+    as_fadds(srcDest, srcDest, other);
-+  }
-+  bind(&done);
-+}
-+
-+// srcDest = min(srcDest, other) for double operands.
-+//
-+// handleNaN only affects the pre-POWER9 path: when true, an unordered
-+// compare is routed to a tail that adds the two operands; when false, no
-+// unordered branch and no tail are emitted.
-+void MacroAssembler::minDouble(FloatRegister other, FloatRegister srcDest,
-+                               bool handleNaN) {
-+  if (HasPOWER9()) {
-+    // xsminjdp matches ECMA-262 Math.min semantics exactly (covers
-+    // 19 corner cases including ±0 and NaN). 1 insn vs ~12 for the
-+    // fcmpu/branch fallback. POWER8 fallback follows.
-+    as_xsminjdp(srcDest, srcDest, other);
-+    return;
-+  }
-+  Label done, nan, equal;
-+  as_fcmpu(srcDest, other);
-+  if (handleNaN) {
-+    ma_b(Assembler::DoubleUnordered, &nan);
-+  }
-+  // Equal operands may be +0 vs -0 and need the special case below.
-+  ma_b(Assembler::DoubleEqual, &equal);
-+  // srcDest is already the smaller value.
-+  ma_b(Assembler::DoubleLessThan, &done);
-+  as_fmr(srcDest, other);
-+  jump(&done);
-+
-+  bind(&equal);
-+  // Equal and non-zero means identical values: keep srcDest.
-+  loadConstantDouble(0.0, ScratchDoubleReg);
-+  as_fcmpu(srcDest, ScratchDoubleReg);
-+  ma_b(Assembler::DoubleNotEqual, &done);
-+  // -((-srcDest) - other) gives -0 when either is -0.
-+  as_fneg(ScratchDoubleReg, srcDest);
-+  as_fsub(ScratchDoubleReg, ScratchDoubleReg, other);
-+  as_fneg(srcDest, ScratchDoubleReg);
-+
-+  if (handleNaN) {
-+    // Only jump over the NaN tail when that tail is emitted; without it
-+    // the zero path simply falls through to `done`.
-+    jump(&done);
-+    bind(&nan);
-+    as_fadd(srcDest, srcDest, other);
-+  }
-+  bind(&done);
-+}
-+
-+// srcDest = max(srcDest, other) for float32 operands.
-+//
-+// handleNaN only affects the pre-POWER9 path: when true, an unordered
-+// compare is routed to a tail that adds the two operands; when false, no
-+// unordered branch and no tail are emitted.
-+void MacroAssembler::maxFloat32(FloatRegister other, FloatRegister srcDest,
-+                                bool handleNaN) {
-+  if (HasPOWER9()) {
-+    // Single-instruction J-form maximum; float32 values live in the FPRs
-+    // in double format, so the double-precision form is used here too.
-+    as_xsmaxjdp(srcDest, srcDest, other);
-+    return;
-+  }
-+  Label done, nan, equal;
-+  as_fcmpu(srcDest, other);
-+  if (handleNaN) {
-+    ma_b(Assembler::DoubleUnordered, &nan);
-+  }
-+  // Equal operands may be +0 vs -0 and need the special case below.
-+  ma_b(Assembler::DoubleEqual, &equal);
-+  // srcDest is already the larger value.
-+  ma_b(Assembler::DoubleGreaterThan, &done);
-+  as_fmr(srcDest, other);
-+  jump(&done);
-+
-+  bind(&equal);
-+  // Equal and non-zero means identical values: keep srcDest.
-+  loadConstantFloat32(0.0f, ScratchFloat32Reg);
-+  as_fcmpu(srcDest, ScratchFloat32Reg);
-+  ma_b(Assembler::DoubleNotEqual, &done);
-+  // -0 + -0 = -0 and -0 + 0 = +0.
-+  as_fadds(srcDest, srcDest, other);
-+
-+  if (handleNaN) {
-+    // Only jump over the NaN tail when that tail is emitted; without it
-+    // the zero path simply falls through to `done`.
-+    jump(&done);
-+    bind(&nan);
-+    as_fadds(srcDest, srcDest, other);
-+  }
-+  bind(&done);
-+}
-+
-+// srcDest = max(srcDest, other) for double operands.
-+//
-+// handleNaN only affects the pre-POWER9 path: when true, an unordered
-+// compare is routed to a tail that adds the two operands; when false, no
-+// unordered branch and no tail are emitted.
-+void MacroAssembler::maxDouble(FloatRegister other, FloatRegister srcDest,
-+                               bool handleNaN) {
-+  if (HasPOWER9()) {
-+    // Single-instruction J-form maximum.
-+    as_xsmaxjdp(srcDest, srcDest, other);
-+    return;
-+  }
-+  Label done, nan, equal;
-+  as_fcmpu(srcDest, other);
-+  if (handleNaN) {
-+    ma_b(Assembler::DoubleUnordered, &nan);
-+  }
-+  // Equal operands may be +0 vs -0 and need the special case below.
-+  ma_b(Assembler::DoubleEqual, &equal);
-+  // srcDest is already the larger value.
-+  ma_b(Assembler::DoubleGreaterThan, &done);
-+  as_fmr(srcDest, other);
-+  jump(&done);
-+
-+  bind(&equal);
-+  // Equal and non-zero means identical values: keep srcDest.
-+  loadConstantDouble(0.0, ScratchDoubleReg);
-+  as_fcmpu(srcDest, ScratchDoubleReg);
-+  ma_b(Assembler::DoubleNotEqual, &done);
-+  // -0 + -0 = -0 and -0 + 0 = +0.
-+  as_fadd(srcDest, srcDest, other);
-+
-+  if (handleNaN) {
-+    // Only jump over the NaN tail when that tail is emitted; without it
-+    // the zero path simply falls through to `done`.
-+    jump(&done);
-+    bind(&nan);
-+    as_fadd(srcDest, srcDest, other);
-+  }
-+  bind(&done);
-+}
-+
-+// ===============================================================
-+// Shift functions
-+
-+// dest = dest << (src & 31), 32-bit, result sign-extended.
-+void MacroAssembler::lshift32(Register src, Register dest) {
-+  UseScratchRegisterScope temps(*this);
-+  Register count = temps.Acquire();
-+  // Keep only the low five bits of the shift amount.
-+  as_rlwinm(count, src, 0, 27, 31);
-+  as_slw(dest, dest, count);
-+  as_extsw(dest, dest);
-+}
-+
-+// In-place form of the three-operand immediate shift below.
-+void MacroAssembler::lshift32(Imm32 imm, Register dest) {
-+  lshift32(imm, dest, dest);
-+}
-+
-+// dest = src << (imm & 31), 32-bit, result sign-extended.
-+void MacroAssembler::lshift32(Imm32 imm, Register src, Register dest) {
-+  x_slwi(dest, src, imm.value & 0x1f);
-+  as_extsw(dest, dest);
-+}
-+
-+// Forwards to lshift32(Register, Register); no extra register constraints
-+// are applied here.
-+void MacroAssembler::flexibleLshift32(Register src, Register dest) {
-+  lshift32(src, dest);
-+}
-+
-+// dest = dest << (shift & 63).
-+void MacroAssembler::lshift64(Register shift, Register64 dest) {
-+  // sld honours a 7-bit shift field, so counts >= 64 would yield 0. Wasm
-+  // i64.shl wants the count taken modulo 64, hence the explicit 6-bit mask.
-+  UseScratchRegisterScope temps(asMasm());
-+  Register count = temps.Acquire();
-+  as_rldicl(count, shift, 0, 58);  // clrldi: keep low 6 bits
-+  as_sld(dest.reg, dest.reg, count);
-+}
-+
-+// dest = dest << imm; imm must be in [0, 64).
-+void MacroAssembler::lshift64(Imm32 imm, Register64 dest) {
-+  MOZ_ASSERT(imm.value >= 0 && imm.value < 64);
-+  x_sldi(dest.reg, dest.reg, imm.value);
-+}
-+
-+// dest = dest << shift, pointer-sized.
-+// NOTE(review): unlike lshift64(Register, Register64), the count is passed
-+// to sld unmasked; confirm callers guarantee shift < 64.
-+void MacroAssembler::lshiftPtr(Register shift, Register dest) {
-+  as_sld(dest, dest, shift);
-+}
-+
-+// In-place form of the three-operand immediate shift below.
-+void MacroAssembler::lshiftPtr(Imm32 imm, Register dest) {
-+  lshiftPtr(imm, dest, dest);
-+}
-+
-+// dest = src << imm; imm must be in [0, 64).
-+void MacroAssembler::lshiftPtr(Imm32 imm, Register src, Register dest) {
-+  MOZ_ASSERT(imm.value >= 0 && imm.value < 64);
-+  x_sldi(dest, src, imm.value);
-+}
-+
-+// Forwards to lshiftPtr(Register, Register).
-+void MacroAssembler::flexibleLshiftPtr(Register shift, Register srcDest) {
-+  lshiftPtr(shift, srcDest);
-+}
-+
-+// dest = dest >>> (src & 31), 32-bit logical shift, result sign-extended.
-+void MacroAssembler::rshift32(Register src, Register dest) {
-+  UseScratchRegisterScope temps(*this);
-+  Register count = temps.Acquire();
-+  // Keep only the low five bits of the shift amount.
-+  as_rlwinm(count, src, 0, 27, 31);
-+  as_srw(dest, dest, count);
-+  as_extsw(dest, dest);
-+}
-+
-+// In-place form of the three-operand immediate shift below.
-+void MacroAssembler::rshift32(Imm32 imm, Register dest) {
-+  rshift32(imm, dest, dest);
-+}
-+
-+// dest = src >>> (imm & 31), 32-bit logical shift, result sign-extended.
-+void MacroAssembler::rshift32(Imm32 imm, Register src, Register dest) {
-+  x_srwi(dest, src, imm.value & 0x1f);
-+  as_extsw(dest, dest);
-+}
-+
-+// Forwards to rshift32(Register, Register).
-+void MacroAssembler::flexibleRshift32(Register src, Register dest) {
-+  rshift32(src, dest);
-+}
-+
-+// dest = dest >> (src & 31), 32-bit arithmetic shift.
-+void MacroAssembler::rshift32Arithmetic(Register src, Register dest) {
-+  UseScratchRegisterScope temps(*this);
-+  Register count = temps.Acquire();
-+  // Keep only the low five bits of the shift amount.
-+  as_rlwinm(count, src, 0, 27, 31);
-+  as_sraw(dest, dest, count);
-+}
-+
-+// In-place form of the three-operand immediate shift below.
-+void MacroAssembler::rshift32Arithmetic(Imm32 imm, Register dest) {
-+  rshift32Arithmetic(imm, dest, dest);
-+}
-+
-+// dest = src >> (imm & 31), 32-bit arithmetic shift.
-+void MacroAssembler::rshift32Arithmetic(Imm32 imm, Register src,
-+                                        Register dest) {
-+  as_srawi(dest, src, imm.value & 0x1f);
-+}
-+
-+// Forwards to rshift32Arithmetic(Register, Register).
-+void MacroAssembler::flexibleRshift32Arithmetic(Register src, Register dest) {
-+  rshift32Arithmetic(src, dest);
-+}
-+
-+// dest = dest >>> (shift & 63), 64-bit logical shift.
-+void MacroAssembler::rshift64(Register shift, Register64 dest) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register count = temps.Acquire();
-+  // Keep only the low six bits of the shift amount.
-+  as_rldicl(count, shift, 0, 58);
-+  as_srd(dest.reg, dest.reg, count);
-+}
-+
-+// dest = dest >>> imm; imm must be in [0, 64).
-+void MacroAssembler::rshift64(Imm32 imm, Register64 dest) {
-+  MOZ_ASSERT(imm.value >= 0 && imm.value < 64);
-+  x_srdi(dest.reg, dest.reg, imm.value);
-+}
-+
-+// dest = dest >> imm, 64-bit arithmetic shift; imm must be in [0, 64).
-+void MacroAssembler::rshift64Arithmetic(Imm32 imm, Register64 dest) {
-+  MOZ_ASSERT(imm.value >= 0 && imm.value < 64);
-+  as_sradi(dest.reg, dest.reg, imm.value);
-+}
-+
-+// dest = dest >> (shift & 63), 64-bit arithmetic shift.
-+void MacroAssembler::rshift64Arithmetic(Register shift, Register64 dest) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register count = temps.Acquire();
-+  // Keep only the low six bits of the shift amount.
-+  as_rldicl(count, shift, 0, 58);
-+  as_srad(dest.reg, dest.reg, count);
-+}
-+
-+// dest = dest >>> shift, pointer-sized logical shift.
-+// NOTE(review): unlike rshift64(Register, Register64), the count is passed
-+// to srd unmasked; confirm callers guarantee shift < 64.
-+void MacroAssembler::rshiftPtr(Register shift, Register dest) {
-+  as_srd(dest, dest, shift);
-+}
-+
-+// In-place form of the three-operand immediate shift below.
-+void MacroAssembler::rshiftPtr(Imm32 imm, Register dest) {
-+  rshiftPtr(imm, dest, dest);
-+}
-+
-+// dest = src >>> imm; imm must be in [0, 64).
-+void MacroAssembler::rshiftPtr(Imm32 imm, Register src, Register dest) {
-+  MOZ_ASSERT(imm.value >= 0 && imm.value < 64);
-+  x_srdi(dest, src, imm.value);
-+}
-+
-+// Forwards to rshiftPtr(Register, Register).
-+void MacroAssembler::flexibleRshiftPtr(Register shift, Register srcDest) {
-+  rshiftPtr(shift, srcDest);
-+}
-+
-+// In-place form of the three-operand immediate shift below.
-+void MacroAssembler::rshiftPtrArithmetic(Imm32 imm, Register dest) {
-+  rshiftPtrArithmetic(imm, dest, dest);
-+}
-+
-+// dest = src >> imm, pointer-sized arithmetic shift; imm must be in [0, 64).
-+void MacroAssembler::rshiftPtrArithmetic(Imm32 imm, Register src,
-+                                         Register dest) {
-+  MOZ_ASSERT(imm.value >= 0 && imm.value < 64);
-+  as_sradi(dest, src, imm.value);
-+}
-+
-+// dest = dest >> shift, pointer-sized arithmetic shift.
-+// NOTE(review): unlike rshift64Arithmetic(Register, Register64), the count
-+// is passed to srad unmasked; confirm callers guarantee shift < 64.
-+void MacroAssembler::rshiftPtrArithmetic(Register shift, Register dest) {
-+  as_srad(dest, dest, shift);
-+}
-+
-+// Forwards to rshiftPtrArithmetic(Register, Register).
-+void MacroAssembler::flexibleRshiftPtrArithmetic(Register shift,
-+                                                 Register srcDest) {
-+  rshiftPtrArithmetic(shift, srcDest);
-+}
-+
-+// ===============================================================
-+// Rotation functions
-+
-+// dest = input rotated left by `count` within 32 bits, sign-extended.
-+void MacroAssembler::rotateLeft(Register count, Register input, Register dest) {
-+  // rotlw is rlwnm with a full 0..31 mask.
-+  as_rlwnm(dest, input, count, 0, 31);
-+  as_extsw(dest, dest);
-+}
-+
-+// dest = input rotated left by (count & 31) within 32 bits, sign-extended.
-+void MacroAssembler::rotateLeft(Imm32 count, Register input, Register dest) {
-+  as_rlwinm(dest, input, count.value & 31, 0, 31);
-+  as_extsw(dest, dest);
-+}
-+
-+// dest = src rotated left by `count` within 64 bits. No temp register is
-+// needed, so `temp` must be Register::Invalid().
-+void MacroAssembler::rotateLeft64(Register count, Register64 src,
-+                                  Register64 dest, Register temp) {
-+  MOZ_ASSERT(temp == Register::Invalid());
-+  // rldcl with a clear count of 0 is a plain doubleword rotate.
-+  as_rldcl(dest.reg, src.reg, count, 0);
-+}
-+
-+// dest = src rotated left by (count & 63) within 64 bits. No temp register
-+// is needed, so `temp` must be Register::Invalid().
-+void MacroAssembler::rotateLeft64(Imm32 count, Register64 src, Register64 dest,
-+                                  Register temp) {
-+  MOZ_ASSERT(temp == Register::Invalid());
-+  // rldicl with a clear count of 0 is a plain doubleword rotate-immediate.
-+  as_rldicl(dest.reg, src.reg, count.value & 63, 0);
-+}
-+
-+// dest = input rotated right by `count` within 32 bits, sign-extended.
-+// Implemented as a left rotate by (32 - count).
-+void MacroAssembler::rotateRight(Register count, Register input,
-+ Register dest) {
-+ // rotateRight(n) = rotateLeft(32-n). When dest != input, the negated
-+ // count can land directly in dest, dropping the GPR scratch. dest may
-+ // alias count harmlessly (subfic reads count, then writes dest, then
-+ // rlwnm consumes the new dest as its rotate-count).
-+ if (dest != input) {
-+ as_subfic(dest, count, 32);
-+ as_rlwnm(dest, input, dest, 0, 31);
-+ as_extsw(dest, dest);
-+ return;
-+ }
-+ // dest aliases input: the adjusted count needs its own register so that
-+ // input is still intact when the rotate reads it.
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ as_subfic(scratch, count, 32);
-+ as_rlwnm(dest, input, scratch, 0, 31);
-+ as_extsw(dest, dest);
-+}
-+
-+// Rotate right by an immediate, expressed as the complementary left rotate.
-+void MacroAssembler::rotateRight(Imm32 count, Register input, Register dest) {
-+  // (32 - n) & 31 maps a right-rotate amount onto the equivalent left one.
-+  rotateLeft(Imm32((32 - count.value) & 31), input, dest);
-+}
-+
-+// dest = src rotated right by `count` within 64 bits, implemented as a left
-+// rotate by (64 - count). `temp` must be Register::Invalid().
-+void MacroAssembler::rotateRight64(Register count, Register64 src,
-+ Register64 dest, Register temp) {
-+ MOZ_ASSERT(temp == Register::Invalid());
-+ // Same shape as rotateRight32: when dest != src, the negated count
-+ // can land directly in dest, dropping the GPR scratch.
-+ if (dest.reg != src.reg) {
-+ as_subfic(dest.reg, count, 64);
-+ as_rldcl(dest.reg, src.reg, dest.reg, 0);
-+ return;
-+ }
-+ // dest aliases src: the adjusted count needs its own register so that
-+ // src is still intact when the rotate reads it.
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ as_subfic(scratch, count, 64);
-+ as_rldcl(dest.reg, src.reg, scratch, 0);
-+}
-+
-+// 64-bit rotate right by an immediate, expressed as the complementary left
-+// rotate. `temp` must be Register::Invalid().
-+void MacroAssembler::rotateRight64(Imm32 count, Register64 src, Register64 dest,
-+                                   Register temp) {
-+  MOZ_ASSERT(temp == Register::Invalid());
-+  // (64 - n) & 63 maps a right-rotate amount onto the equivalent left one.
-+  rotateLeft64(Imm32((64 - count.value) & 63), src, dest, temp);
-+}
-+
-+// ===============================================================
-+// Bit counting functions
-+
-+// dest = number of leading zero bits in the 64-bit src.
-+void MacroAssembler::clz64(Register64 src, Register64 dest) {
-+  as_cntlzd(dest.reg, src.reg);
-+}
-+
-+// dest = number of trailing zero bits in the 64-bit src (64 when src is 0
-+// on the fallback path). Uses cnttzd when HasPOWER9() is true; otherwise
-+// isolates the lowest set bit and derives the count from cntlzd.
-+void MacroAssembler::ctz64(Register64 src, Register64 dest) {
-+ if (HasPOWER9()) {
-+ as_cnttzd(dest.reg, src.reg);
-+ } else {
-+ UseScratchRegisterScope temps(*this);
-+ Register tmp = temps.Acquire();
-+ as_neg(tmp, src.reg);
-+ // and. (record form) sets CR0[eq] based on result; result is 0 iff src==0,
-+ // so this folds the explicit zero-check that would otherwise need cmpdi.
-+ as_and__rc(tmp, src.reg, tmp); // tmp = x & -x; CR0[eq] = (src == 0)
-+ as_cntlzd(tmp, tmp); // tmp = clz(isolated bit)
-+ as_subfic(dest.reg, tmp, 63); // dest = 63 - clz = ctz (for nonzero)
-+ // The CR0 result from the and. above must still be live at the isel, so
-+ // nothing between them may write CR0.
-+ xs_li(tmp, 64);
-+ as_isel(dest.reg, tmp, dest.reg, Equal); // CR0[eq] → 64 if src==0
-+ }
-+}
-+
-+// output = number of set bits in the 64-bit input. `tmp` is not used.
-+void MacroAssembler::popcnt64(Register64 input, Register64 output,
-+                              Register tmp) {
-+  as_popcntd(output.reg, input.reg);
-+}
-+
-+// dest = number of leading zero bits in the 32-bit src. `knownNotZero` is
-+// not consulted; the same instruction is emitted either way.
-+void MacroAssembler::clz32(Register src, Register dest, bool knownNotZero) {
-+  as_cntlzw(dest, src);
-+}
-+
-+// dest = number of trailing zero bits in the 32-bit src. Uses cnttzw when
-+// HasPOWER9() is true; otherwise isolates the lowest set bit and derives the
-+// count from cntlzw. On the fallback path, knownNotZero lets the zero-input
-+// fix-up (and the record-form and. that feeds it) be skipped.
-+void MacroAssembler::ctz32(Register src, Register dest, bool knownNotZero) {
-+ if (HasPOWER9()) {
-+ as_cnttzw(dest, src);
-+ } else {
-+ UseScratchRegisterScope temps(*this);
-+ Register tmp = temps.Acquire();
-+ as_neg(tmp, src);
-+ // and. record form folds the cmpwi src,0 that would otherwise be needed
-+ // to drive the isel below: tmp == 0 iff src == 0.
-+ if (knownNotZero) {
-+ as_and_(tmp, src, tmp);
-+ } else {
-+ as_and__rc(tmp, src, tmp); // CR0[eq] = (src == 0)
-+ }
-+ // tmp now holds src & -src, i.e. only the lowest set bit.
-+ as_cntlzw(tmp, tmp);
-+ as_subfic(dest, tmp, 31);
-+ if (!knownNotZero) {
-+ // The CR0 result from the and. above must still be live at the isel.
-+ xs_li(tmp, 32);
-+ as_isel(dest, tmp, dest, Equal); // CR0[eq] → 32 if src==0
-+ }
-+ }
-+}
-+
-+// output = number of set bits in the low 32 bits of input. `tmp` is not
-+// used.
-+void MacroAssembler::popcnt32(Register input, Register output, Register tmp) {
-+  as_popcntw(output, input);
-+  // popcntw counts each word separately and leaves the low word's count in
-+  // bits 32:63, so masking down to 32 bits keeps just that count.
-+  as_rlwinm(output, output, 0, 0, 31);
-+}
-+
-+// ===============================================================
-+// Condition functions
-+
-+// Loads the byte at lhs, compares it with rhs narrowed to 8 bits, and sets
-+// dest from the outcome. Unsigned conditions zero-extend both sides; all
-+// other conditions sign-extend both sides.
-+void MacroAssembler::cmp8Set(Condition cond, Address lhs, Imm32 rhs,
-+                             Register dest) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register loaded = temps.Acquire();
-+  MOZ_ASSERT(loaded != lhs.base);
-+  if ((cond & Assembler::ConditionUnsigned) == 0) {
-+    load8SignExtend(lhs, loaded);
-+    ma_cmp_set(dest, ma_cmp(loaded, Imm32(int8_t(rhs.value)), cond, true));
-+  } else {
-+    load8ZeroExtend(lhs, loaded);
-+    ma_cmp_set(dest, ma_cmp(loaded, Imm32(uint8_t(rhs.value)), cond, true));
-+  }
-+}
-+
-+// Loads the halfword at lhs, compares it with rhs narrowed to 16 bits, and
-+// sets dest from the outcome. Unsigned conditions zero-extend both sides;
-+// all other conditions sign-extend both sides.
-+void MacroAssembler::cmp16Set(Condition cond, Address lhs, Imm32 rhs,
-+                              Register dest) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register loaded = temps.Acquire();
-+  MOZ_ASSERT(loaded != lhs.base);
-+  if ((cond & Assembler::ConditionUnsigned) == 0) {
-+    load16SignExtend(lhs, loaded);
-+    ma_cmp_set(dest, ma_cmp(loaded, Imm32(int16_t(rhs.value)), cond, true));
-+  } else {
-+    load16ZeroExtend(lhs, loaded);
-+    ma_cmp_set(dest, ma_cmp(loaded, Imm32(uint16_t(rhs.value)), cond, true));
-+  }
-+}
-+
-+// 32-bit compare of lhs against rhs; dest is set from the resulting
-+// condition. The trailing `true` selects ma_cmp's 32-bit compare.
-+template <typename T1, typename T2>
-+void MacroAssembler::cmp32Set(Condition cond, T1 lhs, T2 rhs, Register dest) {
-+  ma_cmp_set(dest, ma_cmp(lhs, rhs, cond, true));
-+}
-+
-+// 64-bit compare of two registers; dest is set from the outcome.
-+void MacroAssembler::cmp64Set(Condition cond, Register64 lhs, Register64 rhs,
-+                              Register dest) {
-+  ma_cmp_set(dest, ma_cmp(lhs.reg, rhs.reg, cond));
-+}
-+
-+// 64-bit compare of a register against an immediate.
-+void MacroAssembler::cmp64Set(Condition cond, Register64 lhs, Imm64 rhs,
-+                              Register dest) {
-+  ma_cmp_set(dest, ma_cmp(lhs.reg, ImmWord(uint64_t(rhs.value)), cond));
-+}
-+
-+// 64-bit compare of a memory operand against a register; the memory value
-+// is loaded into a scratch register first.
-+void MacroAssembler::cmp64Set(Condition cond, Address lhs, Register64 rhs,
-+                              Register dest) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register loaded = temps.Acquire();
-+  loadPtr(lhs, loaded);
-+  ma_cmp_set(dest, ma_cmp(loaded, rhs.reg, cond));
-+}
-+
-+// 64-bit compare of a memory operand against an immediate; the memory value
-+// is loaded into a scratch register first.
-+void MacroAssembler::cmp64Set(Condition cond, Address lhs, Imm64 rhs,
-+                              Register dest) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register loaded = temps.Acquire();
-+  loadPtr(lhs, loaded);
-+  ma_cmp_set(dest, ma_cmp(loaded, ImmWord(uint64_t(rhs.value)), cond));
-+}
-+
-+// Pointer-width compare of lhs against rhs; dest is set from the resulting
-+// condition.
-+template <typename T1, typename T2>
-+void MacroAssembler::cmpPtrSet(Condition cond, T1 lhs, T2 rhs, Register dest) {
-+  ma_cmp_set(dest, ma_cmp(lhs, rhs, cond));
-+}
-+
-+// ===============================================================
-+// Branch functions
-+
-+// Branches to label when the byte at lhs satisfies cond against rhs.
-+void MacroAssembler::branch8(Condition cond, const Address& lhs, Imm32 rhs,
-+                             Label* label) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register loaded = temps.Acquire();
-+  // As on ARM64/LoongArch64/RISC-V, the immediate is narrowed to the 8-bit
-+  // operand width so that both sides of the compare carry matching bit
-+  // patterns however move32(Imm32) materialises it: uint8 for equality and
-+  // unsigned conditions, int8 for signed relational ones.
-+  const bool zeroExtend = cond == Assembler::Equal ||
-+                          cond == Assembler::NotEqual ||
-+                          (cond & Assembler::ConditionUnsigned) != 0;
-+  Imm32 narrowed(0);
-+  if (!zeroExtend) {
-+    load8SignExtend(lhs, loaded);
-+    narrowed = Imm32(int8_t(rhs.value));
-+  } else {
-+    load8ZeroExtend(lhs, loaded);
-+    narrowed = Imm32(uint8_t(rhs.value));
-+  }
-+  ma_b(ma_cmp(loaded, narrowed, cond, true), label);
-+}
-+
-+// Branches to label when the byte at lhs satisfies cond against the 32-bit
-+// value in rhs.
-+void MacroAssembler::branch8(Condition cond, const BaseIndex& lhs, Register rhs,
-+                             Label* label) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register scratch = temps.Acquire();
-+  // Choose the extension the same way the Address/Imm32 overload does:
-+  // zero-extend for equality and unsigned conditions, sign-extend for
-+  // signed relational ones. Always zero-extending would make a byte such
-+  // as 0xFF compare as 255 rather than -1 under a signed condition.
-+  bool isEqOrNe = (cond == Assembler::Equal) || (cond == Assembler::NotEqual);
-+  bool isUnsigned = (cond & Assembler::ConditionUnsigned) != 0;
-+  if (isEqOrNe || isUnsigned) {
-+    load8ZeroExtend(lhs, scratch);
-+  } else {
-+    load8SignExtend(lhs, scratch);
-+  }
-+  Condition c = ma_cmp(scratch, rhs, cond, true);
-+  ma_b(c, label);
-+}
-+
-+// Branches to label when the halfword at lhs satisfies cond against rhs.
-+void MacroAssembler::branch16(Condition cond, const Address& lhs, Imm32 rhs,
-+                              Label* label) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register loaded = temps.Acquire();
-+  // Same scheme as branch8: the immediate is narrowed to 16 bits so both
-+  // sides have matching bit patterns. uint16 for equality and unsigned
-+  // conditions, int16 for signed relational ones.
-+  const bool zeroExtend = cond == Assembler::Equal ||
-+                          cond == Assembler::NotEqual ||
-+                          (cond & Assembler::ConditionUnsigned) != 0;
-+  Imm32 narrowed(0);
-+  if (!zeroExtend) {
-+    load16SignExtend(lhs, loaded);
-+    narrowed = Imm32(int16_t(rhs.value));
-+  } else {
-+    load16ZeroExtend(lhs, loaded);
-+    narrowed = Imm32(uint16_t(rhs.value));
-+  }
-+  ma_b(ma_cmp(loaded, narrowed, cond, true), label);
-+}
-+
-+// 32-bit compare of two registers, then a conditional branch to label.
-+void MacroAssembler::branch32(Condition cond, Register lhs, Register rhs,
-+                              Label* label) {
-+  ma_b(ma_cmp(lhs, rhs, cond, true), label);
-+}
-+
-+// 32-bit compare of a register against an immediate, then a conditional
-+// branch to label.
-+void MacroAssembler::branch32(Condition cond, Register lhs, Imm32 imm,
-+                              Label* label) {
-+  ma_b(ma_cmp(lhs, imm, cond, true), label);
-+}
-+
-+// Loads the 32-bit value at lhs into a scratch register, compares it with
-+// rhs, and branches to label on cond.
-+void MacroAssembler::branch32(Condition cond, const Address& lhs, Register rhs,
-+                              Label* label) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register loaded = temps.Acquire();
-+  load32(lhs, loaded);
-+  ma_b(ma_cmp(loaded, rhs, cond, true), label);
-+}
-+
-+// Same as above with an immediate right-hand side.
-+void MacroAssembler::branch32(Condition cond, const Address& lhs, Imm32 rhs,
-+                              Label* label) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register loaded = temps.Acquire();
-+  load32(lhs, loaded);
-+  ma_b(ma_cmp(loaded, rhs, cond, true), label);
-+}
-+
-+// Loads the 32-bit value at an absolute address and branches to label when
-+// it satisfies cond against rhs. One scratch register holds first the
-+// address and then the loaded value.
-+void MacroAssembler::branch32(Condition cond, const AbsoluteAddress& lhs,
-+                              Register rhs, Label* label) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register work = temps.Acquire();
-+  movePtr(ImmWord((uintptr_t)lhs.addr), work);
-+  load32(Address(work, 0), work);
-+  ma_b(ma_cmp(work, rhs, cond, true), label);
-+}
-+
-+// Same as above with an immediate right-hand side.
-+void MacroAssembler::branch32(Condition cond, const AbsoluteAddress& lhs,
-+                              Imm32 rhs, Label* label) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register work = temps.Acquire();
-+  movePtr(ImmWord((uintptr_t)lhs.addr), work);
-+  load32(Address(work, 0), work);
-+  ma_b(ma_cmp(work, rhs, cond, true), label);
-+}
-+
-+// Loads the 32-bit value at a base+index address and branches to label when
-+// it satisfies cond against rhs.
-+void MacroAssembler::branch32(Condition cond, const BaseIndex& lhs, Imm32 rhs,
-+                              Label* label) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register loaded = temps.Acquire();
-+  load32(lhs, loaded);
-+  ma_b(ma_cmp(loaded, rhs, cond, true), label);
-+}
-+
-+// Loads the 32-bit value at a wasm symbolic address and branches to label
-+// when it satisfies cond against imm. One scratch register holds first the
-+// address and then the loaded value.
-+void MacroAssembler::branch32(Condition cond, wasm::SymbolicAddress addr,
-+                              Imm32 imm, Label* label) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register work = temps.Acquire();
-+  movePtr(addr, work);
-+  load32(Address(work, 0), work);
-+  ma_b(ma_cmp(work, imm, cond, true), label);
-+}
-+
-+// 64-bit compare of a register against an immediate. Branches to success
-+// when cond holds; if fail is non-null, jumps there otherwise.
-+void MacroAssembler::branch64(Condition cond, Register64 lhs, Imm64 val,
-+                              Label* success, Label* fail) {
-+  ma_b(ma_cmp(lhs.reg, ImmWord(uint64_t(val.value)), cond), success);
-+  if (fail) {
-+    jump(fail);
-+  }
-+}
-+
-+// 64-bit compare of two registers. Branches to success when cond holds; if
-+// fail is non-null, jumps there otherwise.
-+void MacroAssembler::branch64(Condition cond, Register64 lhs, Register64 rhs,
-+                              Label* success, Label* fail) {
-+  ma_b(ma_cmp(lhs.reg, rhs.reg, cond), success);
-+  if (fail) {
-+    jump(fail);
-+  }
-+}
-+
-+// Loads the 64-bit value at lhs into a scratch register and compares it
-+// with an immediate. Branches to success when cond holds; if fail is
-+// non-null, jumps there otherwise.
-+void MacroAssembler::branch64(Condition cond, const Address& lhs, Imm64 val,
-+                              Label* success, Label* fail) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register loaded = temps.Acquire();
-+  loadPtr(lhs, loaded);
-+  ma_b(ma_cmp(loaded, ImmWord(uint64_t(val.value)), cond), success);
-+  if (fail) {
-+    jump(fail);
-+  }
-+}
-+
-+// Same as above with a register right-hand side.
-+void MacroAssembler::branch64(Condition cond, const Address& lhs,
-+                              Register64 rhs, Label* success, Label* fail) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register loaded = temps.Acquire();
-+  loadPtr(lhs, loaded);
-+  ma_b(ma_cmp(loaded, rhs.reg, cond), success);
-+  if (fail) {
-+    jump(fail);
-+  }
-+}
-+
-+// Memory-to-memory 64-bit compare: rhs is loaded into the caller-supplied
-+// scratch register, then the Address/Register64 overload does the rest with
-+// no fail label.
-+void MacroAssembler::branch64(Condition cond, const Address& lhs,
-+                              const Address& rhs, Register scratch,
-+                              Label* label) {
-+  loadPtr(rhs, scratch);
-+  Register64 rhsValue(scratch);
-+  branch64(cond, lhs, rhsValue, label, nullptr);
-+}
-+
-+// Pointer-width compare of two registers, then a conditional branch.
-+void MacroAssembler::branchPtr(Condition cond, Register lhs, Register rhs,
-+                               Label* label) {
-+  ma_b(ma_cmp(lhs, rhs, cond), label);
-+}
-+
-+// Pointer-width compare against a 32-bit immediate, which is widened to
-+// 64 bits with sign extension before the compare.
-+void MacroAssembler::branchPtr(Condition cond, Register lhs, Imm32 rhs,
-+                               Label* label) {
-+  ma_b(ma_cmp(lhs, ImmWord(int64_t(rhs.value)), cond), label);
-+}
-+
-+// Pointer-width compare against an immediate pointer.
-+void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmPtr rhs,
-+                               Label* label) {
-+  ma_b(ma_cmp(lhs, rhs, cond), label);
-+}
-+
-+// Pointer-width compare against an immediate GC pointer.
-+void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmGCPtr rhs,
-+                               Label* label) {
-+  ma_b(ma_cmp(lhs, rhs, cond), label);
-+}
-+
-+// Pointer-width compare against an immediate word.
-+void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmWord rhs,
-+                               Label* label) {
-+  ma_b(ma_cmp(lhs, rhs, cond), label);
-+}
-+
-+// Loads the pointer-sized value at lhs into a scratch register, compares it
-+// with rhs, and branches to label on cond.
-+void MacroAssembler::branchPtr(Condition cond, const Address& lhs, Register rhs,
-+                               Label* label) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register loaded = temps.Acquire();
-+  loadPtr(lhs, loaded);
-+  ma_b(ma_cmp(loaded, rhs, cond), label);
-+}
-+
-+// Same as above with an immediate pointer on the right.
-+void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmPtr rhs,
-+                               Label* label) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register loaded = temps.Acquire();
-+  loadPtr(lhs, loaded);
-+  ma_b(ma_cmp(loaded, rhs, cond), label);
-+}
-+
-+// Same as above with an immediate GC pointer on the right.
-+void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmGCPtr rhs,
-+                               Label* label) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register loaded = temps.Acquire();
-+  loadPtr(lhs, loaded);
-+  ma_b(ma_cmp(loaded, rhs, cond), label);
-+}
-+
-+// Same as above with an immediate word on the right.
-+void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmWord rhs,
-+                               Label* label) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register loaded = temps.Acquire();
-+  loadPtr(lhs, loaded);
-+  ma_b(ma_cmp(loaded, rhs, cond), label);
-+}
-+
-+// Loads the pointer-sized value at an absolute address and branches to
-+// label when it satisfies cond against rhs. One scratch register holds
-+// first the address and then the loaded value.
-+void MacroAssembler::branchPtr(Condition cond, const AbsoluteAddress& lhs,
-+                               Register rhs, Label* label) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register work = temps.Acquire();
-+  movePtr(ImmWord((uintptr_t)lhs.addr), work);
-+  loadPtr(Address(work, 0), work);
-+  ma_b(ma_cmp(work, rhs, cond), label);
-+}
-+
-+// Same as above with an immediate word on the right.
-+void MacroAssembler::branchPtr(Condition cond, const AbsoluteAddress& lhs,
-+                               ImmWord rhs, Label* label) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register work = temps.Acquire();
-+  movePtr(ImmWord((uintptr_t)lhs.addr), work);
-+  loadPtr(Address(work, 0), work);
-+  ma_b(ma_cmp(work, rhs, cond), label);
-+}
-+
-+// Same pattern for a wasm symbolic address on the left.
-+void MacroAssembler::branchPtr(Condition cond, wasm::SymbolicAddress lhs,
-+                               Register rhs, Label* label) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register work = temps.Acquire();
-+  movePtr(lhs, work);
-+  loadPtr(Address(work, 0), work);
-+  ma_b(ma_cmp(work, rhs, cond), label);
-+}
-+
-+// Loads the pointer-sized value at a base+index address into a scratch
-+// register, compares it with rhs, and branches to label on cond.
-+void MacroAssembler::branchPtr(Condition cond, const BaseIndex& lhs,
-+                               Register rhs, Label* label) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register loaded = temps.Acquire();
-+  loadPtr(lhs, loaded);
-+  ma_b(ma_cmp(loaded, rhs, cond), label);
-+}
-+
-+// Same as above with an immediate word on the right.
-+void MacroAssembler::branchPtr(Condition cond, const BaseIndex& lhs,
-+                               ImmWord rhs, Label* label) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register loaded = temps.Acquire();
-+  loadPtr(lhs, loaded);
-+  ma_b(ma_cmp(loaded, rhs, cond), label);
-+}
-+
-+// Forwards unchanged to branchPtr(cond, Address, Register, label).
-+void MacroAssembler::branchPrivatePtr(Condition cond, const Address& lhs,
-+                                      Register rhs, Label* label) {
-+  branchPtr(cond, lhs, rhs, label);
-+}
-+
-+// Floating-point compare of lhs against rhs (fcmpu), then a branch to label
-+// on the given DoubleCondition.
-+void MacroAssembler::branchFloat(DoubleCondition cond, FloatRegister lhs,
-+                                 FloatRegister rhs, Label* label) {
-+  as_fcmpu(lhs, rhs);
-+  ma_b(cond, label);
-+}
-+
-+// Truncates the float32 in src toward zero, leaving the low 32 bits of the
-+// 64-bit integer result in dest. Jumps to fail when the conversion produced
-+// one of the saturation values.
-+void MacroAssembler::branchTruncateFloat32MaybeModUint32(FloatRegister src,
-+                                                         Register dest,
-+                                                         Label* fail) {
-+  // Convert float32 to int64 (truncating toward zero), fail on NaN/overflow.
-+  as_fctidz(ScratchDoubleReg, src);
-+  as_mfvsrd(dest, ScratchDoubleReg);
-+  // PPC64 fctidz saturates to INT64_MIN on negative overflow/NaN,
-+  // and to INT64_MAX on positive overflow. Check both.
-+  asMasm().branchPtr(Assembler::Equal, dest, ImmWord(int64_t(INT64_MIN)), fail);
-+  asMasm().branchPtr(Assembler::Equal, dest, ImmWord(int64_t(INT64_MAX)), fail);
-+  // Keep the low 32 bits, sign-extended. The other 32-bit producers in this
-+  // file finish with extsw, and branchAdd32's overflow path states that its
-+  // inputs are sign-extended 32-bit values; a zero-extended result with bit
-+  // 31 set would break that assumption.
-+  as_extsw(dest, dest);
-+}
-+
-+// Forwards to convertFloat32ToInt32 with its final flag set to false.
-+void MacroAssembler::branchTruncateFloat32ToInt32(FloatRegister src,
-+                                                  Register dest, Label* fail) {
-+  convertFloat32ToInt32(src, dest, fail, false);
-+}
-+
-+// Floating-point compare of lhs against rhs (fcmpu), then a branch to label
-+// on the given DoubleCondition.
-+void MacroAssembler::branchDouble(DoubleCondition cond, FloatRegister lhs,
-+                                  FloatRegister rhs, Label* label) {
-+  as_fcmpu(lhs, rhs);
-+  ma_b(cond, label);
-+}
-+
-+// Truncates the double in src toward zero, leaving the low 32 bits of the
-+// 64-bit integer result in dest. Jumps to fail when the conversion produced
-+// one of the saturation values.
-+void MacroAssembler::branchTruncateDoubleMaybeModUint32(FloatRegister src,
-+                                                        Register dest,
-+                                                        Label* fail) {
-+  // Convert double to int64 (truncating toward zero), fail on NaN/overflow.
-+  as_fctidz(ScratchDoubleReg, src);
-+  as_mfvsrd(dest, ScratchDoubleReg);
-+  // PPC64 fctidz saturates to INT64_MIN on negative overflow/NaN,
-+  // and to INT64_MAX on positive overflow. Check both.
-+  asMasm().branchPtr(Assembler::Equal, dest, ImmWord(int64_t(INT64_MIN)), fail);
-+  asMasm().branchPtr(Assembler::Equal, dest, ImmWord(int64_t(INT64_MAX)), fail);
-+  // Keep the low 32 bits, sign-extended. The other 32-bit producers in this
-+  // file finish with extsw, and branchAdd32's overflow path states that its
-+  // inputs are sign-extended 32-bit values; a zero-extended result with bit
-+  // 31 set would break that assumption.
-+  as_extsw(dest, dest);
-+}
-+
-+void MacroAssembler::branchTruncateDoubleToInt32(FloatRegister src,
-+ Register dest, Label* fail) {
-+ convertDoubleToInt32(src, dest, fail, false);
-+}
-+
-+void MacroAssembler::branchInt64NotInPtrRange(Register64 src, Label* label) {
-+ // No-op on 64-bit.
-+}
-+
-+void MacroAssembler::branchUInt64NotInPtrRange(Register64 src, Label* label) {
-+ // Branch if src >= 2^63 (sign bit set = out of signed ptr range).
-+ as_cmpdi(src.reg, 0);
-+ ma_b(Assembler::LessThan, label);
-+}
-+
-+template <typename T>
-+void MacroAssembler::branchAdd32(Condition cond, T src, Register dest,
-+ Label* overflow) {
-+ switch (cond) {
-+ case Overflow: {
-+ // Do raw 64-bit add (no sign extension) so we can detect 32-bit overflow.
-+ // Both inputs should already be sign-extended 32-bit values, so the
-+ // 64-bit result is mathematically correct. If extsw(result) != result,
-+ // the 32-bit add overflowed.
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ addPtr(src, dest);
-+ as_extsw(scratch, dest);
-+ as_cmpd(dest, scratch);
-+ as_extsw(dest, dest);
-+ ma_b(NotEqual, overflow);
-+ break;
-+ }
-+ case NonZero:
-+ case Zero:
-+ add32(src, dest);
-+ as_cmpdi(dest, 0);
-+ ma_b(cond == NonZero ? NotEqual : Equal, overflow);
-+ break;
-+ case Signed:
-+ case NotSigned:
-+ add32(src, dest);
-+ as_cmpdi(dest, 0);
-+ ma_b(cond == Signed ? LessThan : GreaterThanOrEqual, overflow);
-+ break;
-+ case CarryClear:
-+ case CarrySet: {
-+ // Unsigned 32-bit carry detection: save dest, do 32-bit add,
-+ // then unsigned-compare result with original. If result < original
-+ // (unsigned), a carry occurred.
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ move32(dest, scratch);
-+ add32(src, dest);
-+ as_cmplw(dest, scratch);
-+ ma_b(cond == CarrySet ? LessThan : GreaterThanOrEqual, overflow);
-+ break;
-+ }
-+ default:
-+ MOZ_CRASH("NYI");
-+ }
-+}
-+
-+template <typename T>
-+void MacroAssembler::branchSub32(Condition cond, T src, Register dest,
-+ Label* overflow) {
-+ switch (cond) {
-+ case Overflow: {
-+ // Do raw 64-bit sub (no sign extension) so we can detect 32-bit overflow.
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ subPtr(src, dest);
-+ as_extsw(scratch, dest);
-+ as_cmpd(dest, scratch);
-+ as_extsw(dest, dest);
-+ ma_b(NotEqual, overflow);
-+ break;
-+ }
-+ case NonZero:
-+ case Zero:
-+ sub32(src, dest);
-+ as_cmpdi(dest, 0);
-+ ma_b(cond == NonZero ? NotEqual : Equal, overflow);
-+ break;
-+ case Signed:
-+ case NotSigned:
-+ sub32(src, dest);
-+ as_cmpdi(dest, 0);
-+ ma_b(cond == Signed ? LessThan : GreaterThanOrEqual, overflow);
-+ break;
-+ default:
-+ MOZ_CRASH("NYI");
-+ }
-+}
-+
-+template <typename T>
-+void MacroAssembler::branchMul32(Condition cond, T src, Register dest,
-+ Label* overflow) {
-+ MOZ_ASSERT(cond == Overflow);
-+ // Do raw 64-bit multiply (no sign extension) so we can detect 32-bit
-+ // overflow. as_mulld gives full 64-bit low result; if extsw(result) !=
-+ // result, overflow. scratch is dead after the mulld (consumed as RB),
-+ // so the sign-extension round-trip reuses it instead of acquiring a
-+ // second scratch.
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ move32(src, scratch);
-+ as_mulld(dest, dest, scratch);
-+ as_extsw(scratch, dest);
-+ as_cmpd(dest, scratch);
-+ as_extsw(dest, dest);
-+ ma_b(NotEqual, overflow);
-+}
-+
-+template <typename T>
-+void MacroAssembler::branchRshift32(Condition cond, T src, Register dest,
-+ Label* label) {
-+ MOZ_ASSERT(cond == Zero || cond == NonZero);
-+ rshift32(src, dest);
-+ branch32(cond == Zero ? Equal : NotEqual, dest, Imm32(0), label);
-+}
-+
-+void MacroAssembler::branchNeg32(Condition cond, Register reg, Label* label) {
-+ MOZ_ASSERT(cond == Overflow);
-+ neg32(reg);
-+ branch32(Equal, reg, Imm32(INT32_MIN), label);
-+}
-+
-+template <typename T>
-+void MacroAssembler::branchAddPtr(Condition cond, T src, Register dest,
-+ Label* label) {
-+ switch (cond) {
-+ case Overflow: {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ movePtr(dest, scratch); // scratch = old_dest
-+ addPtr(src, dest); // dest = result = old_dest + src
-+ as_xor_(SecondScratchReg, dest,
-+ scratch); // SecondScratch = result ^ old_dest
-+ as_subf(scratch, scratch,
-+ dest); // scratch = result - old_dest = src_value
-+ as_xor_(scratch, scratch, dest); // scratch = src_value ^ result
-+ // (old_dest ^ result) & (src_value ^ result): bit 63 set iff overflow.
-+ // and. record form sets CR0[lt]=(bit 63 set), folding the cmpdi.
-+ as_and__rc(scratch, scratch, SecondScratchReg);
-+ ma_b(LessThan, label);
-+ break;
-+ }
-+ case NonZero:
-+ case Zero:
-+ addPtr(src, dest);
-+ as_cmpdi(dest, 0);
-+ ma_b(cond == NonZero ? NotEqual : Equal, label);
-+ break;
-+ case Signed:
-+ case NotSigned:
-+ addPtr(src, dest);
-+ as_cmpdi(dest, 0);
-+ ma_b(cond == Signed ? LessThan : GreaterThanOrEqual, label);
-+ break;
-+ case CarryClear:
-+ case CarrySet: {
-+ // Unsigned 64-bit carry detection: save dest, do 64-bit add,
-+ // then unsigned-compare result with original. If result < original
-+ // (unsigned), a carry occurred.
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ movePtr(dest, scratch);
-+ addPtr(src, dest);
-+ as_cmpld(dest, scratch);
-+ ma_b(cond == CarrySet ? LessThan : GreaterThanOrEqual, label);
-+ break;
-+ }
-+ default:
-+ MOZ_CRASH("NYI");
-+ }
-+}
-+
-+template <typename T>
-+void MacroAssembler::branchSubPtr(Condition cond, T src, Register dest,
-+ Label* label) {
-+ switch (cond) {
-+ case Overflow: {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ movePtr(dest, scratch); // scratch = old_dest
-+ subPtr(src, dest); // dest = result = old_dest - src
-+ // Overflow if (old_dest ^ src_value) & (old_dest ^ result) has bit 63
-+ // set.
-+ as_subf(SecondScratchReg, dest,
-+ scratch); // SecondScratch = old_dest - result = src_value
-+ as_xor_(SecondScratchReg, scratch,
-+ SecondScratchReg); // old_dest ^ src_value
-+ as_xor_(scratch, scratch, dest); // old_dest ^ result
-+ // Record-form AND sets CR0 to the signed compare of the result vs 0,
-+ // so a separate cmpdi is unnecessary; LessThan reads CR0.LT.
-+ as_and__rc(scratch, scratch, SecondScratchReg);
-+ ma_b(LessThan, label);
-+ break;
-+ }
-+ case NonZero:
-+ case Zero:
-+ subPtr(src, dest);
-+ as_cmpdi(dest, 0);
-+ ma_b(cond == NonZero ? NotEqual : Equal, label);
-+ break;
-+ case Signed:
-+ case NotSigned:
-+ subPtr(src, dest);
-+ as_cmpdi(dest, 0);
-+ ma_b(cond == Signed ? LessThan : GreaterThanOrEqual, label);
-+ break;
-+ default:
-+ MOZ_CRASH("NYI");
-+ }
-+}
-+
-+void MacroAssembler::branchMulPtr(Condition cond, Register src, Register dest,
-+ Label* label) {
-+ MOZ_ASSERT(cond == Assembler::Overflow);
-+ as_mulldo(dest, dest, src);
-+ ma_b(Overflow, label);
-+}
-+
-+void MacroAssembler::branchNegPtr(Condition cond, Register reg, Label* label) {
-+ MOZ_ASSERT(cond == Overflow);
-+ negPtr(reg);
-+ branchPtr(Assembler::Equal, reg, ImmWord(intptr_t(INTPTR_MIN)), label);
-+}
-+
-+void MacroAssembler::decBranchPtr(Condition cond, Register lhs, Imm32 rhs,
-+ Label* label) {
-+ subPtr(rhs, lhs);
-+ branchPtr(cond, lhs, Imm32(0), label);
-+}
-+
-+void MacroAssembler::branchTest32(Condition cond, Register lhs, Register rhs,
-+ Label* label) {
-+ MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
-+ cond == NotSigned);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ if (lhs != rhs) {
-+ as_and_(scratch, lhs, rhs);
-+ as_extsw_rc(scratch, scratch); // CR0 set on sign-extended i32; folds cmpdi
-+ } else {
-+ as_extsw_rc(scratch, lhs);
-+ }
-+ Condition base = static_cast<Condition>(cond & ~Assembler::ConditionZero);
-+ ma_b(base, label);
-+}
-+
-+void MacroAssembler::branchTest32(Condition cond, Register lhs, Imm32 rhs,
-+ Label* label) {
-+ MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
-+ cond == NotSigned);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ if (is_uintN(rhs.value, 16)) {
-+ as_andi_rc(scratch, lhs, rhs.value);
-+ // andi_rc sets CR0 on the masked value, but only the low 16 bits matter
-+ // since rhs is a 16-bit unsigned mask — sign of the i32 result is always
-+ // 0, so CR0[lt] is always 0. For Signed/NotSigned conditions the answer
-+ // is fixed; for Zero/NonZero CR0[eq] is correct.
-+ } else {
-+ move32(rhs, scratch);
-+ as_and_(scratch, lhs, scratch);
-+ as_extsw_rc(scratch, scratch); // CR0 set on sign-extended i32; folds cmpdi
-+ }
-+ Condition base = static_cast<Condition>(cond & ~Assembler::ConditionZero);
-+ ma_b(base, label);
-+}
-+
-+void MacroAssembler::branchTest32(Condition cond, const Address& lhs, Imm32 rhs,
-+ Label* label) {
-+ MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
-+ cond == NotSigned);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ load32(lhs, scratch);
-+ // and32 picks up the rlwinm contig-mask fast path for non-16-bit-fit
-+ // immediates that are a contiguous run of 1-bits (common: tag masks,
-+ // header bit-fields). It also emits the trailing extsw.
-+ and32(rhs, scratch);
-+ as_cmpdi(scratch, 0);
-+ Condition base = static_cast<Condition>(cond & ~Assembler::ConditionZero);
-+ ma_b(base, label);
-+}
-+
-+void MacroAssembler::branchTest32(Condition cond, const AbsoluteAddress& lhs,
-+ Imm32 rhs, Label* label) {
-+ MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
-+ cond == NotSigned);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ movePtr(ImmWord((uintptr_t)lhs.addr), scratch);
-+ load32(Address(scratch, 0), scratch);
-+ and32(rhs, scratch);
-+ as_cmpdi(scratch, 0);
-+ Condition base = static_cast<Condition>(cond & ~Assembler::ConditionZero);
-+ ma_b(base, label);
-+}
-+
-+void MacroAssembler::branchTestPtr(Condition cond, Register lhs, Register rhs,
-+ Label* label) {
-+ MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
-+ cond == NotSigned);
-+ if (lhs == rhs) {
-+ as_cmpdi(lhs, 0);
-+ } else {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ // Record-form AND sets CR0; no follow-up cmpdi needed.
-+ as_and__rc(scratch, lhs, rhs);
-+ }
-+ Condition base = static_cast<Condition>(cond & ~Assembler::ConditionZero);
-+ ma_b(base, label);
-+}
-+
-+void MacroAssembler::branchTestPtr(Condition cond, Register lhs, Imm32 rhs,
-+ Label* label) {
-+ MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
-+ cond == NotSigned);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ if (is_uintN(rhs.value, 16)) {
-+ as_andi_rc(scratch, lhs, rhs.value);
-+ } else {
-+ move32(rhs, scratch);
-+ as_and__rc(scratch, lhs, scratch); // record form folds the cmpdi
-+ }
-+ Condition base = static_cast<Condition>(cond & ~Assembler::ConditionZero);
-+ ma_b(base, label);
-+}
-+
-+void MacroAssembler::branchTestPtr(Condition cond, Register lhs, ImmWord rhs,
-+ Label* label) {
-+ MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
-+ cond == NotSigned);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ movePtr(rhs, scratch);
-+ as_and__rc(scratch, lhs, scratch);
-+ Condition base = static_cast<Condition>(cond & ~Assembler::ConditionZero);
-+ ma_b(base, label);
-+}
-+
-+void MacroAssembler::branchTestPtr(Condition cond, const Address& lhs,
-+ Imm32 rhs, Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ loadPtr(lhs, scratch);
-+ // andPtr picks up the rlwinm contig-mask fast path for non-16-bit-fit
-+ // immediates that are a contiguous run of 1-bits.
-+ andPtr(rhs, scratch);
-+ as_cmpdi(scratch, 0);
-+ Condition base = static_cast<Condition>(cond & ~Assembler::ConditionZero);
-+ ma_b(base, label);
-+}
-+
-+void MacroAssembler::branchTest64(Condition cond, Register64 lhs,
-+ Register64 rhs, Register temp, Label* success,
-+ Label* fail) {
-+ MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
-+ cond == NotSigned);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ as_and__rc(scratch, lhs.reg, rhs.reg);
-+ Condition base = static_cast<Condition>(cond & ~Assembler::ConditionZero);
-+ if (fail) {
-+ ma_b(base, success);
-+ jump(fail);
-+ } else {
-+ ma_b(base, success);
-+ }
-+}
-+
-+void MacroAssembler::branchTest64(Condition cond, Register64 lhs, Imm64 rhs,
-+ Label* success, Label* fail) {
-+ MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
-+ cond == NotSigned);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ movePtr(ImmWord(uint64_t(rhs.value)), scratch);
-+ as_and__rc(scratch, lhs.reg, scratch);
-+ Condition base = static_cast<Condition>(cond & ~Assembler::ConditionZero);
-+ if (fail) {
-+ ma_b(base, success);
-+ jump(fail);
-+ } else {
-+ ma_b(base, success);
-+ }
-+}
-+
-+// ===============================================================
-+// Value-type branch functions
-+
-+void MacroAssembler::branchTestUndefined(Condition cond, Register tag,
-+ Label* label) {
-+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+ Condition c = ma_cmp(tag, ImmTag(JSVAL_TAG_UNDEFINED), cond);
-+ ma_b(c, label);
-+}
-+
-+void MacroAssembler::branchTestUndefined(Condition cond,
-+ const ValueOperand& value,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ splitTag(value, scratch);
-+ branchTestTag(cond, scratch, ImmTag(JSVAL_TAG_UNDEFINED), label);
-+}
-+
-+void MacroAssembler::branchTestUndefined(Condition cond, const Address& address,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(address, scratch);
-+ branchTestTag(cond, tag, ImmTag(JSVAL_TAG_UNDEFINED), label);
-+}
-+
-+void MacroAssembler::branchTestUndefined(Condition cond,
-+ const BaseIndex& address,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(address, scratch);
-+ branchTestTag(cond, tag, ImmTag(JSVAL_TAG_UNDEFINED), label);
-+}
-+
-+void MacroAssembler::branchTestInt32(Condition cond, Register tag,
-+ Label* label) {
-+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+ Condition c = ma_cmp(tag, ImmTag(JSVAL_TAG_INT32), cond);
-+ ma_b(c, label);
-+}
-+
-+void MacroAssembler::branchTestInt32(Condition cond, const ValueOperand& value,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ splitTag(value, scratch);
-+ branchTestTag(cond, scratch, ImmTag(JSVAL_TAG_INT32), label);
-+}
-+
-+void MacroAssembler::branchTestInt32(Condition cond, const Address& address,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(address, scratch);
-+ branchTestTag(cond, tag, ImmTag(JSVAL_TAG_INT32), label);
-+}
-+
-+void MacroAssembler::branchTestInt32(Condition cond, const BaseIndex& address,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(address, scratch);
-+ branchTestTag(cond, tag, ImmTag(JSVAL_TAG_INT32), label);
-+}
-+
-+void MacroAssembler::branchTestInt32Truthy(bool b, const ValueOperand& value,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ unboxInt32(value, scratch);
-+ as_cmpwi(scratch, 0);
-+ ma_b(b ? NotEqual : Equal, label);
-+}
-+
-+void MacroAssembler::branchTestDouble(Condition cond, Register tag,
-+ Label* label) {
-+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+ Condition actual = (cond == Equal) ? BelowOrEqual : Above;
-+ Condition c = ma_cmp(tag, ImmTag(JSVAL_TAG_MAX_DOUBLE), actual);
-+ ma_b(c, label);
-+}
-+
-+void MacroAssembler::branchTestDouble(Condition cond, const ValueOperand& value,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ splitTag(value, scratch);
-+ branchTestDouble(cond, scratch, label);
-+}
-+
-+void MacroAssembler::branchTestDouble(Condition cond, const Address& address,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(address, scratch);
-+ branchTestDouble(cond, tag, label);
-+}
-+
-+void MacroAssembler::branchTestDouble(Condition cond, const BaseIndex& address,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(address, scratch);
-+ branchTestDouble(cond, tag, label);
-+}
-+
-+void MacroAssembler::branchTestDoubleTruthy(bool b, FloatRegister value,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ xs_li(scratch, 0);
-+ as_mtvsrd(ScratchDoubleReg, scratch);
-+ as_fcmpu(value, ScratchDoubleReg);
-+ DoubleCondition cond = b ? DoubleNotEqual : DoubleEqualOrUnordered;
-+ ma_b(cond, label);
-+}
-+
-+void MacroAssembler::branchTestNumber(Condition cond, Register tag,
-+ Label* label) {
-+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+ Condition actual = (cond == Equal) ? BelowOrEqual : Above;
-+ Condition c = ma_cmp(tag, Imm32(JS::detail::ValueUpperInclNumberTag), actual);
-+ ma_b(c, label);
-+}
-+
-+void MacroAssembler::branchTestNumber(Condition cond, const ValueOperand& value,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ splitTag(value, scratch);
-+ branchTestNumber(cond, scratch, label);
-+}
-+
-+void MacroAssembler::branchTestBoolean(Condition cond, Register tag,
-+ Label* label) {
-+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+ Condition c = ma_cmp(tag, ImmTag(JSVAL_TAG_BOOLEAN), cond);
-+ ma_b(c, label);
-+}
-+
-+void MacroAssembler::branchTestBoolean(Condition cond,
-+ const ValueOperand& value,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ splitTag(value, scratch);
-+ branchTestTag(cond, scratch, ImmTag(JSVAL_TAG_BOOLEAN), label);
-+}
-+
-+void MacroAssembler::branchTestBoolean(Condition cond, const Address& address,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(address, scratch);
-+ branchTestTag(cond, tag, ImmTag(JSVAL_TAG_BOOLEAN), label);
-+}
-+
-+void MacroAssembler::branchTestBoolean(Condition cond, const BaseIndex& address,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(address, scratch);
-+ branchTestTag(cond, tag, ImmTag(JSVAL_TAG_BOOLEAN), label);
-+}
-+
-+void MacroAssembler::branchTestBooleanTruthy(bool b, const ValueOperand& value,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ unboxBoolean(value, scratch);
-+ as_cmpwi(scratch, 0);
-+ ma_b(b ? NotEqual : Equal, label);
-+}
-+
-+void MacroAssembler::branchTestString(Condition cond, Register tag,
-+ Label* label) {
-+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+ Condition c = ma_cmp(tag, ImmTag(JSVAL_TAG_STRING), cond);
-+ ma_b(c, label);
-+}
-+
-+void MacroAssembler::branchTestString(Condition cond, const ValueOperand& value,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ splitTag(value, scratch);
-+ branchTestTag(cond, scratch, ImmTag(JSVAL_TAG_STRING), label);
-+}
-+
-+void MacroAssembler::branchTestString(Condition cond, const Address& address,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(address, scratch);
-+ branchTestTag(cond, tag, ImmTag(JSVAL_TAG_STRING), label);
-+}
-+
-+void MacroAssembler::branchTestString(Condition cond, const BaseIndex& address,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(address, scratch);
-+ branchTestTag(cond, tag, ImmTag(JSVAL_TAG_STRING), label);
-+}
-+
-+void MacroAssembler::branchTestStringTruthy(bool b, const ValueOperand& value,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ unboxString(value, scratch);
-+ load32(Address(scratch, JSString::offsetOfLength()), scratch);
-+ as_cmpwi(scratch, 0);
-+ ma_b(b ? NotEqual : Equal, label);
-+}
-+
-+void MacroAssembler::branchTestSymbol(Condition cond, Register tag,
-+ Label* label) {
-+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+ Condition c = ma_cmp(tag, ImmTag(JSVAL_TAG_SYMBOL), cond);
-+ ma_b(c, label);
-+}
-+
-+void MacroAssembler::branchTestSymbol(Condition cond, const ValueOperand& value,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ splitTag(value, scratch);
-+ branchTestTag(cond, scratch, ImmTag(JSVAL_TAG_SYMBOL), label);
-+}
-+
-+void MacroAssembler::branchTestSymbol(Condition cond, const BaseIndex& address,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(address, scratch);
-+ branchTestTag(cond, tag, ImmTag(JSVAL_TAG_SYMBOL), label);
-+}
-+
-+void MacroAssembler::branchTestSymbol(Condition cond, const Address& address,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(address, scratch);
-+ branchTestTag(cond, tag, ImmTag(JSVAL_TAG_SYMBOL), label);
-+}
-+
-+void MacroAssembler::branchTestBigInt(Condition cond, Register tag,
-+ Label* label) {
-+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+ Condition c = ma_cmp(tag, ImmTag(JSVAL_TAG_BIGINT), cond);
-+ ma_b(c, label);
-+}
-+
-+void MacroAssembler::branchTestBigInt(Condition cond, const ValueOperand& value,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ splitTag(value, scratch);
-+ branchTestTag(cond, scratch, ImmTag(JSVAL_TAG_BIGINT), label);
-+}
-+
-+void MacroAssembler::branchTestBigInt(Condition cond, const Address& address,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(address, scratch);
-+ branchTestTag(cond, tag, ImmTag(JSVAL_TAG_BIGINT), label);
-+}
-+
-+void MacroAssembler::branchTestBigInt(Condition cond, const BaseIndex& address,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(address, scratch);
-+ branchTestTag(cond, tag, ImmTag(JSVAL_TAG_BIGINT), label);
-+}
-+
-+void MacroAssembler::branchTestBigIntTruthy(bool b, const ValueOperand& value,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ unboxBigInt(value, scratch);
-+ load32(Address(scratch, BigInt::offsetOfDigitLength()), scratch);
-+ as_cmpwi(scratch, 0);
-+ ma_b(b ? NotEqual : Equal, label);
-+}
-+
-+void MacroAssembler::branchTestNull(Condition cond, Register tag,
-+ Label* label) {
-+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+ Condition c = ma_cmp(tag, ImmTag(JSVAL_TAG_NULL), cond);
-+ ma_b(c, label);
-+}
-+
-+void MacroAssembler::branchTestNull(Condition cond, const ValueOperand& value,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ splitTag(value, scratch);
-+ branchTestTag(cond, scratch, ImmTag(JSVAL_TAG_NULL), label);
-+}
-+
-+void MacroAssembler::branchTestNull(Condition cond, const Address& address,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(address, scratch);
-+ branchTestTag(cond, tag, ImmTag(JSVAL_TAG_NULL), label);
-+}
-+
-+void MacroAssembler::branchTestNull(Condition cond, const BaseIndex& address,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(address, scratch);
-+ branchTestTag(cond, tag, ImmTag(JSVAL_TAG_NULL), label);
-+}
-+
-+void MacroAssembler::branchTestObject(Condition cond, Register tag,
-+ Label* label) {
-+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+ Condition c = ma_cmp(tag, ImmTag(JSVAL_TAG_OBJECT), cond);
-+ ma_b(c, label);
-+}
-+
-+void MacroAssembler::branchTestObject(Condition cond, const ValueOperand& value,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ splitTag(value, scratch);
-+ branchTestTag(cond, scratch, ImmTag(JSVAL_TAG_OBJECT), label);
-+}
-+
-+void MacroAssembler::branchTestObject(Condition cond, const Address& address,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(address, scratch);
-+ branchTestTag(cond, tag, ImmTag(JSVAL_TAG_OBJECT), label);
-+}
-+
-+void MacroAssembler::branchTestObject(Condition cond, const BaseIndex& address,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(address, scratch);
-+ branchTestTag(cond, tag, ImmTag(JSVAL_TAG_OBJECT), label);
-+}
-+
-+void MacroAssembler::branchTestPrimitive(Condition cond,
-+ const ValueOperand& value,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ splitTag(value, scratch);
-+ branchTestPrimitive(cond, scratch, label);
-+}
-+
-+void MacroAssembler::branchTestGCThing(Condition cond, const Address& address,
-+ Label* label) {
-+ branchTestGCThingImpl(cond, address, label);
-+}
-+
-+void MacroAssembler::branchTestGCThing(Condition cond, const BaseIndex& address,
-+ Label* label) {
-+ branchTestGCThingImpl(cond, address, label);
-+}
-+
-+void MacroAssembler::branchTestGCThing(Condition cond,
-+ const ValueOperand& address,
-+ Label* label) {
-+ branchTestGCThingImpl(cond, address, label);
-+}
-+
-+template <typename T>
-+void MacroAssembler::branchTestGCThingImpl(Condition cond, const T& address,
-+ Label* label) {
-+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(address, scratch);
-+ Condition actual = (cond == Equal) ? AboveOrEqual : Below;
-+ Condition c =
-+ ma_cmp(tag, Imm32(JS::detail::ValueLowerInclGCThingTag), actual);
-+ ma_b(c, label);
-+}
-+
-+void MacroAssembler::branchTestPrimitive(Condition cond, Register tag,
-+ Label* label) {
-+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+ Condition actual = (cond == Equal) ? Below : AboveOrEqual;
-+ Condition c =
-+ ma_cmp(tag, Imm32(JS::detail::ValueUpperExclPrimitiveTag), actual);
-+ ma_b(c, label);
-+}
-+
-+void MacroAssembler::branchTestMagic(Condition cond, Register tag,
-+ Label* label) {
-+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+ Condition c = ma_cmp(tag, ImmTag(JSVAL_TAG_MAGIC), cond);
-+ ma_b(c, label);
-+}
-+
-+void MacroAssembler::branchTestMagic(Condition cond, const Address& address,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(address, scratch);
-+ branchTestTag(cond, tag, ImmTag(JSVAL_TAG_MAGIC), label);
-+}
-+
-+void MacroAssembler::branchTestMagic(Condition cond, const BaseIndex& address,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(address, scratch);
-+ branchTestTag(cond, tag, ImmTag(JSVAL_TAG_MAGIC), label);
-+}
-+
-+void MacroAssembler::branchTestMagic(Condition cond, const ValueOperand& value,
-+ Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ splitTag(value, scratch);
-+ branchTestTag(cond, scratch, ImmTag(JSVAL_TAG_MAGIC), label);
-+}
-+
-+void MacroAssembler::branchTestMagic(Condition cond, const Address& valaddr,
-+ JSWhyMagic why, Label* label) {
-+ uint64_t magic = MagicValue(why).asRawBits();
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ loadPtr(valaddr, scratch);
-+ Condition c = ma_cmp(scratch, ImmWord(magic), cond);
-+ ma_b(c, label);
-+}
-+
-+void MacroAssembler::branchTestMagic(Condition cond, const BaseIndex& valaddr,
-+ JSWhyMagic why, Label* label) {
-+ uint64_t magic = MagicValue(why).asRawBits();
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ loadPtr(valaddr, scratch);
-+ Condition c = ma_cmp(scratch, ImmWord(magic), cond);
-+ ma_b(c, label);
-+}
-+
-+template <typename T>
-+void MacroAssembler::branchTestValue(Condition cond, const T& lhs,
-+ const ValueOperand& rhs, Label* label) {
-+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ loadPtr(lhs, scratch);
-+ Condition c = ma_cmp(scratch, rhs.valueReg(), cond);
-+ ma_b(c, label);
-+}
-+
-+// ===============================================================
-+// Test-set functions
-+
-+template <typename T>
-+void MacroAssembler::testNumberSet(Condition cond, const T& src,
-+ Register dest) {
-+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(src, scratch);
-+ Condition actual = (cond == Equal) ? BelowOrEqual : Above;
-+ Condition c = ma_cmp(tag, Imm32(JS::detail::ValueUpperInclNumberTag), actual);
-+ ma_cmp_set(dest, c);
-+}
-+
-+template <typename T>
-+void MacroAssembler::testBooleanSet(Condition cond, const T& src,
-+ Register dest) {
-+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(src, scratch);
-+ Condition c = ma_cmp(tag, ImmTag(JSVAL_TAG_BOOLEAN), cond);
-+ ma_cmp_set(dest, c);
-+}
-+
-+template <typename T>
-+void MacroAssembler::testStringSet(Condition cond, const T& src,
-+ Register dest) {
-+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(src, scratch);
-+ Condition c = ma_cmp(tag, ImmTag(JSVAL_TAG_STRING), cond);
-+ ma_cmp_set(dest, c);
-+}
-+
-+template <typename T>
-+void MacroAssembler::testSymbolSet(Condition cond, const T& src,
-+ Register dest) {
-+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(src, scratch);
-+ Condition c = ma_cmp(tag, ImmTag(JSVAL_TAG_SYMBOL), cond);
-+ ma_cmp_set(dest, c);
-+}
-+
-+template <typename T>
-+void MacroAssembler::testBigIntSet(Condition cond, const T& src,
-+ Register dest) {
-+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ Register tag = extractTag(src, scratch);
-+ Condition c = ma_cmp(tag, ImmTag(JSVAL_TAG_BIGINT), cond);
-+ ma_cmp_set(dest, c);
-+}
-+
-+// ===============================================================
-+// Computed address / conditional move / conditional load
-+
-+void MacroAssembler::branchToComputedAddress(const BaseIndex& addr) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ loadPtr(addr, scratch);
-+ branch(scratch);
-+}
-+
-+void MacroAssembler::cmp32Move32(Condition cond, Register lhs, Imm32 rhs,
-+ Register src, Register dest) {
-+ Condition c = ma_cmp(lhs, rhs, cond, true);
-+ ma_cmp_move(dest, src, c);
-+}
-+
-+void MacroAssembler::cmp32Move32(Condition cond, Register lhs, Register rhs,
-+ Register src, Register dest) {
-+ Condition c = ma_cmp(lhs, rhs, cond, true);
-+ ma_cmp_move(dest, src, c);
-+}
-+
-+void MacroAssembler::cmp32Move32(Condition cond, Register lhs,
-+ const Address& rhs, Register src,
-+ Register dest) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ load32(rhs, scratch);
-+ Condition c = ma_cmp(lhs, scratch, cond, true);
-+ ma_cmp_move(dest, src, c);
-+}
-+
-+void MacroAssembler::cmp32MovePtr(Condition cond, Register lhs, Imm32 rhs,
-+ Register src, Register dest) {
-+ Condition c = ma_cmp(lhs, rhs, cond, true);
-+ ma_cmp_move(dest, src, c);
-+}
-+
-+void MacroAssembler::cmpPtrMovePtr(Condition cond, Register lhs, Imm32 rhs,
-+ Register src, Register dest) {
-+ Condition c = ma_cmp(lhs, ImmWord(int64_t(rhs.value)), cond);
-+ ma_cmp_move(dest, src, c);
-+}
-+
-+void MacroAssembler::cmpPtrMovePtr(Condition cond, Register lhs, Register rhs,
-+ Register src, Register dest) {
-+ Condition c = ma_cmp(lhs, rhs, cond);
-+ ma_cmp_move(dest, src, c);
-+}
-+
-+void MacroAssembler::cmpPtrMovePtr(Condition cond, Register lhs,
-+ const Address& rhs, Register src,
-+ Register dest) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ loadPtr(rhs, scratch);
-+ Condition c = ma_cmp(lhs, scratch, cond);
-+ ma_cmp_move(dest, src, c);
-+}
-+
-+void MacroAssembler::cmp32Load32(Condition cond, Register lhs,
-+ const Address& rhs, const Address& src,
-+ Register dest) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ load32(rhs, scratch);
-+ Condition c = ma_cmp(lhs, scratch, cond, true);
-+ // Conditional load: load into scratch, then isel.
-+ load32(src, scratch);
-+ ma_cmp_move(dest, scratch, c);
-+}
-+
-+void MacroAssembler::cmp32Load32(Condition cond, Register lhs, Register rhs,
-+ const Address& src, Register dest) {
-+ Condition c = ma_cmp(lhs, rhs, cond, true);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ load32(src, scratch);
-+ ma_cmp_move(dest, scratch, c);
-+}
-+
-+void MacroAssembler::cmp32Load32(Condition cond, Register lhs, Imm32 rhs,
-+ const Address& src, Register dest) {
-+ Condition c = ma_cmp(lhs, rhs, cond, true);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ load32(src, scratch);
-+ ma_cmp_move(dest, scratch, c);
-+}
-+
-+void MacroAssembler::cmp32LoadPtr(Condition cond, const Address& lhs, Imm32 rhs,
-+ const Address& src, Register dest) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ load32(lhs, scratch);
-+ Condition c = ma_cmp(scratch, rhs, cond, true);
-+ loadPtr(src, scratch);
-+ ma_cmp_move(dest, scratch, c);
-+}
-+
-+void MacroAssembler::test32LoadPtr(Condition cond, const Address& addr,
-+ Imm32 mask, const Address& src,
-+ Register dest) {
-+ MOZ_ASSERT(cond == Zero || cond == NonZero);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ load32(addr, scratch);
-+ if (is_uintN(mask.value, 16)) {
-+ as_andi_rc(scratch, scratch, mask.value);
-+ } else {
-+ // Use a nested scope so scratch2 is released before loadPtr below.
-+ UseScratchRegisterScope temps2(asMasm());
-+ Register scratch2 = temps2.Acquire();
-+ move32(mask, scratch2);
-+ as_and__rc(scratch, scratch, scratch2); // record form folds the cmpdi
-+ }
-+ Condition base = static_cast<Condition>(cond & ~Assembler::ConditionZero);
-+ loadPtr(src, scratch);
-+ ma_cmp_move(dest, scratch, base);
-+}
-+
-+void MacroAssembler::test32MovePtr(Condition cond, Register operand, Imm32 mask,
-+ Register src, Register dest) {
-+ MOZ_ASSERT(cond == Zero || cond == NonZero);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ if (is_uintN(mask.value, 16)) {
-+ as_andi_rc(scratch, operand, mask.value);
-+ } else {
-+ move32(mask, scratch);
-+ as_and__rc(scratch, operand, scratch); // record form folds the cmpdi
-+ }
-+ Condition base = static_cast<Condition>(cond & ~Assembler::ConditionZero);
-+ ma_cmp_move(dest, src, base);
-+}
-+
-+void MacroAssembler::test32MovePtr(Condition cond, const Address& addr,
-+ Imm32 mask, Register src, Register dest) {
-+ MOZ_ASSERT(cond == Zero || cond == NonZero);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ load32(addr, scratch);
-+ and32(mask, scratch);
-+ as_cmpdi(scratch, 0);
-+ Condition base = static_cast<Condition>(cond & ~Assembler::ConditionZero);
-+ ma_cmp_move(dest, src, base);
-+}
-+
-+// ===============================================================
-+// Spectre mitigations
-+
-+void MacroAssembler::spectreMovePtr(Condition cond, Register src,
-+ Register dest) {
-+ // Assumes compare already issued.
-+ Condition base = static_cast<Condition>(
-+ cond & ~(Assembler::ConditionUnsigned | Assembler::ConditionZero));
-+ ma_cmp_move(dest, src, base);
-+}
-+
-+void MacroAssembler::spectreZeroRegister(Condition cond, Register scratch,
-+ Register dest) {
-+ // Assumes compare already issued. Zero dest if condition is true.
-+ Condition origBase = static_cast<Condition>(
-+ cond & ~(Assembler::ConditionUnsigned | Assembler::ConditionZero));
-+ // If original condition is true, we want dest=0.
-+ // isel: if condition true, select zero; else keep dest.
-+ xs_li(scratch, 0);
-+ ma_cmp_move(dest, scratch, origBase);
-+}
-+
-+void MacroAssembler::spectreBoundsCheck32(Register index, Register length,
-+ Register maybeScratch,
-+ Label* failure) {
-+ Condition c = ma_cmp(index, length, Below, true);
-+ if (failure) {
-+ ma_b(InvertCondition(c), failure);
-+ }
-+ if (maybeScratch != InvalidReg) {
-+ xs_li(maybeScratch, 0);
-+ ma_cmp_move(index, maybeScratch, InvertCondition(c));
-+ }
-+}
-+
-+void MacroAssembler::spectreBoundsCheck32(Register index, const Address& length,
-+ Register maybeScratch,
-+ Label* failure) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ load32(length, scratch);
-+ spectreBoundsCheck32(index, scratch, maybeScratch, failure);
-+}
-+
-+void MacroAssembler::spectreBoundsCheckPtr(Register index, Register length,
-+ Register maybeScratch,
-+ Label* failure) {
-+ Condition c = ma_cmp(index, length, Below);
-+ if (failure) {
-+ ma_b(InvertCondition(c), failure);
-+ }
-+ if (maybeScratch != InvalidReg) {
-+ xs_li(maybeScratch, 0);
-+ ma_cmp_move(index, maybeScratch, InvertCondition(c));
-+ }
-+}
-+
-+void MacroAssembler::spectreBoundsCheckPtr(Register index,
-+ const Address& length,
-+ Register maybeScratch,
-+ Label* failure) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ loadPtr(length, scratch);
-+ spectreBoundsCheckPtr(index, scratch, maybeScratch, failure);
-+}
-+
-+// ===============================================================
-+// Memory access primitives
-+
-+FaultingCodeOffset MacroAssembler::storeFloat32(FloatRegister src,
-+ const Address& addr) {
-+ MOZ_ASSERT(addr.base != r0);
-+ if (is_intN(addr.offset, 16)) {
-+ return FaultingCodeOffset(as_stfs(src, addr.base, addr.offset).getOffset());
-+ }
-+ if (HasPOWER10() && is_intN((intptr_t)addr.offset, 34)) {
-+ return FaultingCodeOffset(
-+ as_pstfs(src, addr.base, (int64_t)addr.offset, /*R=*/false)
-+ .getOffset());
-+ }
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ movePtr(ImmWord(addr.offset), scratch);
-+ return FaultingCodeOffset(as_stfsx(src, addr.base, scratch).getOffset());
-+}
-+
-+FaultingCodeOffset MacroAssembler::storeFloat32(FloatRegister src,
-+ const BaseIndex& addr) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ computeEffectiveAddress(addr, scratch);
-+ return FaultingCodeOffset(as_stfs(src, scratch, 0).getOffset());
-+}
-+
-+FaultingCodeOffset MacroAssembler::storeDouble(FloatRegister src,
-+ const Address& addr) {
-+ MOZ_ASSERT(addr.base != r0);
-+ if (is_intN(addr.offset, 16)) {
-+ return FaultingCodeOffset(as_stfd(src, addr.base, addr.offset).getOffset());
-+ }
-+ if (HasPOWER10() && is_intN((intptr_t)addr.offset, 34)) {
-+ return FaultingCodeOffset(
-+ as_pstfd(src, addr.base, (int64_t)addr.offset, /*R=*/false)
-+ .getOffset());
-+ }
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ movePtr(ImmWord(addr.offset), scratch);
-+ return FaultingCodeOffset(as_stfdx(src, addr.base, scratch).getOffset());
-+}
-+
-+FaultingCodeOffset MacroAssembler::storeDouble(FloatRegister src,
-+ const BaseIndex& addr) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ computeEffectiveAddress(addr, scratch);
-+ return FaultingCodeOffset(as_stfd(src, scratch, 0).getOffset());
-+}
-+
-+FaultingCodeOffset MacroAssembler::storeFloat16(FloatRegister src,
-+ const Address& dest,
-+ Register temp) {
-+ MOZ_ASSERT(HasPOWER9());
-+ if (dest.offset == 0) {
-+ return FaultingCodeOffset(as_stxsihx(src, r0, dest.base).getOffset());
-+ }
-+ if (is_intN(dest.offset, 16)) {
-+ as_addi(temp, dest.base, dest.offset);
-+ return FaultingCodeOffset(as_stxsihx(src, r0, temp).getOffset());
-+ }
-+ movePtr(ImmWord(dest.offset), temp);
-+ return FaultingCodeOffset(as_stxsihx(src, dest.base, temp).getOffset());
-+}
-+
-+FaultingCodeOffset MacroAssembler::storeFloat16(FloatRegister src,
-+ const BaseIndex& dest,
-+ Register temp) {
-+ MOZ_ASSERT(HasPOWER9());
-+ computeEffectiveAddress(dest, temp);
-+ return FaultingCodeOffset(as_stxsihx(src, r0, temp).getOffset());
-+}
-+
-+void MacroAssembler::memoryBarrier(MemoryBarrier barrier) {
-+ if (barrier.isNone()) {
-+ return;
-+ }
-+ if (barrier.hasStoreLoad() || barrier.hasSync()) {
-+ as_sync();
-+ } else {
-+ as_lwsync();
-+ }
-+}
-+
-+// ===============================================================
-+// Clamping functions
-+
-+void MacroAssembler::clampIntToUint8(Register reg) {
-+ // Clamp to [0, 255].
-+ Label done;
-+ as_cmpwi(reg, 255);
-+ ma_b(LessThanOrEqual, &done);
-+ move32(Imm32(255), reg);
-+ bind(&done);
-+ Label positive;
-+ as_cmpwi(reg, 0);
-+ ma_b(GreaterThanOrEqual, &positive);
-+ move32(Imm32(0), reg);
-+ bind(&positive);
-+}
-+
-+// ===============================================================
-+// Unboxing
-+
-+void MacroAssembler::fallibleUnboxPtr(const ValueOperand& src, Register dest,
-+ JSValueType type, Label* fail) {
-+ MOZ_ASSERT(type == JSVAL_TYPE_OBJECT || type == JSVAL_TYPE_STRING ||
-+ type == JSVAL_TYPE_SYMBOL || type == JSVAL_TYPE_BIGINT);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ splitTag(src, scratch);
-+ Condition c = ma_cmp(scratch, ImmTag(JSVAL_TYPE_TO_TAG(type)), NotEqual);
-+ ma_b(c, fail);
-+ unboxNonDouble(src, dest, type);
-+}
-+
-+void MacroAssembler::fallibleUnboxPtr(const Address& src, Register dest,
-+ JSValueType type, Label* fail) {
-+ loadValue(src, ValueOperand(dest));
-+ fallibleUnboxPtr(ValueOperand(dest), dest, type, fail);
-+}
-+
-+void MacroAssembler::fallibleUnboxPtr(const BaseIndex& src, Register dest,
-+ JSValueType type, Label* fail) {
-+ loadValue(src, ValueOperand(dest));
-+ fallibleUnboxPtr(ValueOperand(dest), dest, type, fail);
-+}
-+
-+void MacroAssembler::wasmAddSubI128HI64(Register lhsLo, Register lhsHi,
-+ Register rhsLo, Register rhsHi,
-+ Register output, bool isAdd) {
-+ MOZ_RELEASE_ASSERT(output != lhsLo && output != lhsHi && output != rhsLo &&
-+ output != rhsHi);
-+ if (isAdd) {
-+ // addc sets CA (carry), adde uses it.
-+ as_addc(output, lhsLo, rhsLo); // output = lhsLo + rhsLo, CA = carry
-+ as_adde(output, lhsHi, rhsHi); // output = lhsHi + rhsHi + CA
-+ } else {
-+ // subfc: rd = rb - ra, sets CA (borrow complement).
-+ // subfe: rd = rb + ~ra + CA.
-+ as_subfc(output, rhsLo, lhsLo); // output = lhsLo - rhsLo, CA = ~borrow
-+ as_subfe(output, rhsHi, lhsHi); // output = lhsHi - rhsHi - borrow
-+ }
-+}
-+
-+void MacroAssembler::wasmMulI64WideHI64(Register lhs, Register rhs,
-+ Register output, bool isSigned) {
-+ if (isSigned) {
-+ as_mulhd(output, lhs, rhs);
-+ } else {
-+ as_mulhdu(output, lhs, rhs);
-+ }
-+}
-+
-+//}}} check_macroassembler_style
-+
-+void MacroAssemblerPPC64Compat::incrementInt32Value(const Address& addr) {
-+ asMasm().add32(Imm32(1), addr);
-+}
-+
-+void MacroAssemblerPPC64Compat::retn(Imm32 n) {
-+ // Load return address from [SP,0] first, then adjust SP, then return.
-+ // Must load RA before adjusting SP (like loong64), since the RA is at
-+ // the current top of stack, not at SP+n.
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ as_ld(scratch, StackPointer, 0);
-+ if (n.value != 0) {
-+ asMasm().addPtr(Imm32(n.value), StackPointer);
-+ }
-+ xs_mtlr(scratch);
-+ as_blr();
-+}
-+
-+// ===============================================================
-+// Template specializations (outside check_macroassembler_style)
-+
-+template <>
-+inline void MacroAssembler::cmpPtrSet(Assembler::Condition cond, Address lhs,
-+ ImmPtr rhs, Register dest) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ loadPtr(lhs, scratch);
-+ Assembler::Condition c = ma_cmp(scratch, rhs, cond);
-+ ma_cmp_set(dest, c);
-+}
-+
-+template <>
-+inline void MacroAssembler::cmpPtrSet(Assembler::Condition cond, Register lhs,
-+ Address rhs, Register dest) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ loadPtr(rhs, scratch);
-+ Assembler::Condition c = ma_cmp(lhs, scratch, cond);
-+ ma_cmp_set(dest, c);
-+}
-+
-+template <>
-+inline void MacroAssembler::cmpPtrSet(Assembler::Condition cond, Address lhs,
-+ Register rhs, Register dest) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ loadPtr(lhs, scratch);
-+ Assembler::Condition c = ma_cmp(scratch, rhs, cond);
-+ ma_cmp_set(dest, c);
-+}
-+
-+template <>
-+inline void MacroAssembler::cmp32Set(Assembler::Condition cond, Register lhs,
-+ Address rhs, Register dest) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ load32(rhs, scratch);
-+ Assembler::Condition c = ma_cmp(lhs, scratch, cond, true);
-+ ma_cmp_set(dest, c);
-+}
-+
-+template <>
-+inline void MacroAssembler::cmp32Set(Assembler::Condition cond, Address lhs,
-+ Register rhs, Register dest) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ load32(lhs, scratch);
-+ Assembler::Condition c = ma_cmp(scratch, rhs, cond, true);
-+ ma_cmp_set(dest, c);
-+}
-+
-+template <>
-+inline void MacroAssembler::cmp32Set(Assembler::Condition cond, Address lhs,
-+ Imm32 rhs, Register dest) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ load32(lhs, scratch);
-+ Assembler::Condition c = ma_cmp(scratch, rhs, cond, true);
-+ ma_cmp_set(dest, c);
-+}
-+
-+//{{{ check_macroassembler_style
-+// ===============================================================
-+// SIMD load/store (128-bit)
-+
-+FaultingCodeOffset MacroAssembler::loadUnalignedSimd128(const Address& src,
-+ FloatRegister dest) {
-+ UseScratchRegisterScope temps(asMasm());
-+ if (HasPOWER10() && is_intN((intptr_t)src.offset, 34)) {
-+ // POWER10 prefixed load — natural-LE byte order, no GPR scratch.
-+ return FaultingCodeOffset(
-+ as_plxv(dest.encoding(), src.base, (int64_t)src.offset, /*R=*/false)
-+ .getOffset());
-+ }
-+ if (HasPOWER9()) {
-+ // POWER9: lxvx (X-form, indexed) loads 128 bits in correct LE order.
-+ Register scratch = temps.Acquire();
-+ if (src.offset == 0) {
-+ // RA=0 means "use 0 as base" in indexed forms, so use r0 encoding.
-+ return FaultingCodeOffset(as_lxvx(dest, r0, src.base).getOffset());
-+ }
-+ movePtr(ImmWord(src.offset), scratch);
-+ return FaultingCodeOffset(as_lxvx(dest, src.base, scratch).getOffset());
-+ }
-+ // POWER8: lxvd2x loads with doubleword swap on LE. Fix with xxpermdi.
-+ Register scratch = temps.Acquire();
-+ FaultingCodeOffset fco;
-+ if (src.offset == 0) {
-+ fco = FaultingCodeOffset(as_lxvd2x(dest, r0, src.base).getOffset());
-+ } else {
-+ movePtr(ImmWord(src.offset), scratch);
-+ fco = FaultingCodeOffset(as_lxvd2x(dest, src.base, scratch).getOffset());
-+ }
-+ as_xxpermdi(dest, dest, dest, 2);
-+ return fco;
-+}
-+
-+FaultingCodeOffset MacroAssembler::loadUnalignedSimd128(const BaseIndex& src,
-+ FloatRegister dest) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ computeScaledAddress(src, scratch);
-+ if (src.offset != 0) {
-+ // addPtr picks up POWER10 paddi (1 prefixed insn) when available;
-+ // falls back to movePtr+add on P9/P8. Drops the explicit scratch2.
-+ addPtr(ImmWord(src.offset), scratch);
-+ }
-+ if (HasPOWER9()) {
-+ return FaultingCodeOffset(as_lxvx(dest, r0, scratch).getOffset());
-+ }
-+ FaultingCodeOffset fco(as_lxvd2x(dest, r0, scratch).getOffset());
-+ as_xxpermdi(dest, dest, dest, 2);
-+ return fco;
-+}
-+
-+FaultingCodeOffset MacroAssembler::storeUnalignedSimd128(FloatRegister src,
-+ const Address& dest) {
-+ UseScratchRegisterScope temps(asMasm());
-+ if (HasPOWER10() && is_intN((intptr_t)dest.offset, 34)) {
-+ // POWER10 prefixed store — natural-LE byte order, no GPR scratch.
-+ return FaultingCodeOffset(
-+ as_pstxv(src.encoding(), dest.base, (int64_t)dest.offset, /*R=*/false)
-+ .getOffset());
-+ }
-+ if (HasPOWER9()) {
-+ Register scratch = temps.Acquire();
-+ if (dest.offset == 0) {
-+ return FaultingCodeOffset(as_stxvx(src, r0, dest.base).getOffset());
-+ }
-+ movePtr(ImmWord(dest.offset), scratch);
-+ return FaultingCodeOffset(as_stxvx(src, dest.base, scratch).getOffset());
-+ }
-+ // POWER8: stxvd2x stores with doubleword swap on LE.
-+ // Swap before store, then swap back to restore the register.
-+ ScratchSimd128Scope scratch128(*this);
-+ as_xxpermdi(scratch128, src, src, 2);
-+ Register scratch = temps.Acquire();
-+ FaultingCodeOffset fco;
-+ if (dest.offset == 0) {
-+ fco = FaultingCodeOffset(as_stxvd2x(scratch128, r0, dest.base).getOffset());
-+ } else {
-+ movePtr(ImmWord(dest.offset), scratch);
-+ fco = FaultingCodeOffset(
-+ as_stxvd2x(scratch128, dest.base, scratch).getOffset());
-+ }
-+ return fco;
-+}
-+
-+FaultingCodeOffset MacroAssembler::storeUnalignedSimd128(
-+ FloatRegister src, const BaseIndex& dest) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ computeScaledAddress(dest, scratch);
-+ if (dest.offset != 0) {
-+ addPtr(ImmWord(dest.offset), scratch);
-+ }
-+ if (HasPOWER9()) {
-+ return FaultingCodeOffset(as_stxvx(src, r0, scratch).getOffset());
-+ }
-+ ScratchSimd128Scope scratch128(*this);
-+ as_xxpermdi(scratch128, src, src, 2);
-+ return FaultingCodeOffset(as_stxvd2x(scratch128, r0, scratch).getOffset());
-+}
-+
-+// ===============================================================
-+// SIMD operations
-+//
-+// Scratch register conventions for SIMD helpers (read this before writing
-+// a new one):
-+//
-+// 1. `ScratchSimd128Scope scratch(*this)` — acquires v0 (= VR0 = VSR32,
-+// non-allocatable). Constructed as {FloatRegisters::f0, Simd128} so
-+// encoding() = 0 + 32 = 32 (per Architecture-ppc64.h). Default temp.
-+// One scope at a time per helper. Safe to pass to any VMX/VSX
-+// instruction; the allocator never places a live v128 in v0.
-+//
-+// 2. **Do NOT** write to VR1..VR31 (= VSR33..VSR63) without a Lowering
-+// temp. VR1..VR31 are allocatable; a live wasm v128 may be sitting in
-+// any of them. Use `ScratchSimd128Scope` (rule 1) or a Lowering temp.
-+//
-+// 3. **Red-zone stash** — use `RedZoneStashSimd128` / `RedZoneRestoreSimd128`
-+// (declared just below) when a helper genuinely needs >1 SIMD scratch
-+// AND adding a Lowering temp would require LIR + MIR + CodeGen changes.
-+// ELFv2 reserves 288 bytes below SP; we use at most 32 (two 16-byte
-+// slots). Live users: `extAddPairwiseInt*` (2 slots), `swizzleInt8x16`
-+// (1 slot), `dotInt8x16Int7x16ThenAdd` 4-arg (1 slot). If you find
-+// yourself wanting a 3rd slot or nested save/restore, prefer a Lowering
-+// temp instead — the red-zone approach is tolerable because it's
-+// self-contained to a single helper. The `MOZ_ASSERT(slot < 2)` inside
-+// the helpers enforces this at test time.
-+//
-+// Simd128 lives in VR-namespace (VSR32-63), so VMX ops address Simd128
-+// FloatRegisters directly with no staging. Encoding is 32-63; the VMX
-+// VR field is 5-bit (0-31), so we mask with `& 31`.
-+
-+// Two 16-byte Simd128 slots available in the ELFv2 red zone for short-lived
-+// SIMD spills (see point 3 of the SIMD conventions preamble above).
-+static constexpr int kRedZoneSimd128MaxSlots = 2;
-+
-+static inline void RedZoneStashSimd128(MacroAssembler& masm, FloatRegister src,
-+ int slot) {
-+ MOZ_ASSERT(slot >= 0 && slot < kRedZoneSimd128MaxSlots);
-+ masm.storeUnalignedSimd128(src, Address(StackPointer, -16 * (slot + 1)));
-+}
-+
-+static inline void RedZoneRestoreSimd128(MacroAssembler& masm, int slot,
-+ FloatRegister dest) {
-+ MOZ_ASSERT(slot >= 0 && slot < kRedZoneSimd128MaxSlots);
-+ masm.loadUnalignedSimd128(Address(StackPointer, -16 * (slot + 1)), dest);
-+}
-+
-+typedef void (*VmxBinaryFn)(Assembler&, uint8_t, uint8_t, uint8_t);
-+
-+static void EmitVmxBinary(MacroAssembler& masm, VmxBinaryFn vmxOp,
-+ FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ vmxOp(static_cast<Assembler&>(masm), dest.encoding() & 31,
-+ lhs.encoding() & 31, rhs.encoding() & 31);
-+}
-+
-+// Macro for defining VMX binary wrappers.
-+#define VMX_BINARY_WRAPPER(vmxInst) \
-+ [](Assembler& a, uint8_t vrt, uint8_t vra, uint8_t vrb) { \
-+ a.as_##vmxInst(vrt, vra, vrb); \
-+ }
-+
-+// Emit op directly on Simd128 dest, then xxlnor in place.
-+template <typename VmxBinaryFnT>
-+static void EmitVmxBinaryNot(MacroAssembler& masm, VmxBinaryFnT vmxOp,
-+ FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ vmxOp(static_cast<Assembler&>(masm), dest.encoding() & 31,
-+ lhs.encoding() & 31, rhs.encoding() & 31);
-+ masm.as_xxlnor(dest, dest, dest);
-+}
-+
-+// Integer SIMD compare helper. VMX compare instructions produce all-ones
-+// for true, all-zeros for false per element.
-+// Available VMX compares: vcmpequ* (eq), vcmpgts* (signed gt), vcmpgtu*
-+// (unsigned gt). Other conditions derived by swapping operands or
-+// complementing.
-+template <typename EqFn, typename GtsFn, typename GtuFn>
-+static void EmitVmxCompare(MacroAssembler& masm, Assembler::Condition cond,
-+ FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest, EqFn eqFn, GtsFn gtsFn,
-+ GtuFn gtuFn) {
-+ switch (cond) {
-+ case Assembler::Equal:
-+ EmitVmxBinary(masm, eqFn, lhs, rhs, dest);
-+ break;
-+ case Assembler::NotEqual:
-+ EmitVmxBinaryNot(masm, eqFn, lhs, rhs, dest);
-+ break;
-+ case Assembler::GreaterThan:
-+ EmitVmxBinary(masm, gtsFn, lhs, rhs, dest);
-+ break;
-+ case Assembler::GreaterThanOrEqual:
-+ // !(rhs > lhs)
-+ EmitVmxBinaryNot(masm, gtsFn, rhs, lhs, dest);
-+ break;
-+ case Assembler::LessThan:
-+ // rhs > lhs (swap)
-+ EmitVmxBinary(masm, gtsFn, rhs, lhs, dest);
-+ break;
-+ case Assembler::LessThanOrEqual:
-+ // !(lhs > rhs)
-+ EmitVmxBinaryNot(masm, gtsFn, lhs, rhs, dest);
-+ break;
-+ case Assembler::Above:
-+ EmitVmxBinary(masm, gtuFn, lhs, rhs, dest);
-+ break;
-+ case Assembler::AboveOrEqual:
-+ EmitVmxBinaryNot(masm, gtuFn, rhs, lhs, dest);
-+ break;
-+ case Assembler::Below:
-+ EmitVmxBinary(masm, gtuFn, rhs, lhs, dest);
-+ break;
-+ case Assembler::BelowOrEqual:
-+ EmitVmxBinaryNot(masm, gtuFn, lhs, rhs, dest);
-+ break;
-+ default:
-+ MOZ_CRASH("Unexpected SIMD integer condition");
-+ }
-+}
-+
-+// Emit ternary VMX op directly on Simd128 regs, no staging.
-+typedef void (*VmxTernaryFn)(Assembler&, uint8_t, uint8_t, uint8_t, uint8_t);
-+
-+static void EmitVmxTernary(MacroAssembler& masm, VmxTernaryFn vmxOp,
-+ FloatRegister a, FloatRegister b, FloatRegister c,
-+ FloatRegister dest) {
-+ vmxOp(static_cast<Assembler&>(masm), dest.encoding() & 31, a.encoding() & 31,
-+ b.encoding() & 31, c.encoding() & 31);
-+}
-+
-+// Emit unary VMX op directly on Simd128 regs, no staging.
-+typedef void (*VmxUnaryFn)(Assembler&, uint8_t, uint8_t);
-+
-+static void EmitVmxUnary(MacroAssembler& masm, VmxUnaryFn vmxOp,
-+ FloatRegister src, FloatRegister dest) {
-+ vmxOp(static_cast<Assembler&>(masm), dest.encoding() & 31,
-+ src.encoding() & 31);
-+}
-+
-+// Helper: create a zero SIMD register using xxlxor.
-+static void ZeroSimd128(MacroAssembler& masm, FloatRegister dest) {
-+ masm.as_xxlxor(dest, dest, dest);
-+}
-+
-+void MacroAssembler::moveSimd128(FloatRegister src, FloatRegister dest) {
-+ if (src != dest) {
-+ as_xxlor(dest, src, src);
-+ }
-+}
-+
-+void MacroAssembler::loadConstantSimd128(const SimdConstant& v,
-+ FloatRegister dest) {
-+ // Load 128-bit constant from inline constant pool.
-+ // Clobbers SecondScratchReg (r12).
-+ loadFromPoolSimd128(dest, v);
-+}
-+
-+// PPC64 LE lane mapping:
-+// Wasm lane K = memory byte K = register byte (15-K).
-+// mfvsrd extracts register bits[0:63] = BE dword 0 = Wasm lanes 8-15 (bytes).
-+// For VMX byte ops, BE byte index = 15 - wasm_lane.
-+// For VMX halfword ops, BE halfword index = 7 - wasm_halfword.
-+// For VSX word ops (xxspltw), BE word index = 3 - wasm_word.
-+// For doubleword ops, BE dword index = 1 - wasm_dword.
-+
-+void MacroAssembler::splatX16(Register src, FloatRegister dest) {
-+ // mtvsrd writes src into BE 0..63 of dest (low byte at BE byte 7);
-+ // vspltb then splats that byte over all 16 lanes. dest aliases as
-+ // both source and destination — vspltb tolerates this. No extra
-+ // scratch register required, so callers that already hold a
-+ // ScratchSimd128Scope (extAddPairwise*, var-shift narrow forms) do
-+ // not see a nested-acquire collision.
-+ as_mtvsrd(dest, src);
-+ as_vspltb(dest, dest, 7);
-+}
-+
-+void MacroAssembler::splatX8(Register src, FloatRegister dest) {
-+ // Same shape as splatX16 with halfword granularity. mtvsrd places
-+ // the low 16 bits at BE halfword 3 (= BE bytes 6..7); vsplth picks
-+ // it up and splats across 8 lanes. vsplth reads only the chosen
-+ // halfword, so negative i32 inputs do not need a 16-bit pre-mask
-+ // (which the previous GPR-replicate path required).
-+ as_mtvsrd(dest, src);
-+ as_vsplth(dest, dest, 3);
-+}
-+
-+void MacroAssembler::splatX4(Register src, FloatRegister dest) {
-+ if (HasPOWER9()) {
-+ as_mtvsrws(dest, src);
-+ } else {
-+ as_mtvsrd(dest, src);
-+ as_xxspltw(dest, dest, 1);
-+ }
-+}
-+
-+void MacroAssembler::splatX4(FloatRegister src, FloatRegister dest) {
-+ // src is a double-precision FPR holding a float value (the JIT keeps
-+ // FP32 in DP-equivalent form on PPC64). Convert DP→SP into BE word 0
-+ // (xscvdpspn lays the single at bits[0:31] / BE word 0), then splat
-+ // word 0 to all four lanes.
-+ as_xscvdpspn(dest, src);
-+ as_xxspltw(dest, dest, 0);
-+}
-+
-+void MacroAssembler::splatX2(FloatRegister src, FloatRegister dest) {
-+ // Splat scalar double to both doubleword lanes.
-+ // Scalar value is in register bits[0:63] (BE dword 0).
-+ // xxpermdi dm=0: dest = [src.dw0, src.dw0]
-+ as_xxpermdi(dest, src, src, 0);
-+}
-+
-+// Helpers: splat Imm32 into SIMD register at various element widths.
-+// VMX shift instructions read the shift count from EACH element independently,
-+// so the count must be replicated to every byte/halfword/word as appropriate.
-+//
-+// Fast path for small constants: vspltis{b,h,w} (POWER7+) splats a 5-bit
-+// signed immediate to all lanes in 1 insn with no pool entry. For values
-+// outside [-16, 15] we fall back to the inline-pool path.
-+static void SplatImm8(MacroAssembler& masm, Imm32 imm, FloatRegister dest) {
-+ int8_t val = (int8_t)imm.value;
-+ if (val >= -16 && val <= 15) {
-+ masm.as_vspltisb(dest.encoding() & 31, val);
-+ return;
-+ }
-+ if (HasPOWER9()) {
-+ // P9 xxspltib handles the full 8-bit range in 1 insn.
-+ masm.as_xxspltib(dest, (uint8_t)val);
-+ return;
-+ }
-+ int8_t bytes[16];
-+ for (int i = 0; i < 16; i++) bytes[i] = val;
-+ masm.loadConstantSimd128(SimdConstant::CreateX16(bytes), dest);
-+}
-+
-+static void SplatImm16(MacroAssembler& masm, Imm32 imm, FloatRegister dest) {
-+ int16_t val = (int16_t)imm.value;
-+ if (val >= -16 && val <= 15) {
-+ masm.as_vspltish(dest.encoding() & 31, (int8_t)val);
-+ return;
-+ }
-+ int16_t halfs[8];
-+ for (int i = 0; i < 8; i++) halfs[i] = val;
-+ masm.loadConstantSimd128(SimdConstant::CreateX8(halfs), dest);
-+}
-+
-+static void SplatImm32(MacroAssembler& masm, Imm32 imm, FloatRegister dest) {
-+ int32_t val = imm.value;
-+ if (val >= -16 && val <= 15) {
-+ masm.as_vspltisw(dest.encoding() & 31, (int8_t)val);
-+ return;
-+ }
-+ int32_t words[4] = {val, val, val, val};
-+ masm.loadConstantSimd128(SimdConstant::CreateX4(words), dest);
-+}
-+
-+// ===============================================================
-+// Extract lane
-+
-+static void ExtractLaneToGPR(MacroAssembler& masm, uint32_t lane,
-+ FloatRegister src, Register dest,
-+ unsigned laneWidthBytes, unsigned laneWidthBits) {
-+ // Extract Wasm lane from vector register to GPR.
-+ // Wasm lane K → register byte offset (15 - K*laneWidthBytes).
-+ //
-+ // Strategy: use mfvsrd to get one 64-bit half of the register, then shift
-+ // and mask to isolate the lane.
-+ //
-+ // mfvsrd gets register bits[0:63] (BE dword 0) = Wasm lanes in the high
-+ // half of the register (high-numbered lanes in LE memory order).
-+ // For an N-bit lane at Wasm index L:
-+ // If L is in the high dword (L >= 8/laneWidthBytes):
-+ // use mfvsrd; lane is at GPR bit offset laneWidthBits*(L -
-+ // 8/laneWidthBytes) from LSB
-+ // Else (L in low dword):
-+ // swap dwords, then mfvsrd; lane is at GPR bit offset laneWidthBits*L
-+ // from LSB
-+
-+ unsigned lanesPerDword = 8 / laneWidthBytes;
-+
-+ if (lane >= lanesPerDword) {
-+ masm.as_mfvsrd(dest, src);
-+ unsigned shift = laneWidthBits * (lane - lanesPerDword);
-+ if (shift) {
-+ masm.x_srdi(dest, dest, shift);
-+ }
-+ } else {
-+ if (HasPOWER9()) {
-+ masm.as_mfvsrld(dest, src);
-+ } else {
-+ // POWER8: swap dwords to get dw1 into scalar position.
-+ // Avoid ScratchSimd128Scope — callers may already hold it.
-+ // Use xxpermdi directly on ScratchSimd128Reg (v0/VSR32, non-allocatable).
-+ masm.as_xxpermdi(ScratchSimd128Reg, src, src, 2);
-+ masm.as_mfvsrd(dest, ScratchSimd128Reg);
-+ }
-+ unsigned shift = laneWidthBits * lane;
-+ if (shift) {
-+ masm.x_srdi(dest, dest, shift);
-+ }
-+ }
-+}
-+
-+void MacroAssembler::unsignedExtractLaneInt8x16(uint32_t lane,
-+ FloatRegister src,
-+ Register dest) {
-+ MOZ_ASSERT(lane < 16);
-+ if (HasPOWER9()) {
-+ // vextractub puts VRB.BE_byte[UIM] at VRT.BE_byte[7] with the rest
-+ // zeroed; mfvsrd then reads BE bytes 0..7 → low byte of dest, high
-+ // bytes already 0. No mask needed.
-+ as_vextractub(ScratchSimd128Reg, src, 15 - lane);
-+ as_mfvsrd(dest, ScratchSimd128Reg);
-+ return;
-+ }
-+ ExtractLaneToGPR(*this, lane, src, dest, 1, 8);
-+ as_rldicl(dest, dest, 0, 56);
-+}
-+
-+void MacroAssembler::unsignedExtractLaneInt16x8(uint32_t lane,
-+ FloatRegister src,
-+ Register dest) {
-+ MOZ_ASSERT(lane < 8);
-+ if (HasPOWER9()) {
-+ as_vextractuh(ScratchSimd128Reg, src, 14 - 2 * lane);
-+ as_mfvsrd(dest, ScratchSimd128Reg);
-+ return;
-+ }
-+ ExtractLaneToGPR(*this, lane, src, dest, 2, 16);
-+ as_rldicl(dest, dest, 0, 48);
-+}
-+
-+void MacroAssembler::extractLaneFloat32x4(uint32_t lane, FloatRegister src,
-+ FloatRegister dest) {
-+ MOZ_ASSERT(lane < 4);
-+ // BE word index = 3 - lane. xxextractuw extracts a word by BE byte offset.
-+ // BE byte offset of BE word W = W*4. So offset = (3-lane)*4.
-+ // xxextractuw puts the extracted word into bits[32:63] of dest (the low
-+ // word of the scalar doubleword), then xscvspdpn converts SP→DP.
-+ // xxspltw replicates a word into all 4 positions. The scalar SP value
-+ // is then at bits[0:31] where xscvspdpn expects it.
-+ as_xxspltw(dest, src, 3 - lane);
-+ as_xscvspdpn(dest, dest);
-+}
-+
-+void MacroAssembler::extractLaneFloat64x2(uint32_t lane, FloatRegister src,
-+ FloatRegister dest) {
-+ MOZ_ASSERT(lane < 2);
-+ if (lane == 0) {
-+ // Lane 0 = LE low dword = BE dword 1. Need to swap to scalar position.
-+ as_xxpermdi(dest, src, src, 2);
-+ } else {
-+ // Lane 1 = LE high dword = BE dword 0 = scalar position.
-+ if (src != dest) {
-+ as_xxlor(dest, src, src);
-+ }
-+ }
-+}
-+
-+// ===============================================================
-+// Replace lane
-+
-+void MacroAssembler::replaceLaneInt8x16(unsigned lane, Register rhs,
-+ FloatRegister lhsDest) {
-+ MOZ_ASSERT(lane < 16);
-+ if (HasPOWER10()) {
-+ // 2 insns + 1 GPR scratch: load lane index, vinsbrx (right-indexed
-+ // = LE-natural). vinsbrx masks RA & 0xF, so the immediate fits.
-+ UseScratchRegisterScope temps(asMasm());
-+ Register idx = temps.Acquire();
-+ xs_li(idx, int16_t(lane));
-+ as_vinsbrx(lhsDest, idx, rhs);
-+ return;
-+ }
-+ if (HasPOWER9()) {
-+ // 2 insns + 1 VSR scratch: stage rhs in BE 0..63 of a scratch VSR
-+ // (low byte of rhs lands at BE byte 7), then vinsertb copies that
-+ // BE byte 7 into lhsDest's BE byte (15 - lane) = wasm lane L.
-+ ScratchSimd128Scope scratch(*this);
-+ as_mtvsrd(scratch, rhs);
-+ as_vinsertb(lhsDest, scratch, 15 - lane);
-+ return;
-+ }
-+ {
-+ // POWER8: extract dword, use rldimi to insert byte, write back.
-+ // Needs 1 GPR scratch plus the scratch VSR (scratch128).
-+ UseScratchRegisterScope temps(asMasm());
-+ ScratchSimd128Scope scratch128(*this);
-+ Register tmp = temps.Acquire();
-+ unsigned dword = lane / 8;
-+ unsigned byteInDword = lane % 8;
-+ if (dword == 1) {
-+ as_mfvsrd(tmp, lhsDest);
-+ } else {
-+ as_xxpermdi(scratch128, lhsDest, lhsDest, 2);
-+ as_mfvsrd(tmp, scratch128);
-+ }
-+ // rldimi RT,RS,SH,MB: insert rotated RS bits into RT at positions
-+ // MB..63-SH. Insert rhs byte at bit offset 8*byteInDword from LSB:
-+ // SH = 8*byteInDword, MB = 56 - 8*byteInDword
-+ as_rldimi(tmp, rhs, 8 * byteInDword, 56 - 8 * byteInDword);
-+ as_mtvsrd(scratch128, tmp);
-+ // mtvsrd writes scratch128.dw0 from `tmp` and leaves scratch128.dw1
-+ // undefined. Both xxpermdi forms below select scratch128.dw0 only:
-+ // DM=0b01 → [scratch.dw0, lhsDest.dw1]
-+ // DM=0b00 → [lhsDest.dw0, scratch.dw0]
-+ // So the undefined dw1 is never read. INVARIANT: any future change
-+ // to either DM literal MUST first zero scratch128.dw1 via xxlxor or
-+ // adopt a different staging scheme; otherwise reads of dw1 produce
-+ // POWER9-zero / POWER8-undefined garbage in the output.
-+ if (dword == 1) {
-+ as_xxpermdi(lhsDest, scratch128, lhsDest, 1);
-+ } else {
-+ as_xxpermdi(lhsDest, lhsDest, scratch128, 0);
-+ }
-+ }
-+}
-+
-+void MacroAssembler::replaceLaneInt16x8(unsigned lane, Register rhs,
-+ FloatRegister lhsDest) {
-+ MOZ_ASSERT(lane < 8);
-+ if (HasPOWER10()) {
-+ // 2 insns + 1 GPR scratch: lane*2 → byte position, then vinshrx.
-+ UseScratchRegisterScope temps(asMasm());
-+ Register idx = temps.Acquire();
-+ xs_li(idx, int16_t(lane * 2));
-+ as_vinshrx(lhsDest, idx, rhs);
-+ return;
-+ }
-+ if (HasPOWER9()) {
-+ // 2 insns + 1 VSR scratch: stage rhs in BE 0..63 (low 16 of rhs
-+ // lands at BE bytes 6..7), then vinserth copies those two bytes
-+ // into lhsDest's BE bytes (14 - 2L)..(15 - 2L) = wasm lane L.
-+ ScratchSimd128Scope scratch(*this);
-+ as_mtvsrd(scratch, rhs);
-+ as_vinserth(lhsDest, scratch, 14 - 2 * lane);
-+ return;
-+ }
-+ {
-+ // POWER8: extract dword, rldimi to insert halfword, write back.
-+ // Same dw1-undef invariant as replaceLaneInt8x16 above.
-+ UseScratchRegisterScope temps(asMasm());
-+ ScratchSimd128Scope scratch128(*this);
-+ Register tmp = temps.Acquire();
-+ unsigned dword = lane / 4;
-+ unsigned hwInDword = lane % 4;
-+ if (dword == 1) {
-+ as_mfvsrd(tmp, lhsDest);
-+ } else {
-+ as_xxpermdi(scratch128, lhsDest, lhsDest, 2);
-+ as_mfvsrd(tmp, scratch128);
-+ }
-+ as_rldimi(tmp, rhs, 16 * hwInDword, 48 - 16 * hwInDword);
-+ as_mtvsrd(scratch128, tmp);
-+ if (dword == 1) {
-+ as_xxpermdi(lhsDest, scratch128, lhsDest, 1);
-+ } else {
-+ as_xxpermdi(lhsDest, lhsDest, scratch128, 0);
-+ }
-+ }
-+}
-+
-+void MacroAssembler::replaceLaneInt32x4(unsigned lane, Register rhs,
-+ FloatRegister lhsDest) {
-+ MOZ_ASSERT(lane < 4);
-+ if (HasPOWER10()) {
-+ // 1 insn, no scratch VSR. UIM is the BE byte offset.
-+ as_vinsw(lhsDest, rhs, (3 - lane) * 4);
-+ return;
-+ }
-+ if (HasPOWER9()) {
-+ // POWER9: xxinsertw inserts word from bits[32:63] of XB at BE byte
-+ // offset UIM in XT. mtvsrd puts GPR into bits[0:63]; low 32 bits
-+ // land at bits[32:63]. BE byte offset of Wasm word lane = (3-lane)*4.
-+ ScratchSimd128Scope scratch(*this);
-+ as_mtvsrd(scratch, rhs);
-+ as_xxinsertw(lhsDest, scratch, (3 - lane) * 4);
-+ return;
-+ }
-+ // POWER8: extract dword, rldimi to insert word, write back.
-+ // Modeled on replaceLaneInt16x8 above.
-+ UseScratchRegisterScope temps(asMasm());
-+ ScratchSimd128Scope scratch128(*this);
-+ Register tmp = temps.Acquire();
-+ unsigned dword = lane / 2; // 0 = lanes 0,1; 1 = lanes 2,3.
-+ unsigned wordInDword = lane % 2; // 0 = low LE word; 1 = high LE word.
-+ if (dword == 1) {
-+ as_mfvsrd(tmp, lhsDest);
-+ } else {
-+ as_xxpermdi(scratch128, lhsDest, lhsDest, 2);
-+ as_mfvsrd(tmp, scratch128);
-+ }
-+ as_rldimi(tmp, rhs, 32 * wordInDword, 32 - 32 * wordInDword);
-+ as_mtvsrd(scratch128, tmp);
-+ if (dword == 1) {
-+ as_xxpermdi(lhsDest, scratch128, lhsDest, 1);
-+ } else {
-+ as_xxpermdi(lhsDest, lhsDest, scratch128, 0);
-+ }
-+}
-+
-+void MacroAssembler::replaceLaneFloat32x4(unsigned lane, FloatRegister rhs,
-+ FloatRegister lhsDest) {
-+ MOZ_ASSERT(lane < 4);
-+ if (HasPOWER9()) {
-+ ScratchSimd128Scope scratch(*this);
-+ as_xscvdpspn(scratch, rhs);
-+ as_xxinsertw(lhsDest, scratch, (3 - lane) * 4);
-+ return;
-+ }
-+ // POWER8: convert double rhs to single (lands in BE bits 0..31 of FPR),
-+ // extract bits to a GPR, then route through the integer insert path.
-+ UseScratchRegisterScope temps(asMasm());
-+ Register rhsBits = temps.Acquire();
-+ {
-+ ScratchSimd128Scope scratch(*this);
-+ as_xscvdpspn(scratch, rhs);
-+ as_mfvsrd(rhsBits, scratch); // single is in high 32 bits of GPR
-+ x_srdi(rhsBits, rhsBits, 32); // single → low 32 bits
-+ }
-+ // Inline the int-insert sequence (can't call replaceLaneInt32x4 from
-+ // here because we're already inside a UseScratchRegisterScope and
-+ // need to acquire a separate tmp).
-+ ScratchSimd128Scope scratch128(*this);
-+ Register tmp = temps.Acquire();
-+ unsigned dword = lane / 2;
-+ unsigned wordInDword = lane % 2;
-+ if (dword == 1) {
-+ as_mfvsrd(tmp, lhsDest);
-+ } else {
-+ as_xxpermdi(scratch128, lhsDest, lhsDest, 2);
-+ as_mfvsrd(tmp, scratch128);
-+ }
-+ as_rldimi(tmp, rhsBits, 32 * wordInDword, 32 - 32 * wordInDword);
-+ as_mtvsrd(scratch128, tmp);
-+ if (dword == 1) {
-+ as_xxpermdi(lhsDest, scratch128, lhsDest, 1);
-+ } else {
-+ as_xxpermdi(lhsDest, lhsDest, scratch128, 0);
-+ }
-+}
-+
-+void MacroAssembler::replaceLaneFloat64x2(unsigned lane, FloatRegister rhs,
-+ FloatRegister lhsDest) {
-+ MOZ_ASSERT(lane < 2);
-+ // xxpermdi to place the scalar double into the correct lane.
-+ if (lane == 0) {
-+ // Replace LE low dword (= dw1). Keep lhsDest dw0 (lane 1).
-+ // rhs scalar is in dw0. dm=0b00: [lhsDest.dw0, rhs.dw0]
-+ as_xxpermdi(lhsDest, lhsDest, rhs, 0);
-+ } else {
-+ // Replace LE high dword (= dw0). Keep lhsDest dw1 (lane 0).
-+ // rhs scalar is in dw0. dm=0b01: [rhs.dw0, lhsDest.dw1]
-+ as_xxpermdi(lhsDest, rhs, lhsDest, 1);
-+ }
-+}
-+
-+void MacroAssembler::shuffleInt8x16(const uint8_t lanes[16], FloatRegister rhs,
-+ FloatRegister lhsDest) {
-+ shuffleInt8x16(lanes, lhsDest, rhs, lhsDest);
-+}
-+
-+void MacroAssembler::shuffleInt8x16(const uint8_t lanes[16], FloatRegister lhs,
-+ FloatRegister rhs, FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ // PPC64 vperm uses BE byte indices: VRA[0]=MSB, VRA[15]=LSB, VRB[16..31].
-+ // Convert Wasm LE lane indices to vperm control: lhs lane N = BE index
-+ // (15-N), rhs lane N = BE index (31-N) = (47 - (N+16)).
-+ int8_t ctrl[16];
-+ for (unsigned i = 0; i < 16; i++) {
-+ uint8_t src = lanes[i];
-+ if (src < 16) {
-+ ctrl[i] = 15 - src;
-+ } else {
-+ ctrl[i] = 47 - src;
-+ }
-+ }
-+ loadConstantSimd128(SimdConstant::CreateX16(ctrl), scratch);
-+ // vperm directly on Simd128 regs.
-+ as_vperm(dest.encoding() & 31, lhs.encoding() & 31, rhs.encoding() & 31,
-+ scratch.encoding() & 31);
-+}
-+
-+void MacroAssembler::laneSelectSimd128(FloatRegister mask, FloatRegister lhs,
-+ FloatRegister rhs, FloatRegister dest) {
-+ // xxsel: XC=0→XA, XC=1→XB → XT = (XA & ~XC) | (XB & XC)
-+ // laneSelect: dest = (lhs & mask) | (rhs & ~mask)
-+ // Need XA=rhs, XB=lhs, XC=mask.
-+ as_xxsel(dest, rhs, lhs, mask);
-+}
-+
-+void MacroAssembler::interleaveHighInt8x16(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ // On LE, vmrghb(rhs, lhs) gives Wasm interleave_high.
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vmrghb), rhs, lhs, dest);
-+}
-+
-+void MacroAssembler::interleaveHighInt16x8(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vmrghh), rhs, lhs, dest);
-+}
-+
-+void MacroAssembler::interleaveHighInt32x4(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vmrghw), rhs, lhs, dest);
-+}
-+
-+void MacroAssembler::interleaveHighInt64x2(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ // xxpermdi DM=0: [XA.dw0, XB.dw0] = merge high dwords.
-+ // On LE: dw0 = high Wasm lane (lane 1).
-+ as_xxpermdi(dest, rhs, lhs, 0);
-+}
-+
-+void MacroAssembler::interleaveLowInt8x16(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vmrglb), rhs, lhs, dest);
-+}
-+
-+void MacroAssembler::interleaveLowInt16x8(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vmrglh), rhs, lhs, dest);
-+}
-+
-+void MacroAssembler::interleaveLowInt32x4(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vmrglw), rhs, lhs, dest);
-+}
-+
-+void MacroAssembler::interleaveLowInt64x2(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ // xxpermdi DM=3: [XA.dw1, XB.dw1] = merge low dwords.
-+ as_xxpermdi(dest, rhs, lhs, 3);
-+}
-+
-+void MacroAssembler::concatAndRightShiftSimd128(FloatRegister lhs,
-+ FloatRegister rhs,
-+ FloatRegister dest,
-+ uint32_t shift) {
-+ // vsldoi(VRA, VRB, SH) extracts 16 bytes starting at byte SH of the
-+ // big-endian concatenation VRA||VRB. Endianness mapping for the Wasm
-+ // `v128.shuffle` right-shift-concat semantic:
-+ // Wasm: result[i] = (i + shift < 16) ? rhs[i + shift]
-+ // : lhs[i + shift - 16]
-+ // PPC LE: vsldoi(rhs, lhs, shift) produces exactly that — the LE byte
-+ // layout reverses from BE, so passing (rhs, lhs, shift) here is the LE
-+ // equivalent of (lhs, rhs, 16 - shift) on BE.
-+ MOZ_ASSERT(shift < 16);
-+ if (shift == 0) {
-+ moveSimd128(rhs, dest);
-+ return;
-+ }
-+ // vsldoi VRT,VRA,VRB,SH: result[i] = (VRA||VRB)[SH+i]
-+ // Emit vsldoi directly on Simd128 regs (VRA = lhs = high part, VRB = rhs =
-+ // low part); the VMX emitter masks `& 31` internally to get the 5-bit VR
-+ // field. NOTE(review): the comment above says (rhs, lhs, shift) — confirm.
-+ as_vsldoi(dest, lhs, rhs, shift);
-+}
-+
-+void MacroAssembler::leftShiftSimd128(Imm32 count, FloatRegister src,
-+ FloatRegister dest) {
-+ MOZ_ASSERT(count.value < 16);
-+ if (count.value == 0) {
-+ moveSimd128(src, dest);
-+ return;
-+ }
-+ // vslo shifts left by bytes (count in bits 121-124 of VRB, i.e. byte 15 bits
-+ // 1-4). vsl shifts left by bits (count in bits 125-127 of VRB, i.e. byte 15
-+ // bits 5-7). For byte shift: splatX4(count*8, scratch), then vslo.
-+ ScratchSimd128Scope scratch(*this);
-+ SplatImm32(*this, Imm32(count.value * 8), scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vslo), src, scratch, dest);
-+}
-+
-+void MacroAssembler::rightShiftSimd128(Imm32 count, FloatRegister src,
-+ FloatRegister dest) {
-+ MOZ_ASSERT(count.value < 16);
-+ if (count.value == 0) {
-+ moveSimd128(src, dest);
-+ return;
-+ }
-+ ScratchSimd128Scope scratch(*this);
-+ SplatImm32(*this, Imm32(count.value * 8), scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsro), src, scratch, dest);
-+}
-+
-+void MacroAssembler::zeroExtend8x16To16x8(FloatRegister src,
-+ FloatRegister dest) {
-+ // Unsigned widen low: interleave low bytes with zero bytes.
-+ // On LE, vmrglb(zero, src) interleaves the low 8 bytes of src with zeros.
-+ // Use ScratchSimd128Reg as the zero. Order matters: read src into the
-+ // merge BEFORE writing dest (which might alias src). vmrglb reads
-+ // vra+vrb, writes vrt — single-cycle issue.
-+ ScratchSimd128Scope zero(*this);
-+ as_xxlxor(zero, zero, zero);
-+ as_vmrglb(dest.encoding() & 31, zero.encoding() & 31, src.encoding() & 31);
-+}
-+
-+void MacroAssembler::zeroExtend8x16To32x4(FloatRegister src,
-+ FloatRegister dest) {
-+ zeroExtend8x16To16x8(src, dest);
-+ zeroExtend16x8To32x4(dest, dest);
-+}
-+
-+void MacroAssembler::zeroExtend8x16To64x2(FloatRegister src,
-+ FloatRegister dest) {
-+ zeroExtend8x16To32x4(src, dest);
-+ zeroExtend32x4To64x2(dest, dest);
-+}
-+
-+void MacroAssembler::zeroExtend16x8To32x4(FloatRegister src,
-+ FloatRegister dest) {
-+ // Unsigned widen low: interleave low halfwords with zero halfwords.
-+ ScratchSimd128Scope zero(*this);
-+ as_xxlxor(zero, zero, zero);
-+ as_vmrglh(dest.encoding() & 31, zero.encoding() & 31, src.encoding() & 31);
-+}
-+
-+void MacroAssembler::zeroExtend16x8To64x2(FloatRegister src,
-+ FloatRegister dest) {
-+ zeroExtend16x8To32x4(src, dest);
-+ zeroExtend32x4To64x2(dest, dest);
-+}
-+
-+void MacroAssembler::zeroExtend32x4To64x2(FloatRegister src,
-+ FloatRegister dest) {
-+ // Unsigned widen low: interleave low words with zero words.
-+ ScratchSimd128Scope zero(*this);
-+ as_xxlxor(zero, zero, zero);
-+ as_vmrglw(dest.encoding() & 31, zero.encoding() & 31, src.encoding() & 31);
-+}
-+
-+void MacroAssembler::reverseInt16x8(FloatRegister src, FloatRegister dest) {
-+ const uint8_t lanes[] = {14, 15, 12, 13, 10, 11, 8, 9,
-+ 6, 7, 4, 5, 2, 3, 0, 1};
-+ shuffleInt8x16(lanes, src, src, dest);
-+}
-+
-+void MacroAssembler::reverseInt32x4(FloatRegister src, FloatRegister dest) {
-+ const uint8_t lanes[] = {12, 13, 14, 15, 8, 9, 10, 11,
-+ 4, 5, 6, 7, 0, 1, 2, 3};
-+ shuffleInt8x16(lanes, src, src, dest);
-+}
-+
-+void MacroAssembler::reverseInt64x2(FloatRegister src, FloatRegister dest) {
-+ as_xxpermdi(dest, src, src, 2);
-+}
-+
-+void MacroAssembler::swizzleInt8x16Relaxed(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ swizzleInt8x16(lhs, rhs, dest);
-+}
-+
-+// extMul{Low,High}Int{8x16,16x8} use POWER8+ widening multiplies
-+// (vmul{e,o}{s,u}{b,h}) plus a halfword/word merge to map BE-indexed
-+// even/odd products into Wasm lane order on PPC64 LE.
-+//
-+// Lane mapping:
-+// For Low (Wasm lanes from LE bytes/HW 0..N/2-1 = BE N-1..N/2):
-+// vmrgl{h,w}(even_products, odd_products) places the right products
-+// at BE result indices, which on LE map to Wasm lanes 0..N/2-1.
-+// For High (Wasm lanes from LE indices N/2..N-1 = BE N/2-1..0):
-+// vmrgh{h,w} takes the upper-half BE indices instead.
-+//
-+// Aliasing safety: vmul* reads both operands before writing, so
-+// `dest = vmulo* lhs, rhs` is safe even when dest aliases lhs/rhs.
-+// We use one scratch for the even-product half because vmrgl{h,w}
-+// reads dest after the odd multiply.
-+
-+void MacroAssembler::extMulLowInt8x16(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ uint8_t l = lhs.encoding() & 31, r = rhs.encoding() & 31;
-+ uint8_t d = dest.encoding() & 31, s = scratch.encoding() & 31;
-+ as_vmulesb(s, l, r);
-+ as_vmulosb(d, l, r);
-+ as_vmrglh(d, s, d);
-+}
-+
-+void MacroAssembler::extMulHighInt8x16(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ uint8_t l = lhs.encoding() & 31, r = rhs.encoding() & 31;
-+ uint8_t d = dest.encoding() & 31, s = scratch.encoding() & 31;
-+ as_vmulesb(s, l, r);
-+ as_vmulosb(d, l, r);
-+ as_vmrghh(d, s, d);
-+}
-+
-+void MacroAssembler::unsignedExtMulLowInt8x16(FloatRegister lhs,
-+ FloatRegister rhs,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ uint8_t l = lhs.encoding() & 31, r = rhs.encoding() & 31;
-+ uint8_t d = dest.encoding() & 31, s = scratch.encoding() & 31;
-+ as_vmuleub(s, l, r);
-+ as_vmuloub(d, l, r);
-+ as_vmrglh(d, s, d);
-+}
-+
-+void MacroAssembler::unsignedExtMulHighInt8x16(FloatRegister lhs,
-+ FloatRegister rhs,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ uint8_t l = lhs.encoding() & 31, r = rhs.encoding() & 31;
-+ uint8_t d = dest.encoding() & 31, s = scratch.encoding() & 31;
-+ as_vmuleub(s, l, r);
-+ as_vmuloub(d, l, r);
-+ as_vmrghh(d, s, d);
-+}
-+
-+void MacroAssembler::extMulLowInt16x8(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ uint8_t l = lhs.encoding() & 31, r = rhs.encoding() & 31;
-+ uint8_t d = dest.encoding() & 31, s = scratch.encoding() & 31;
-+ as_vmulesh(s, l, r);
-+ as_vmulosh(d, l, r);
-+ as_vmrglw(d, s, d);
-+}
-+
-+void MacroAssembler::extMulHighInt16x8(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ uint8_t l = lhs.encoding() & 31, r = rhs.encoding() & 31;
-+ uint8_t d = dest.encoding() & 31, s = scratch.encoding() & 31;
-+ as_vmulesh(s, l, r);
-+ as_vmulosh(d, l, r);
-+ as_vmrghw(d, s, d);
-+}
-+
-+void MacroAssembler::unsignedExtMulLowInt16x8(FloatRegister lhs,
-+ FloatRegister rhs,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ uint8_t l = lhs.encoding() & 31, r = rhs.encoding() & 31;
-+ uint8_t d = dest.encoding() & 31, s = scratch.encoding() & 31;
-+ as_vmuleuh(s, l, r);
-+ as_vmulouh(d, l, r);
-+ as_vmrglw(d, s, d);
-+}
-+
-+void MacroAssembler::unsignedExtMulHighInt16x8(FloatRegister lhs,
-+ FloatRegister rhs,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ uint8_t l = lhs.encoding() & 31, r = rhs.encoding() & 31;
-+ uint8_t d = dest.encoding() & 31, s = scratch.encoding() & 31;
-+ as_vmuleuh(s, l, r);
-+ as_vmulouh(d, l, r);
-+ as_vmrghw(d, s, d);
-+}
-+
-+// ExtMul{Low,High}Int32x4 use vmul{e,o}{s,u}w (POWER8+) plus xxpermdi
-+// to combine the two i64 partial products into Wasm lane order on PPC64
-+// LE. xxpermdi accepts the full 6-bit VSR encoding so it works directly
-+// on Simd128 regs (encoding 32-63) without any VR staging.
-+//
-+// Aliasing safe: both vmul* reads complete before the second one writes
-+// dest, and xxpermdi reads both inputs before writing.
-+
-+static void EmitExtMulInt32x4(
-+ MacroAssembler& masm, FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest, void (*mulEven)(Assembler&, uint8_t, uint8_t, uint8_t),
-+ void (*mulOdd)(Assembler&, uint8_t, uint8_t, uint8_t), uint8_t dm) {
-+ ScratchSimd128Scope scratch(masm);
-+ uint8_t l = lhs.encoding() & 31;
-+ uint8_t r = rhs.encoding() & 31;
-+ uint8_t d = dest.encoding() & 31;
-+ uint8_t s = scratch.encoding() & 31;
-+ mulEven(static_cast<Assembler&>(masm), s, l, r);
-+ mulOdd(static_cast<Assembler&>(masm), d, l, r);
-+ masm.as_xxpermdi(dest, scratch, dest, dm);
-+}
-+
-+void MacroAssembler::extMulLowInt32x4(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitExtMulInt32x4(
-+ *this, lhs, rhs, dest,
-+ [](Assembler& a, uint8_t t, uint8_t x, uint8_t y) {
-+ a.as_vmulesw(t, x, y);
-+ },
-+ [](Assembler& a, uint8_t t, uint8_t x, uint8_t y) {
-+ a.as_vmulosw(t, x, y);
-+ },
-+ 3);
-+}
-+
-+void MacroAssembler::extMulHighInt32x4(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitExtMulInt32x4(
-+ *this, lhs, rhs, dest,
-+ [](Assembler& a, uint8_t t, uint8_t x, uint8_t y) {
-+ a.as_vmulesw(t, x, y);
-+ },
-+ [](Assembler& a, uint8_t t, uint8_t x, uint8_t y) {
-+ a.as_vmulosw(t, x, y);
-+ },
-+ 0);
-+}
-+
-+void MacroAssembler::unsignedExtMulLowInt32x4(FloatRegister lhs,
-+ FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitExtMulInt32x4(
-+ *this, lhs, rhs, dest,
-+ [](Assembler& a, uint8_t t, uint8_t x, uint8_t y) {
-+ a.as_vmuleuw(t, x, y);
-+ },
-+ [](Assembler& a, uint8_t t, uint8_t x, uint8_t y) {
-+ a.as_vmulouw(t, x, y);
-+ },
-+ 3);
-+}
-+
-+void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister lhs,
-+ FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitExtMulInt32x4(
-+ *this, lhs, rhs, dest,
-+ [](Assembler& a, uint8_t t, uint8_t x, uint8_t y) {
-+ a.as_vmuleuw(t, x, y);
-+ },
-+ [](Assembler& a, uint8_t t, uint8_t x, uint8_t y) {
-+ a.as_vmulouw(t, x, y);
-+ },
-+ 0);
-+}
-+
-+void MacroAssembler::q15MulrSatInt16x8(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ // Q15 multiply-round-saturate: vmhraddshs(a, b, zero) computes
-+ // saturate((a[i]*b[i] + 0x4000) >> 15) for each halfword.
-+ ScratchSimd128Scope scratch(*this);
-+ ZeroSimd128(*this, scratch);
-+ EmitVmxTernary(
-+ *this,
-+ [](Assembler& a, uint8_t vrt, uint8_t vra, uint8_t vrb, uint8_t vrc) {
-+ a.as_vmhraddshs(vrt, vra, vrb, vrc);
-+ },
-+ lhs, rhs, scratch, dest);
-+}
-+
-+// neg = 0 - src. Use ScratchSimd128Reg (= VR0, non-allocatable) as the
-+// zero source so the register allocator sees no clobbered VRs.
-+// 2 insns: xxlxor scratch + vsubuXm dest, scratch, src. vneg{b,h}
-+// doesn't exist in any POWER ISA, hence the subtract.
-+void MacroAssembler::negInt8x16(FloatRegister src, FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ ZeroSimd128(*this, scratch);
-+ as_vsububm(dest.encoding() & 31, scratch.encoding() & 31,
-+ src.encoding() & 31);
-+}
-+
-+void MacroAssembler::negInt16x8(FloatRegister src, FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ ZeroSimd128(*this, scratch);
-+ as_vsubuhm(dest.encoding() & 31, scratch.encoding() & 31,
-+ src.encoding() & 31);
-+}
-+
-+void MacroAssembler::negInt32x4(FloatRegister src, FloatRegister dest) {
-+ if (HasPOWER9()) {
-+ EmitVmxUnary(
-+ *this,
-+ [](Assembler& a, uint8_t vrt, uint8_t vrb) { a.as_vnegw(vrt, vrb); },
-+ src, dest);
-+ return;
-+ }
-+ // POWER8 fallback: 0 - src via ScratchSimd128Reg (VR0).
-+ ScratchSimd128Scope scratch(*this);
-+ ZeroSimd128(*this, scratch);
-+ as_vsubuwm(dest.encoding() & 31, scratch.encoding() & 31,
-+ src.encoding() & 31);
-+}
-+
-+void MacroAssembler::negInt64x2(FloatRegister src, FloatRegister dest) {
-+ if (HasPOWER9()) {
-+ EmitVmxUnary(
-+ *this,
-+ [](Assembler& a, uint8_t vrt, uint8_t vrb) { a.as_vnegd(vrt, vrb); },
-+ src, dest);
-+ return;
-+ }
-+ // POWER8 fallback: 0 - src via ScratchSimd128Reg (VR0).
-+ ScratchSimd128Scope scratch(*this);
-+ ZeroSimd128(*this, scratch);
-+ as_vsubudm(dest.encoding() & 31, scratch.encoding() & 31,
-+ src.encoding() & 31);
-+}
-+#undef DEF_NEG_INTNxM_VSUB
-+
-+void MacroAssembler::unsignedAddSatInt8x16(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vaddubs), lhs, rhs, dest);
-+}
-+
-+void MacroAssembler::unsignedAddSatInt16x8(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vadduhs), lhs, rhs, dest);
-+}
-+
-+void MacroAssembler::unsignedSubSatInt8x16(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsububs), lhs, rhs, dest);
-+}
-+
-+void MacroAssembler::unsignedSubSatInt16x8(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsubuhs), lhs, rhs, dest);
-+}
-+
-+void MacroAssembler::unsignedMinInt8x16(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vminub), lhs, rhs, dest);
-+}
-+
-+void MacroAssembler::unsignedMinInt16x8(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vminuh), lhs, rhs, dest);
-+}
-+
-+void MacroAssembler::unsignedMinInt32x4(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vminuw), lhs, rhs, dest);
-+}
-+
-+void MacroAssembler::unsignedMaxInt8x16(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vmaxub), lhs, rhs, dest);
-+}
-+
-+void MacroAssembler::unsignedMaxInt16x8(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vmaxuh), lhs, rhs, dest);
-+}
-+
-+void MacroAssembler::unsignedMaxInt32x4(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vmaxuw), lhs, rhs, dest);
-+}
-+
-+void MacroAssembler::unsignedAverageInt8x16(FloatRegister lhs,
-+ FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vavgub), lhs, rhs, dest);
-+}
-+
-+void MacroAssembler::unsignedAverageInt16x8(FloatRegister lhs,
-+ FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vavguh), lhs, rhs, dest);
-+}
-+
-+// abs(x) = max(x, -x) per signed lane. No vabs{b,h,w,d} exists in any ISA.
-+// vneg{w,d} exists only on POWER9.
-+// We use ScratchSimd128Reg as a temp for -src. Order matters: compute
-+// -src into temp first (reads src), then max(src, temp) into dest (reads
-+// src + temp, writes dest). Safe even when dest == src because src is
-+// read before dest is written by vmaxsX.
-+
-+void MacroAssembler::absInt8x16(FloatRegister src, FloatRegister dest) {
-+ ScratchSimd128Scope tmp(*this);
-+ as_xxlxor(tmp, tmp, tmp); // tmp = 0
-+ as_vsububm(tmp.encoding() & 31, tmp.encoding() & 31,
-+ src.encoding() & 31); // tmp = -src
-+ as_vmaxsb(dest.encoding() & 31, src.encoding() & 31,
-+ tmp.encoding() & 31); // dest = max(src, -src)
-+}
-+
-+void MacroAssembler::absInt16x8(FloatRegister src, FloatRegister dest) {
-+ ScratchSimd128Scope tmp(*this);
-+ as_xxlxor(tmp, tmp, tmp);
-+ as_vsubuhm(tmp.encoding() & 31, tmp.encoding() & 31, src.encoding() & 31);
-+ as_vmaxsh(dest.encoding() & 31, src.encoding() & 31, tmp.encoding() & 31);
-+}
-+
-+void MacroAssembler::absInt32x4(FloatRegister src, FloatRegister dest) {
-+ ScratchSimd128Scope tmp(*this);
-+ if (HasPOWER9()) {
-+ as_vnegw(tmp.encoding() & 31, src.encoding() & 31); // tmp = -src
-+ } else {
-+ as_xxlxor(tmp, tmp, tmp);
-+ as_vsubuwm(tmp.encoding() & 31, tmp.encoding() & 31, src.encoding() & 31);
-+ }
-+ as_vmaxsw(dest.encoding() & 31, src.encoding() & 31, tmp.encoding() & 31);
-+}
-+
-+void MacroAssembler::absInt64x2(FloatRegister src, FloatRegister dest) {
-+ ScratchSimd128Scope tmp(*this);
-+ if (HasPOWER9()) {
-+ as_vnegd(tmp.encoding() & 31, src.encoding() & 31); // tmp = -src
-+ } else {
-+ as_xxlxor(tmp, tmp, tmp);
-+ as_vsubudm(tmp.encoding() & 31, tmp.encoding() & 31, src.encoding() & 31);
-+ }
-+ as_vmaxsd(dest.encoding() & 31, src.encoding() & 31, tmp.encoding() & 31);
-+}
-+
-+void MacroAssembler::leftShiftInt8x16(Imm32 count, FloatRegister src,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ SplatImm8(*this, count, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vslb), src, scratch, dest);
-+}
-+
-+void MacroAssembler::leftShiftInt16x8(Imm32 count, FloatRegister src,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ SplatImm16(*this, count, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vslh), src, scratch, dest);
-+}
-+
-+void MacroAssembler::leftShiftInt32x4(Imm32 count, FloatRegister src,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ SplatImm32(*this, count, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vslw), src, scratch, dest);
-+}
-+
-+void MacroAssembler::leftShiftInt64x2(Imm32 count, FloatRegister src,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ SplatImm32(*this, count, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsld), src, scratch, dest);
-+}
-+
-+void MacroAssembler::rightShiftInt8x16(Imm32 count, FloatRegister src,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ SplatImm8(*this, count, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsrab), src, scratch, dest);
-+}
-+
-+void MacroAssembler::unsignedRightShiftInt8x16(Imm32 count, FloatRegister src,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ SplatImm8(*this, count, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsrb), src, scratch, dest);
-+}
-+
-+void MacroAssembler::rightShiftInt16x8(Imm32 count, FloatRegister src,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ SplatImm16(*this, count, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsrah), src, scratch, dest);
-+}
-+
-+void MacroAssembler::unsignedRightShiftInt16x8(Imm32 count, FloatRegister src,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ SplatImm16(*this, count, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsrh), src, scratch, dest);
-+}
-+
-+void MacroAssembler::rightShiftInt32x4(Imm32 count, FloatRegister src,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ SplatImm32(*this, count, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsraw), src, scratch, dest);
-+}
-+
-+void MacroAssembler::unsignedRightShiftInt32x4(Imm32 count, FloatRegister src,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ SplatImm32(*this, count, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsrw), src, scratch, dest);
-+}
-+
-+void MacroAssembler::rightShiftInt64x2(Imm32 count, FloatRegister src,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ SplatImm32(*this, count, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsrad), src, scratch, dest);
-+}
-+
-+void MacroAssembler::unsignedRightShiftInt64x2(Imm32 count, FloatRegister src,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ SplatImm32(*this, count, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsrd), src, scratch, dest);
-+}
-+
-+void MacroAssembler::bitwiseAndSimd128(FloatRegister rhs,
-+ FloatRegister lhsDest) {
-+ as_xxland(lhsDest, lhsDest, rhs);
-+}
-+
-+void MacroAssembler::bitwiseAndSimd128(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ as_xxland(dest, lhs, rhs);
-+}
-+
-+void MacroAssembler::bitwiseOrSimd128(FloatRegister rhs,
-+ FloatRegister lhsDest) {
-+ as_xxlor(lhsDest, lhsDest, rhs);
-+}
-+
-+void MacroAssembler::bitwiseOrSimd128(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ as_xxlor(dest, lhs, rhs);
-+}
-+
-+void MacroAssembler::bitwiseXorSimd128(FloatRegister rhs,
-+ FloatRegister lhsDest) {
-+ as_xxlxor(lhsDest, lhsDest, rhs);
-+}
-+
-+void MacroAssembler::bitwiseXorSimd128(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ as_xxlxor(dest, lhs, rhs);
-+}
-+
-+void MacroAssembler::bitwiseNotSimd128(FloatRegister src, FloatRegister dest) {
-+ as_xxlnor(dest, src, src);
-+}
-+
-+void MacroAssembler::bitwiseNotAndSimd128(FloatRegister rhs,
-+ FloatRegister lhsDest) {
-+ // notand(lhs, rhs) = ~lhs & rhs = xxlandc(rhs, lhs)
-+ as_xxlandc(lhsDest, rhs, lhsDest);
-+}
-+
-+void MacroAssembler::anyTrueSimd128(FloatRegister src, Register dest) {
-+ // vcmpequd. (POWER8+) against zero sets CR6:
-+ // - CR6.LT (BE bit 24) = 1 iff the per-lane result is all-1s, i.e.
-+ // every doubleword of src equals zero (= src is all-zero).
-+ // - CR6.EQ (BE bit 26) = 1 iff no lane was equal (= every dword nonzero).
-+ // any-true = !all-zero = !CR6.LT.
-+ ScratchSimd128Scope scratch(*this);
-+ uint8_t s = scratch.encoding() & 31;
-+ as_xxlxor(scratch, scratch, scratch);
-+ as_vcmpequd_rc(s, src.encoding() & 31, s);
-+ if (HasPOWER10()) {
-+ // setbcr materialises (CR[BI] == 0) ? 1 : 0 directly into dest.
-+ // dest = (CR6.LT == 0) = "not all-zero" = any-true.
-+ as_setbcr(dest, Assembler::LessThan, cr6);
-+ return;
-+ }
-+ as_mfocrf(dest, cr6);
-+ // CR6.LT is at BE bit 24 of the GPR. rlwinm sh=25 rotates left 25:
-+ // bit (24 - 25) mod 32 = 31 (LSB). Mask 31..31 keeps just bit 31.
-+ as_rlwinm(dest, dest, 25, 31, 31);
-+ as_xori(dest, dest, 1);
-+}
-+
-+// vcmpequX. against zero sets CR6: LT = all input lanes were zero,
-+// EQ = no input lane was zero. The latter is exactly "all-true".
-+// mfocrf places CR6 at bits 24-27 of the low 32-bit half (LT=24, EQ=26).
-+// rlwinm rd,rd,27,31,31 extracts bit 26 (CR6.EQ) right-justified.
-+template <typename VmxCmpRcFn>
-+static void EmitAllTrueInt(MacroAssembler& masm, FloatRegister src,
-+ Register dest, VmxCmpRcFn vmxCmpRc) {
-+ ScratchSimd128Scope scratch(masm);
-+ ZeroSimd128(masm, scratch);
-+ uint8_t s = scratch.encoding() & 31;
-+ vmxCmpRc(static_cast<Assembler&>(masm), s, src.encoding() & 31, s);
-+ if (HasPOWER10()) {
-+ // setbc materialises CR6.EQ directly into dest (1 insn vs the 2-insn
-+ // mfocrf + rlwinm extract). Already wired in ma_cmp_set.
-+ masm.as_setbc(dest, Assembler::Equal, cr6);
-+ return;
-+ }
-+ masm.as_mfocrf(dest, cr6);
-+ masm.as_rlwinm(dest, dest, 27, 31, 31);
-+}
-+
-+void MacroAssembler::allTrueInt8x16(FloatRegister src, Register dest) {
-+ EmitAllTrueInt(*this, src, dest,
-+ [](Assembler& a, uint8_t t, uint8_t r, uint8_t b) {
-+ a.as_vcmpequb_rc(t, r, b);
-+ });
-+}
-+
-+void MacroAssembler::allTrueInt16x8(FloatRegister src, Register dest) {
-+ EmitAllTrueInt(*this, src, dest,
-+ [](Assembler& a, uint8_t t, uint8_t r, uint8_t b) {
-+ a.as_vcmpequh_rc(t, r, b);
-+ });
-+}
-+
-+void MacroAssembler::allTrueInt32x4(FloatRegister src, Register dest) {
-+ EmitAllTrueInt(*this, src, dest,
-+ [](Assembler& a, uint8_t t, uint8_t r, uint8_t b) {
-+ a.as_vcmpequw_rc(t, r, b);
-+ });
-+}
-+
-+void MacroAssembler::allTrueInt64x2(FloatRegister src, Register dest) {
-+ EmitAllTrueInt(*this, src, dest,
-+ [](Assembler& a, uint8_t t, uint8_t r, uint8_t b) {
-+ a.as_vcmpequd_rc(t, r, b);
-+ });
-+}
-+
-+void MacroAssembler::compareInt8x16(Assembler::Condition cond,
-+ FloatRegister rhs, FloatRegister lhsDest) {
-+ compareInt8x16(cond, lhsDest, rhs, lhsDest);
-+}
-+
-+void MacroAssembler::compareInt8x16(Assembler::Condition cond,
-+ FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ if (cond == Assembler::NotEqual && HasPOWER9()) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vcmpneb), lhs, rhs, dest);
-+ return;
-+ }
-+ EmitVmxCompare(*this, cond, lhs, rhs, dest, VMX_BINARY_WRAPPER(vcmpequb),
-+ VMX_BINARY_WRAPPER(vcmpgtsb), VMX_BINARY_WRAPPER(vcmpgtub));
-+}
-+
-+void MacroAssembler::compareInt16x8(Assembler::Condition cond,
-+ FloatRegister rhs, FloatRegister lhsDest) {
-+ compareInt16x8(cond, lhsDest, rhs, lhsDest);
-+}
-+
-+void MacroAssembler::compareInt16x8(Assembler::Condition cond,
-+ FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ if (cond == Assembler::NotEqual && HasPOWER9()) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vcmpneh), lhs, rhs, dest);
-+ return;
-+ }
-+ EmitVmxCompare(*this, cond, lhs, rhs, dest, VMX_BINARY_WRAPPER(vcmpequh),
-+ VMX_BINARY_WRAPPER(vcmpgtsh), VMX_BINARY_WRAPPER(vcmpgtuh));
-+}
-+
-+void MacroAssembler::compareInt32x4(Assembler::Condition cond,
-+ FloatRegister rhs, FloatRegister lhsDest) {
-+ compareInt32x4(cond, lhsDest, rhs, lhsDest);
-+}
-+
-+void MacroAssembler::compareInt32x4(Assembler::Condition cond,
-+ FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ if (cond == Assembler::NotEqual && HasPOWER9()) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vcmpnew), lhs, rhs, dest);
-+ return;
-+ }
-+ EmitVmxCompare(*this, cond, lhs, rhs, dest, VMX_BINARY_WRAPPER(vcmpequw),
-+ VMX_BINARY_WRAPPER(vcmpgtsw), VMX_BINARY_WRAPPER(vcmpgtuw));
-+}
-+
-+void MacroAssembler::compareFloat32x4(Assembler::Condition cond,
-+ FloatRegister rhs,
-+ FloatRegister lhsDest) {
-+ compareFloat32x4(cond, lhsDest, rhs, lhsDest);
-+}
-+
-+void MacroAssembler::compareFloat32x4(Assembler::Condition cond,
-+ FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ switch (cond) {
-+ case Assembler::Equal:
-+ as_xvcmpeqsp(dest, lhs, rhs);
-+ break;
-+ case Assembler::NotEqual:
-+ as_xvcmpeqsp(dest, lhs, rhs);
-+ bitwiseNotSimd128(dest, dest);
-+ break;
-+ case Assembler::GreaterThan:
-+ as_xvcmpgtsp(dest, lhs, rhs);
-+ break;
-+ case Assembler::GreaterThanOrEqual:
-+ as_xvcmpgesp(dest, lhs, rhs);
-+ break;
-+ case Assembler::LessThan:
-+ as_xvcmpgtsp(dest, rhs, lhs);
-+ break;
-+ case Assembler::LessThanOrEqual:
-+ as_xvcmpgesp(dest, rhs, lhs);
-+ break;
-+ default:
-+ MOZ_CRASH("Unexpected SIMD float condition");
-+ }
-+}
-+
-+void MacroAssembler::compareFloat64x2(Assembler::Condition cond,
-+ FloatRegister rhs,
-+ FloatRegister lhsDest) {
-+ compareFloat64x2(cond, lhsDest, rhs, lhsDest);
-+}
-+
-+void MacroAssembler::compareFloat64x2(Assembler::Condition cond,
-+ FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ switch (cond) {
-+ case Assembler::Equal:
-+ as_xvcmpeqdp(dest, lhs, rhs);
-+ break;
-+ case Assembler::NotEqual:
-+ as_xvcmpeqdp(dest, lhs, rhs);
-+ bitwiseNotSimd128(dest, dest);
-+ break;
-+ case Assembler::GreaterThan:
-+ as_xvcmpgtdp(dest, lhs, rhs);
-+ break;
-+ case Assembler::GreaterThanOrEqual:
-+ as_xvcmpgedp(dest, lhs, rhs);
-+ break;
-+ case Assembler::LessThan:
-+ as_xvcmpgtdp(dest, rhs, lhs);
-+ break;
-+ case Assembler::LessThanOrEqual:
-+ as_xvcmpgedp(dest, rhs, lhs);
-+ break;
-+ default:
-+ MOZ_CRASH("Unexpected SIMD float condition");
-+ }
-+}
-+
-+void MacroAssembler::negFloat32x4(FloatRegister src, FloatRegister dest) {
-+ as_xvnegsp(dest, src);
-+}
-+
-+void MacroAssembler::negFloat64x2(FloatRegister src, FloatRegister dest) {
-+ as_xvnegdp(dest, src);
-+}
-+
-+void MacroAssembler::absFloat32x4(FloatRegister src, FloatRegister dest) {
-+ as_xvabssp(dest, src);
-+}
-+
-+void MacroAssembler::absFloat64x2(FloatRegister src, FloatRegister dest) {
-+ as_xvabsdp(dest, src);
-+}
-+
-+// Per spec:
-+// result[k] = (s|u)ext_widen(src[2k]) + (s|u)ext_widen(src[2k+1])
-+// POWER lacks pairwise multiply-add. Emulate via vmulX{e,o}X(src, splat(1))
-+// + vadd. Both vmuls need `src` AND `splat(1)` available simultaneously.
-+//
-+// Available SIMD slots without involving Lowering:
-+// - ScratchSimd128Reg (VR0, non-allocatable)
-+// - dest, src
-+// That's 3 regs when dest != src — enough for {src, splat, intermediate}.
-+// When dest == src we stash src and the even product to the 288-byte ELFv2
-+// red zone and rebuild splat(1).
-+//
-+// (Earlier implementations of these helpers routed through hardcoded
-+// VR1/VR2/VR3 via xxlor_vsr — faster but stomped allocator-managed VRs
-+// and silently corrupted any live wasm v128 the allocator had placed
-+// there. ScratchSimd128Reg + red-zone stash is the safe contract.)
-+// Always-safe pattern, used by the i8x16 helpers below whether or not dest
-+// aliases src. The splat-of-1 is `vspltis{b,h}` (5-bit signed immediate
-+// splat): 1 insn vs the 3-insn movePtr+mtvsrd+vsplt sequence the previous
-+// path used. Sequence:
-+//   1. stash src to red-zone slot 0 so dest can be freely overwritten;
-+//   2. vmul-even (signed/unsigned) of src with scratch=splat(1) writes the
-+//      sign/zero-extended even-lane products into dest;
-+//   3. stash dest to slot 1, reload src from slot 0 into scratch, and set
-+//      dest=splat(1); vmul-odd then writes the odd products into dest;
-+//   4. reload the even products from slot 1 into scratch and pairwise-add.
-+void MacroAssembler::extAddPairwiseInt8x16(FloatRegister src,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ uint8_t s = scratch.encoding() & 31;
-+ uint8_t srcEnc = src.encoding() & 31;
-+ uint8_t destEnc = dest.encoding() & 31;
-+ RedZoneStashSimd128(*this, src, 0);
-+ as_vspltisb(s, 1);
-+ as_vmulesb(destEnc, srcEnc, s);
-+ RedZoneStashSimd128(*this, dest, 1);
-+ RedZoneRestoreSimd128(*this, 0, scratch);
-+ as_vspltisb(destEnc, 1);
-+ as_vmulosb(destEnc, s, destEnc);
-+ RedZoneRestoreSimd128(*this, 1, scratch);
-+ as_vadduhm(destEnc, destEnc, s);
-+}
-+
-+void MacroAssembler::unsignedExtAddPairwiseInt8x16(FloatRegister src,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ uint8_t s = scratch.encoding() & 31;
-+ uint8_t srcEnc = src.encoding() & 31;
-+ uint8_t destEnc = dest.encoding() & 31;
-+ RedZoneStashSimd128(*this, src, 0);
-+ as_vspltisb(s, 1);
-+ as_vmuleub(destEnc, srcEnc, s);
-+ RedZoneStashSimd128(*this, dest, 1);
-+ RedZoneRestoreSimd128(*this, 0, scratch);
-+ as_vspltisb(destEnc, 1);
-+ as_vmuloub(destEnc, s, destEnc);
-+ RedZoneRestoreSimd128(*this, 1, scratch);
-+ as_vadduhm(destEnc, destEnc, s);
-+}
-+
-+// vmsumshm/vmsumuhm collapse the i16x8 → i32x4 pairwise-add into a single
-+// multiply-sum: VT.i32[k] = VRA.i16[2k]*VRB.i16[2k] +
-+//                           VRA.i16[2k+1]*VRB.i16[2k+1] + VRC.i32[k].
-+// With VRB = splat(1) and VRC = 0 this is exactly the wasm
-+// i32x4.extadd_pairwise_i16x8_{s,u} contract. 3 insns when dest != src.
-+// LWasmUnarySimd128 uses useRegisterAtStart so dest may alias src — in that
-+// case src, splat(1) and the zero addend cannot all be live at once, so we
-+// fall back to the vmule/vmulo + vadd sequence with red-zone stashes.
-+void MacroAssembler::extAddPairwiseInt16x8(FloatRegister src,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ if (dest != src) {
-+ as_xxlxor(scratch, scratch, scratch); // scratch = 0 (VRC addend)
-+ as_vspltish(dest.encoding() & 31, 1); // dest = splat(1) (VRB multiplier)
-+ as_vmsumshm(dest.encoding() & 31, src.encoding() & 31, dest.encoding() & 31,
-+ scratch.encoding() & 31);
-+ return;
-+ }
-+ // dest == src: vmsumshm would need src, splat(1) and a zero addend live at
-+ // the same time, but only dest and scratch are available. Fall back to the
-+ // vmule/vmulo + vadd trio with red-zone stashes, the same shape as the
-+ // i8x16 helpers.
-+ uint8_t s = scratch.encoding() & 31;
-+ uint8_t srcEnc = src.encoding() & 31;
-+ uint8_t destEnc = dest.encoding() & 31;
-+ RedZoneStashSimd128(*this, src, 0);
-+ as_vspltish(s, 1);
-+ as_vmulesh(destEnc, srcEnc, s);
-+ RedZoneStashSimd128(*this, dest, 1);
-+ RedZoneRestoreSimd128(*this, 0, scratch);
-+ as_vspltish(destEnc, 1);
-+ as_vmulosh(destEnc, s, destEnc);
-+ RedZoneRestoreSimd128(*this, 1, scratch);
-+ as_vadduwm(destEnc, destEnc, s);
-+}
-+
-+void MacroAssembler::unsignedExtAddPairwiseInt16x8(FloatRegister src,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ if (dest != src) {
-+ as_xxlxor(scratch, scratch, scratch);
-+ as_vspltish(dest.encoding() & 31, 1);
-+ as_vmsumuhm(dest.encoding() & 31, src.encoding() & 31, dest.encoding() & 31,
-+ scratch.encoding() & 31);
-+ return;
-+ }
-+ uint8_t s = scratch.encoding() & 31;
-+ uint8_t srcEnc = src.encoding() & 31;
-+ uint8_t destEnc = dest.encoding() & 31;
-+ RedZoneStashSimd128(*this, src, 0);
-+ as_vspltish(s, 1);
-+ as_vmuleuh(destEnc, srcEnc, s);
-+ RedZoneStashSimd128(*this, dest, 1);
-+ RedZoneRestoreSimd128(*this, 0, scratch);
-+ as_vspltish(destEnc, 1);
-+ as_vmulouh(destEnc, s, destEnc);
-+ RedZoneRestoreSimd128(*this, 1, scratch);
-+ as_vadduwm(destEnc, destEnc, s);
-+}
-+
-+void MacroAssembler::sqrtFloat32x4(FloatRegister src, FloatRegister dest) {
-+ as_xvsqrtsp(dest, src);
-+}
-+
-+void MacroAssembler::sqrtFloat64x2(FloatRegister src, FloatRegister dest) {
-+ as_xvsqrtdp(dest, src);
-+}
-+
-+void MacroAssembler::convertInt32x4ToFloat32x4(FloatRegister src,
-+ FloatRegister dest) {
-+ as_xvcvsxwsp(dest, src);
-+}
-+
-+void MacroAssembler::unsignedConvertInt32x4ToFloat32x4(FloatRegister src,
-+ FloatRegister dest) {
-+ as_xvcvuxwsp(dest, src);
-+}
-+
-+// i32x4 (low 2 lanes) → f64x2. Wasm `f64x2.convert_low_i32x4_{s,u}`.
-+// xvcv{s,u}xwdp converts BE word 0 and BE word 2 of source to doubles in
-+// BE dwords 0 and 1. vmrglw places src.word_BE[2,3] at the read positions,
-+// matching the f32→f64 promote shape:
-+//   vmrglw scratch, src, src  ; BE words 2,3 of src → BE words 0,2 of
-+//                             ; scratch
-+//   xvcv*xwdp dest, scratch   ; convert both, place in BE dwords 0,1
-+// Output BE dwords land as [convert(input lane 1), convert(input lane 0)],
-+// which on PPC64LE storage IS the wasm output layout.
-+//
-+// 2 insns each, single ScratchSimd128 scope, no GPR or FPR scratch.
-+// All ops POWER7+. dest==src aliasing safe (vmrglw consumes src into
-+// scratch before dest is written).
-+void MacroAssembler::convertInt32x4ToFloat64x2(FloatRegister src,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ as_vmrglw(scratch.encoding() & 31, src.encoding() & 31, src.encoding() & 31);
-+ as_xvcvsxwdp(dest, scratch);
-+}
-+
-+void MacroAssembler::unsignedConvertInt32x4ToFloat64x2(FloatRegister src,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ as_vmrglw(scratch.encoding() & 31, src.encoding() & 31, src.encoding() & 31);
-+ as_xvcvuxwdp(dest, scratch);
-+}
-+
-+void MacroAssembler::truncSatFloat32x4ToInt32x4(FloatRegister src,
-+ FloatRegister dest) {
-+ // xvcvspsxws gives INT32_MIN for NaN, but Wasm requires 0.
-+ ScratchSimd128Scope scratch(*this);
-+ as_xvcmpeqsp(scratch, src, src); // ~0 for non-NaN, 0 for NaN
-+ as_xvcvspsxws(dest, src);
-+ as_xxland(dest, dest, scratch); // zero NaN lanes
-+}
-+
-+// Pack the two "interesting" 32-bit results that xvcv*xws / xvcvdpsp leaves
-+// at scratch.word_BE[0..1] (= A, A) and scratch.word_BE[2..3] (= B, B) into a
-+// zeroed dest as dest.word_BE = [0, 0, A, B]. This is the layout wasm requires
-+// for f64x2 → {i32x4 trunc_sat, f32x4 demote}. Writes dest, consumes scratch.
-+//
-+// POWER9 path (3 insns) uses xxinsertw/xxextractuw. POWER8 path (7 insns)
-+// goes via two GPR round-trips: extract A and B with mfvsrd, splice them
-+// into a single dword with rldimi, mtvsrd back into a SIMD reg, and
-+// xxpermdi the result into dest.dw1 while keeping dest.dw0 zero.
-+static inline void PackTwoWordsToLowHalf(MacroAssembler& masm,
-+ FloatRegister scratch,
-+ FloatRegister dest) {
-+ if (HasPOWER9()) {
-+ masm.as_xxinsertw(dest, scratch,
-+ 8); // dest.word_BE[2] ← scratch.word_BE[1] (= A)
-+ masm.as_xxextractuw(scratch, scratch,
-+ 8); // scratch.word_BE[1] ← scratch.word_BE[2] (= B)
-+ masm.as_xxinsertw(dest, scratch,
-+ 12); // dest.word_BE[3] ← scratch.word_BE[1] (= B)
-+ return;
-+ }
-+ // POWER8: xxinsertw/xxextractuw are ISA 3.0. Take a GPR detour instead.
-+ // scratch.dw_BE[0] = (A << 32) | A, scratch.dw_BE[1] = (B << 32) | B.
-+ UseScratchRegisterScope temps(masm);
-+ Register tmpA = temps.Acquire();
-+ Register tmpB = temps.Acquire();
-+ masm.as_mfvsrd(tmpA, scratch); // tmpA = (A << 32) | A
-+ masm.as_xxpermdi(scratch, scratch, scratch,
-+ 2); // swap dwords: now dw0 = (B<<32)|B
-+ masm.as_mfvsrd(tmpB, scratch); // tmpB = (B << 32) | B
-+ masm.x_srdi(tmpA, tmpA, 32); // tmpA = 0x00000000_AAAAAAAA
-+ masm.as_rldimi(tmpB, tmpA, 32,
-+ 0); // tmpB[0..31] = A; tmpB[32..63] = B (kept)
-+ masm.as_mtvsrd(scratch, tmpB); // scratch.dw_BE[0] = (A << 32) | B; dw1 = 0
-+ masm.as_xxpermdi(dest, dest, scratch,
-+ 0); // dest = {dest.dw0=0, scratch.dw0} = [0, 0, A, B]
-+}
-+
-+// NOTE(review): this paragraph appears to describe an earlier scalar
-+// implementation, not the vector code below — confirm before relying on it.
-+// That version bridged through ScratchDoubleReg (FPR f0) because fctiwz /
-+// fcmpu / fctiduz are X-form scalar FP instructions that only encode 5-bit
-+// FRT/FRB fields, so emitting them on a Simd128 reg (encoding 32+) would
-+// corrupt the opcode. It extracted both lanes' GPR results before writing
-+// dest (so dest == src was safe) and avoided replaceLaneInt32x4, which on
-+// POWER8 needs an extra GPR scratch while r11 and r12 were already held:
-+// both int32s were packed with rldimi, moved with mtvsrd, then xxpermdi'd
-+// into the low half so wasm lane 0 (BE W3) holds a, lane 1 (W2) b.
-+void MacroAssembler::truncSatFloat64x2ToInt32x4(FloatRegister src,
-+ FloatRegister dest,
-+ FloatRegister temp) {
-+ // Wasm `i32x4.trunc_sat_f64x2_s_zero`. xvcvdpsxws saturates on overflow and
-+ // returns INT32_MIN for NaN (per ISA); wasm requires NaN → 0, so a per-dword
-+ // NaN mask via xvcmpeqdp clamps NaN lanes to 0 before laying out the result.
-+ // Output BE word positions need wasm lane order: lane 1 → BE word 2,
-+ // lane 0 → BE word 3. xvcvdpsxws lands its results at BE words 0 and 2
-+ // (with replication into 1/3); PackTwoWordsToLowHalf moves them into
-+ // the right positions while zeroing the rest.
-+ // dest==src safe: src is consumed by xvcvdpsxws and xvcmpeqdp before
-+ // dest is zeroed.
-+ ScratchSimd128Scope scratch(*this);
-+ as_xvcvdpsxws(scratch, src);
-+ as_xvcmpeqdp(dest, src,
-+ src); // NaN-mask: 0xFF...F per dword for non-NaN, 0 for NaN
-+ as_xxland(scratch, scratch, dest);
-+ as_xxlxor(dest, dest, dest);
-+ PackTwoWordsToLowHalf(*this, scratch, dest);
-+}
-+
-+void MacroAssembler::unsignedTruncSatFloat64x2ToInt32x4(FloatRegister src,
-+ FloatRegister dest,
-+ FloatRegister temp) {
-+ // Wasm `i32x4.trunc_sat_f64x2_u_zero`. xvcvdpuxws semantics already
-+ // match the wasm spec without any masking: NaN → 0, negative → 0,
-+ // positive overflow → UINT32_MAX. So no NaN mask needed; just position
-+ // the saturated results into BE words 2,3 with zeros at words 0,1.
-+ // dest==src safe: src consumed by xvcvdpuxws before dest is zeroed.
-+ ScratchSimd128Scope scratch(*this);
-+ as_xvcvdpuxws(scratch, src);
-+ as_xxlxor(dest, dest, dest);
-+ PackTwoWordsToLowHalf(*this, scratch, dest);
-+}
-+
-+void MacroAssembler::truncFloat32x4ToInt32x4Relaxed(FloatRegister src,
-+ FloatRegister dest) {
-+ truncSatFloat32x4ToInt32x4(src, dest);
-+}
-+
-+void MacroAssembler::unsignedTruncFloat32x4ToInt32x4Relaxed(
-+ FloatRegister src, FloatRegister dest) {
-+ unsignedTruncSatFloat32x4ToInt32x4(src, dest);
-+}
-+
-+void MacroAssembler::truncFloat64x2ToInt32x4Relaxed(FloatRegister src,
-+ FloatRegister dest) {
-+ truncSatFloat64x2ToInt32x4(src, dest, ScratchSimd128Reg);
-+}
-+
-+void MacroAssembler::unsignedTruncFloat64x2ToInt32x4Relaxed(
-+ FloatRegister src, FloatRegister dest) {
-+ unsignedTruncSatFloat64x2ToInt32x4(src, dest, ScratchSimd128Reg);
-+}
-+
-+// f64x2 → f32x4 (low 2 lanes; high lanes zero). Wasm `f32x4.demote_f64x2_zero`.
-+// xvcvdpsp converts both doubles in one shot, replicating each result across
-+// its dword: BE word lanes = [s(in.dw0), s(in.dw0), s(in.dw1), s(in.dw1)].
-+// On PPC64LE wasm storage (lxvx-loaded), input.dw_BE[0] = wasm lane 1 and
-+// input.dw_BE[1] = wasm lane 0, so we get [s(l1), s(l1), s(l0), s(l0)] in
-+// BE word order. We then zero dest and pack s(l1) into BE word 2 (wasm
-+// output lane 1) and s(l0) into BE word 3 (wasm output lane 0) via the
-+// shared PackTwoWordsToLowHalf helper, which has POWER9 and POWER8 paths.
-+//
-+// dest==src aliasing safe: src is consumed by xvcvdpsp before dest is zeroed.
-+void MacroAssembler::convertFloat64x2ToFloat32x4(FloatRegister src,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ as_xvcvdpsp(scratch, src);
-+ ZeroSimd128(*this, dest);
-+ PackTwoWordsToLowHalf(*this, scratch, dest);
-+}
-+
-+// f32x4 (low 2 lanes) → f64x2. Wasm `f64x2.promote_low_f32x4`. xvcvspdp
-+// converts both BE word 0 and BE word 2 of its source to doubles in BE
-+// dwords 0 and 1 respectively. To get wasm lanes 0 and 1 (= input BE
-+// words 3 and 2) into those source positions, vmrglw merges low words:
-+// VRT.word[0] = VRA.word[2] = wasm lane 1, VRT.word[2] = VRA.word[3] =
-+// wasm lane 0 (with replicated copies in odd word slots that xvcvspdp
-+// ignores). Output BE dwords land as [double(lane1), double(lane0)],
-+// which on PPC64LE storage is exactly the wasm f64x2 output layout.
-+//
-+// dest==src aliasing safe: vmrglw consumes src into a separate scratch
-+// before dest is written.
-+//
-+// 2 insns, single ScratchSimd128 scope. All ops POWER7+.
-+void MacroAssembler::convertFloat32x4ToFloat64x2(FloatRegister src,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ as_vmrglw(scratch.encoding() & 31, src.encoding() & 31, src.encoding() & 31);
-+ as_xvcvspdp(dest, scratch);
-+}
-+
-+void MacroAssembler::unsignedNarrowInt16x8(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ // On LE, VMX pack swaps operand order vs Wasm convention.
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vpkshus), rhs, lhs, dest);
-+}
-+
-+void MacroAssembler::unsignedNarrowInt32x4(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ // On LE, VMX pack swaps operand order vs Wasm convention.
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vpkswus), rhs, lhs, dest);
-+}
-+
-+void MacroAssembler::widenLowInt8x16(FloatRegister src, FloatRegister dest) {
-+ // On PPC64 LE, raw vupklsb unpacks the LOW Wasm lanes (not vupkhsb).
-+ // GCC vec_unpackh maps to vupklsb on LE (swapped from BE naming).
-+ // Raw vupklsb([1..8,-1..-8]) = [1,2,3,4,5,6,7,8].
-+ EmitVmxUnary(
-+ *this,
-+ [](Assembler& a, uint8_t vrt, uint8_t vrb) { a.as_vupklsb(vrt, vrb); },
-+ src, dest);
-+}
-+
-+void MacroAssembler::widenHighInt8x16(FloatRegister src, FloatRegister dest) {
-+ // On PPC64 LE, raw vupkhsb unpacks the HIGH Wasm lanes.
-+ EmitVmxUnary(
-+ *this,
-+ [](Assembler& a, uint8_t vrt, uint8_t vrb) { a.as_vupkhsb(vrt, vrb); },
-+ src, dest);
-+}
-+
-+void MacroAssembler::unsignedWidenLowInt8x16(FloatRegister src,
-+ FloatRegister dest) {
-+ zeroExtend8x16To16x8(src, dest);
-+}
-+
-+void MacroAssembler::unsignedWidenHighInt8x16(FloatRegister src,
-+ FloatRegister dest) {
-+ // vmrghb(zero, src) interleaves zero bytes with the BE-high half of src,
-+ // producing zero-extended halfwords of the LE-high (Wasm-high) lanes.
-+ ScratchSimd128Scope scratch(*this);
-+ as_xxlxor(scratch, scratch, scratch);
-+ as_vmrghb(dest.encoding() & 31, scratch.encoding() & 31, src.encoding() & 31);
-+}
-+
-+void MacroAssembler::widenLowInt16x8(FloatRegister src, FloatRegister dest) {
-+ // On PPC64 LE, raw vupklsh unpacks LOW Wasm lanes (GCC swaps h/l on LE).
-+ EmitVmxUnary(
-+ *this,
-+ [](Assembler& a, uint8_t vrt, uint8_t vrb) { a.as_vupklsh(vrt, vrb); },
-+ src, dest);
-+}
-+
-+void MacroAssembler::widenHighInt16x8(FloatRegister src, FloatRegister dest) {
-+ EmitVmxUnary(
-+ *this,
-+ [](Assembler& a, uint8_t vrt, uint8_t vrb) { a.as_vupkhsh(vrt, vrb); },
-+ src, dest);
-+}
-+
-+void MacroAssembler::unsignedWidenLowInt16x8(FloatRegister src,
-+ FloatRegister dest) {
-+ zeroExtend16x8To32x4(src, dest);
-+}
-+
-+void MacroAssembler::unsignedWidenHighInt16x8(FloatRegister src,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ as_xxlxor(scratch, scratch, scratch);
-+ as_vmrghh(dest.encoding() & 31, scratch.encoding() & 31, src.encoding() & 31);
-+}
-+
-+void MacroAssembler::widenLowInt32x4(FloatRegister src, FloatRegister dest) {
-+ // On PPC64 LE, raw vupklsw unpacks LOW Wasm lanes.
-+ EmitVmxUnary(
-+ *this,
-+ [](Assembler& a, uint8_t vrt, uint8_t vrb) { a.as_vupklsw(vrt, vrb); },
-+ src, dest);
-+}
-+
-+void MacroAssembler::unsignedWidenLowInt32x4(FloatRegister src,
-+ FloatRegister dest) {
-+ zeroExtend32x4To64x2(src, dest);
-+}
-+
-+void MacroAssembler::widenHighInt32x4(FloatRegister src, FloatRegister dest) {
-+ EmitVmxUnary(
-+ *this,
-+ [](Assembler& a, uint8_t vrt, uint8_t vrb) { a.as_vupkhsw(vrt, vrb); },
-+ src, dest);
-+}
-+
-+void MacroAssembler::unsignedWidenHighInt32x4(FloatRegister src,
-+ FloatRegister dest) {
-+ // i64x2.extend_high_i32x4_u: take high 2 i32 lanes of src, zero-extend
-+ // to i64 each. Use vmrghw to interleave a zero VR with src — same shape
-+ // as the (already-correct) unsignedWidenHighInt16x8 sibling above.
-+ ScratchSimd128Scope scratch(*this);
-+ ZeroSimd128(*this, scratch);
-+ as_vmrghw(dest.encoding() & 31, scratch.encoding() & 31, src.encoding() & 31);
-+}
-+
-+void MacroAssembler::pseudoMinFloat32x4(FloatRegister rhsOrRhsDest,
-+ FloatRegister lhsOrLhsDest) {
-+ // pmin: result[i] = rhs[i] < lhs[i] ? rhs[i] : lhs[i]
-+ // xvcmpgtsp(mask, lhs, rhs) → 1 where lhs > rhs (i.e., rhs < lhs)
-+ // xxsel: mask=1 → XB=rhs. mask=0 → XA=lhs.
-+ // Result goes to lhsOrLhsDest (second param).
-+ ScratchSimd128Scope scratch(*this);
-+ as_xvcmpgtsp(scratch, lhsOrLhsDest, rhsOrRhsDest);
-+ as_xxsel(lhsOrLhsDest, lhsOrLhsDest, rhsOrRhsDest, scratch);
-+}
-+
-+void MacroAssembler::pseudoMinFloat32x4(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ // pmin(lhs, rhs) = rhs < lhs ? rhs : lhs
-+ // Inline to handle dest aliasing with either operand.
-+ ScratchSimd128Scope scratch(*this);
-+ as_xvcmpgtsp(scratch, lhs, rhs);
-+ // mask=1 where lhs > rhs. XC=1 → select XB=rhs. XC=0 → select XA=lhs.
-+ as_xxsel(dest, lhs, rhs, scratch);
-+}
-+
-+void MacroAssembler::pseudoMinFloat64x2(FloatRegister rhsOrRhsDest,
-+ FloatRegister lhsOrLhsDest) {
-+ ScratchSimd128Scope scratch(*this);
-+ as_xvcmpgtdp(scratch, lhsOrLhsDest, rhsOrRhsDest);
-+ as_xxsel(lhsOrLhsDest, lhsOrLhsDest, rhsOrRhsDest, scratch);
-+}
-+
-+void MacroAssembler::pseudoMinFloat64x2(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ as_xvcmpgtdp(scratch, lhs, rhs);
-+ as_xxsel(dest, lhs, rhs, scratch);
-+}
-+
-+void MacroAssembler::pseudoMaxFloat32x4(FloatRegister rhsOrRhsDest,
-+ FloatRegister lhsOrLhsDest) {
-+ ScratchSimd128Scope scratch(*this);
-+ as_xvcmpgtsp(scratch, rhsOrRhsDest, lhsOrLhsDest);
-+ as_xxsel(lhsOrLhsDest, lhsOrLhsDest, rhsOrRhsDest, scratch);
-+}
-+
-+void MacroAssembler::pseudoMaxFloat32x4(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ // pmax(lhs, rhs) = lhs < rhs ? rhs : lhs
-+ ScratchSimd128Scope scratch(*this);
-+ as_xvcmpgtsp(scratch, rhs, lhs);
-+ // mask=1 where rhs > lhs (lhs < rhs). XC=1 → select XB=rhs. XC=0 → select
-+ // XA=lhs.
-+ as_xxsel(dest, lhs, rhs, scratch);
-+}
-+
-+void MacroAssembler::pseudoMaxFloat64x2(FloatRegister rhsOrRhsDest,
-+ FloatRegister lhsOrLhsDest) {
-+ ScratchSimd128Scope scratch(*this);
-+ as_xvcmpgtdp(scratch, rhsOrRhsDest, lhsOrLhsDest);
-+ as_xxsel(lhsOrLhsDest, lhsOrLhsDest, rhsOrRhsDest, scratch);
-+}
-+
-+void MacroAssembler::pseudoMaxFloat64x2(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ as_xvcmpgtdp(scratch, rhs, lhs);
-+ as_xxsel(dest, lhs, rhs, scratch);
-+}
-+
-+void MacroAssembler::dotInt8x16Int7x16(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ // result[k] = lhs[2k]*rhs[2k] + lhs[2k+1]*rhs[2k+1] for k=0..7.
-+ // vmulesb/vmulosb produce even/odd byte products as i16 in matching
-+ // halfword lanes; vadduhm sums them pairwise.
-+ ScratchSimd128Scope scratch(*this);
-+ uint8_t l = lhs.encoding() & 31;
-+ uint8_t r = rhs.encoding() & 31;
-+ uint8_t d = dest.encoding() & 31;
-+ uint8_t s = scratch.encoding() & 31;
-+ as_vmulesb(s, l, r);
-+ as_vmulosb(d, l, r);
-+ as_vadduhm(d, s, d);
-+}
-+
-+void MacroAssembler::ceilFloat32x4(FloatRegister src, FloatRegister dest) {
-+ as_xvrspip(dest, src);
-+}
-+
-+void MacroAssembler::ceilFloat64x2(FloatRegister src, FloatRegister dest) {
-+ as_xvrdpip(dest, src);
-+}
-+
-+void MacroAssembler::floorFloat32x4(FloatRegister src, FloatRegister dest) {
-+ as_xvrspim(dest, src);
-+}
-+
-+void MacroAssembler::floorFloat64x2(FloatRegister src, FloatRegister dest) {
-+ as_xvrdpim(dest, src);
-+}
-+
-+void MacroAssembler::truncFloat32x4(FloatRegister src, FloatRegister dest) {
-+ as_xvrspiz(dest, src);
-+}
-+
-+void MacroAssembler::truncFloat64x2(FloatRegister src, FloatRegister dest) {
-+ as_xvrdpiz(dest, src);
-+}
-+
-+void MacroAssembler::nearestFloat32x4(FloatRegister src, FloatRegister dest) {
-+ as_xvrspic(dest, src);
-+}
-+
-+void MacroAssembler::nearestFloat64x2(FloatRegister src, FloatRegister dest) {
-+ as_xvrdpic(dest, src);
-+}
-+
-+void MacroAssembler::fnmaFloat32x4(FloatRegister src1, FloatRegister src2,
-+ FloatRegister srcDest) {
-+ as_xvnmsubasp(srcDest, src1, src2);
-+}
-+
-+void MacroAssembler::fnmaFloat64x2(FloatRegister src1, FloatRegister src2,
-+ FloatRegister srcDest) {
-+ as_xvnmsubadp(srcDest, src1, src2);
-+}
-+
-+void MacroAssembler::minFloat32x4Relaxed(FloatRegister src,
-+ FloatRegister srcDest) {
-+ as_xvminsp(srcDest, srcDest, src);
-+}
-+
-+void MacroAssembler::minFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ as_xvminsp(dest, lhs, rhs);
-+}
-+
-+void MacroAssembler::maxFloat32x4Relaxed(FloatRegister src,
-+ FloatRegister srcDest) {
-+ as_xvmaxsp(srcDest, srcDest, src);
-+}
-+
-+void MacroAssembler::maxFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ as_xvmaxsp(dest, lhs, rhs);
-+}
-+
-+void MacroAssembler::minFloat64x2Relaxed(FloatRegister src,
-+ FloatRegister srcDest) {
-+ as_xvmindp(srcDest, srcDest, src);
-+}
-+
-+void MacroAssembler::minFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ as_xvmindp(dest, lhs, rhs);
-+}
-+
-+void MacroAssembler::maxFloat64x2Relaxed(FloatRegister src,
-+ FloatRegister srcDest) {
-+ as_xvmaxdp(srcDest, srcDest, src);
-+}
-+
-+void MacroAssembler::maxFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ as_xvmaxdp(dest, lhs, rhs);
-+}
-+
-+void MacroAssembler::q15MulrInt16x8Relaxed(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ q15MulrSatInt16x8(lhs, rhs, dest);
-+}
-+
-+// SIMD overloads accepting an extra FloatRegister temp (shared-header signature
-+// used by x86; on PPC64 the temp is unused for most of these).
-+void MacroAssembler::popcntInt8x16(FloatRegister src, FloatRegister dest,
-+ FloatRegister temp) {
-+ popcntInt8x16(src, dest);
-+}
-+
-+void MacroAssembler::unsignedTruncSatFloat32x4ToInt32x4(FloatRegister src,
-+ FloatRegister dest,
-+ FloatRegister temp) {
-+ unsignedTruncSatFloat32x4ToInt32x4(src, dest);
-+}
-+
-+void MacroAssembler::dotInt8x16Int7x16ThenAdd(FloatRegister lhs,
-+ FloatRegister rhs,
-+ FloatRegister dest,
-+ FloatRegister temp) {
-+ // dest += pairwise_widen_i16_to_i32(dot_i8x16(lhs, rhs)).
-+ //
-+ // Step 1: i16x8 dot of i8 byte pairs (vmulesb/vmulosb/vadduhm). Keeps
-+ // the existing signed-byte multiply semantics that match ARM64 sdot
-+ // and x86 vpdpbssd (vmsummbm would be signed×unsigned and diverge for
-+ // i7 lanes that bit-pattern as negative).
-+ //
-+ // Step 2: vmsumshm dest, dot, splat_hw(1), dest computes
-+ // dest.i32[k] = dest.i32[k] + dot.i16[2k]*1 + dot.i16[2k+1]*1
-+ // which is exactly pairwise widen + accumulate in a single insn.
-+ // splat_hw(1) is a single vspltish (5-bit SIMM splat to all 8 halfwords).
-+ ScratchSimd128Scope scratch(*this);
-+ uint8_t l = lhs.encoding() & 31;
-+ uint8_t r = rhs.encoding() & 31;
-+ uint8_t d = dest.encoding() & 31;
-+ uint8_t s = scratch.encoding() & 31;
-+ uint8_t t = temp.encoding() & 31;
-+
-+ as_vmulesb(s, l, r);
-+ as_vmulosb(t, l, r);
-+ as_vadduhm(t, s, t);
-+ as_vspltish(s, 1);
-+ as_vmsumshm(d, t, s, d);
-+}
-+
-+// SIMD ops ported from arm64- and x86/x64-shaped signatures.
-+
-+void MacroAssembler::permuteInt16x8(const uint16_t lanes[8], FloatRegister src,
-+ FloatRegister dest) {
-+ uint8_t shuffleLanes[16];
-+ for (unsigned i = 0; i < 8; i++) {
-+ shuffleLanes[i * 2] = lanes[i] * 2;
-+ shuffleLanes[i * 2 + 1] = lanes[i] * 2 + 1;
-+ }
-+ shuffleInt8x16(shuffleLanes, src, src, dest);
-+}
-+
-+void MacroAssembler::rotateRightSimd128(FloatRegister src, FloatRegister dest,
-+ uint32_t shift) {
-+ MOZ_ASSERT(shift < 16);
-+ if (shift == 0) {
-+ moveSimd128(src, dest);
-+ return;
-+ }
-+ // vsldoi VRT,VRA,VRB,SH: concatenate VRA||VRB, take bytes [SH..SH+15].
-+ // Rotate right by N = vsldoi(src, src, 16-N).
-+ as_vsldoi(dest, src, src, 16 - shift);
-+}
-+
-+// Emit an i64x2 lane-wise multiply of lhs and rhs into dest.
-+// temp1 and temp2 are accepted for signature compatibility but are not
-+// referenced by this implementation; the non-POWER10 path uses the SIMD
-+// scratch register and two scratch GPRs instead.
-+void MacroAssembler::mulInt64x2(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest, FloatRegister temp1,
-+ FloatRegister temp2) {
-+ // POWER10 collapses the entire i64x2 multiply to a single vmulld.
-+ // POWER9/POWER8 fall back to the GPR round-trip path: extract each
-+ // lane pair into GPRs (mfvsrld for LE-dw0/Wasm-lane-0, mfvsrd for
-+ // LE-dw1/lane-1), multiply, and reassemble via mtvsrd + xxpermdi.
-+ if (HasPOWER10()) {
-+ as_vmulld(dest.encoding() & 31, lhs.encoding() & 31, rhs.encoding() & 31);
-+ return;
-+ }
-+ // Aliasing safety: stash the lane-0 product in ScratchSimd128 (which
-+ // is non-allocatable, so cannot alias lhs/rhs) and only write dest at
-+ // the very end, after both lhs and rhs have been fully consumed.
-+ ScratchSimd128Scope scratch(*this);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register a = temps.Acquire();
-+ Register b = temps.Acquire();
-+
-+ // Lane 0: fetch both operands' lane-0 doublewords into a and b.
-+ if (HasPOWER9()) {
-+ as_mfvsrld(a, lhs);
-+ as_mfvsrld(b, rhs);
-+ } else {
-+ // Without mfvsrld, swap the doublewords into scratch and read via mfvsrd.
-+ as_xxpermdi(scratch, lhs, lhs, 2);
-+ as_mfvsrd(a, scratch);
-+ as_xxpermdi(scratch, rhs, rhs, 2);
-+ as_mfvsrd(b, scratch);
-+ }
-+ as_mulld(a, a, b);
-+ as_mtvsrd(scratch, a);
-+
-+ // Lane 1: multiply the doublewords read directly by mfvsrd, then merge
-+ // the two products into dest.
-+ as_mfvsrd(a, lhs);
-+ as_mfvsrd(b, rhs);
-+ as_mulld(a, a, b);
-+ as_mtvsrd(dest, a);
-+ as_xxpermdi(dest, dest, scratch, 0);
-+}
-+
-+// Emit dest = lhs & ~rhs as a single xxlandc.
-+void MacroAssembler::bitwiseAndNotSimd128(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ // andnot(lhs, rhs) = lhs & ~rhs = xxlandc(lhs, rhs)
-+ as_xxlandc(dest, lhs, rhs);
-+}
-+
-+// Bitwise select. maskDest carries the selection mask on entry and is
-+// overwritten with the result: bits of onTrue where the mask is 1, bits of
-+// onFalse where it is 0.
-+void MacroAssembler::bitwiseSelectSimd128(FloatRegister onTrue,
-+ FloatRegister onFalse,
-+ FloatRegister maskDest) {
-+ // result = (onTrue & mask) | (onFalse & ~mask)
-+ // xxsel: XC=0→XA, XC=1→XB → XT = (XA & ~XC) | (XB & XC)
-+ // Need XA=onFalse, XB=onTrue, XC=mask.
-+ as_xxsel(maskDest, onFalse, onTrue, maskDest);
-+}
-+
-+// Per-byte population count of src into dest: routes vpopcntb through the
-+// EmitVmxUnary helper.
-+void MacroAssembler::popcntInt8x16(FloatRegister src, FloatRegister dest) {
-+ EmitVmxUnary(
-+ *this,
-+ [](Assembler& a, uint8_t vrt, uint8_t vrb) { a.as_vpopcntb(vrt, vrb); },
-+ src, dest);
-+}
-+
-+// Gather the most significant bit of each 8-bit lane of src into GPR dest.
-+// temp is only written on the pre-POWER10 path, where it first holds the
-+// vbpermq control vector and then the permuted result.
-+void MacroAssembler::bitmaskInt8x16(FloatRegister src, Register dest,
-+ FloatRegister temp) {
-+ if (HasPOWER10()) {
-+ // Single-instruction collapse on POWER10.
-+ as_vextractbm(dest, src);
-+ return;
-+ }
-+ // POWER8+ vbpermq-based bitmask: ctl[i] = (15-i)*8 produces the wasm-spec
-+ // bitmap (bit i = MSB of LE lane i) in dw0 low 16 bits.
-+ int8_t ctl[16] = {120, 112, 104, 96, 88, 80, 72, 64,
-+ 56, 48, 40, 32, 24, 16, 8, 0};
-+ loadConstantSimd128(SimdConstant::CreateX16(ctl), temp);
-+ as_vbpermq(temp.encoding() & 31, src.encoding() & 31, temp.encoding() & 31);
-+ // Move the doubleword holding the gathered bits to the GPR.
-+ as_mfvsrd(dest, temp);
-+}
-+
-+// Gather the most significant bit of each 16-bit lane of src into GPR dest.
-+// temp is only written on the pre-POWER10 path (control vector, then result).
-+void MacroAssembler::bitmaskInt16x8(FloatRegister src, Register dest,
-+ FloatRegister temp) {
-+ if (HasPOWER10()) {
-+ as_vextracthm(dest, src);
-+ return;
-+ }
-+ // Same recipe as bitmaskInt8x16 but ctl picks halfword MSBs:
-+ // BE bit (14-2i)*8 for lane i, plus 8 ignore-bytes (high bit set).
-+ int8_t ctl[16] = {112, 96, 80, 64, 48, 32, 16, 0,
-+ -128, -128, -128, -128, -128, -128, -128, -128};
-+ loadConstantSimd128(SimdConstant::CreateX16(ctl), temp);
-+ as_vbpermq(temp.encoding() & 31, src.encoding() & 31, temp.encoding() & 31);
-+ // Move the doubleword holding the gathered bits to the GPR.
-+ as_mfvsrd(dest, temp);
-+}
-+
-+// Gather the most significant bit of each 32-bit lane of src into GPR dest.
-+// temp is only written on the pre-POWER10 path (control vector, then result).
-+void MacroAssembler::bitmaskInt32x4(FloatRegister src, Register dest,
-+ FloatRegister temp) {
-+ if (HasPOWER10()) {
-+ as_vextractwm(dest, src);
-+ return;
-+ }
-+ // Same recipe as bitmaskInt8x16 but ctl picks word MSBs:
-+ // BE bit (12-4i)*8 for lane i, plus 12 ignore-bytes (high bit set).
-+ int8_t ctl[16] = {96, 64, 32, 0, -128, -128, -128, -128,
-+ -128, -128, -128, -128, -128, -128, -128, -128};
-+ loadConstantSimd128(SimdConstant::CreateX16(ctl), temp);
-+ as_vbpermq(temp.encoding() & 31, src.encoding() & 31, temp.encoding() & 31);
-+ // Move the doubleword holding the gathered bits to the GPR.
-+ as_mfvsrd(dest, temp);
-+}
-+
-+// Gather the most significant bit of each 64-bit lane of src into GPR dest.
-+// temp is only written on the pre-POWER10 path (control vector, then result).
-+void MacroAssembler::bitmaskInt64x2(FloatRegister src, Register dest,
-+ FloatRegister temp) {
-+ if (HasPOWER10()) {
-+ as_vextractdm(dest, src);
-+ return;
-+ }
-+ // Same recipe as the other bitmask variants. ctl picks dword MSBs:
-+ // BE bit 64 for lane 0, BE bit 0 for lane 1, plus 14 ignore-bytes.
-+ int8_t ctl[16] = {64, 0, -128, -128, -128, -128, -128, -128,
-+ -128, -128, -128, -128, -128, -128, -128, -128};
-+ loadConstantSimd128(SimdConstant::CreateX16(ctl), temp);
-+ as_vbpermq(temp.encoding() & 31, src.encoding() & 31, temp.encoding() & 31);
-+ // Move the doubleword holding the gathered bits to the GPR.
-+ as_mfvsrd(dest, temp);
-+}
-+
-+// Two-operand form: forwards to the three-operand overload with lhsDest
-+// serving as both the left operand and the destination.
-+void MacroAssembler::compareInt64x2(Assembler::Condition cond,
-+ FloatRegister rhs, FloatRegister lhsDest) {
-+ compareInt64x2(cond, lhsDest, rhs, lhsDest);
-+}
-+
-+// Three-operand i64x2 compare: hands EmitVmxCompare the doubleword
-+// equal / signed-greater / unsigned-greater primitives (vcmpequd, vcmpgtsd,
-+// vcmpgtud) and lets it synthesize |cond| from them.
-+void MacroAssembler::compareInt64x2(Assembler::Condition cond,
-+ FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxCompare(*this, cond, lhs, rhs, dest, VMX_BINARY_WRAPPER(vcmpequd),
-+ VMX_BINARY_WRAPPER(vcmpgtsd), VMX_BINARY_WRAPPER(vcmpgtud));
-+}
-+
-+// Two-operand form: forwards to the three-operand overload with lhsDest as
-+// both the left operand and the destination.
-+void MacroAssembler::minFloat32x4(FloatRegister rhs, FloatRegister lhsDest) {
-+ minFloat32x4(lhsDest, rhs, lhsDest);
-+}
-+
-+// Three-operand f32x4 min: a bare xvminsp with no NaN fix-up. The overload
-+// taking two temps adds the NaN-propagation sequence.
-+void MacroAssembler::minFloat32x4(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ as_xvminsp(dest, lhs, rhs);
-+}
-+
-+// f32x4 min with NaN propagation. temp1 and temp2 are clobbered: temp1 ends
-+// up as the "both operands ordered" mask and temp2 as lhs + rhs.
-+void MacroAssembler::minFloat32x4(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest, FloatRegister temp1,
-+ FloatRegister temp2) {
-+ // Wasm min with NaN propagation.
-+ // Detect NaN in either operand (not via add which falsely flags inf+(-inf)).
-+ // Compute mask and add BEFORE min (min may clobber lhs via dest aliasing).
-+ as_xvcmpeqsp(temp1, lhs, lhs);
-+ as_xvcmpeqsp(temp2, rhs, rhs);
-+ as_xxland(temp1, temp1, temp2);
-+ as_xvaddsp(temp2, lhs, rhs);
-+ as_xvminsp(dest, lhs, rhs);
-+ // Keep the min where the mask is set; elsewhere take the sum from temp2.
-+ as_xxsel(dest, temp2, dest, temp1);
-+}
-+
-+// Two-operand form: forwards to the three-operand overload with lhsDest as
-+// both the left operand and the destination.
-+void MacroAssembler::minFloat64x2(FloatRegister rhs, FloatRegister lhsDest) {
-+ minFloat64x2(lhsDest, rhs, lhsDest);
-+}
-+
-+// Three-operand f64x2 min: a bare xvmindp with no NaN fix-up. The overload
-+// taking two temps adds the NaN-propagation sequence.
-+void MacroAssembler::minFloat64x2(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ as_xvmindp(dest, lhs, rhs);
-+}
-+
-+// f64x2 min with NaN propagation. temp1 and temp2 are clobbered: temp1 ends
-+// up as the "both operands ordered" mask and temp2 as lhs + rhs.
-+void MacroAssembler::minFloat64x2(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest, FloatRegister temp1,
-+ FloatRegister temp2) {
-+ // NaN mask and add must be computed BEFORE min (which may clobber lhs via
-+ // dest).
-+ as_xvcmpeqdp(temp1, lhs, lhs);
-+ as_xvcmpeqdp(temp2, rhs, rhs);
-+ as_xxland(temp1, temp1, temp2); // temp1 = ~0 when both non-NaN
-+ as_xvadddp(temp2, lhs, rhs); // temp2 = add (NaN source)
-+ as_xvmindp(dest, lhs, rhs); // dest = min (may clobber lhs)
-+ // Keep the min where the mask is set; elsewhere take the sum from temp2.
-+ as_xxsel(dest, temp2, dest, temp1);
-+}
-+
-+// Two-operand form: forwards to the three-operand overload with lhsDest as
-+// both the left operand and the destination.
-+void MacroAssembler::maxFloat32x4(FloatRegister rhs, FloatRegister lhsDest) {
-+ maxFloat32x4(lhsDest, rhs, lhsDest);
-+}
-+
-+// Three-operand f32x4 max: a bare xvmaxsp with no NaN fix-up. The overload
-+// taking two temps adds the NaN-propagation sequence.
-+void MacroAssembler::maxFloat32x4(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ as_xvmaxsp(dest, lhs, rhs);
-+}
-+
-+// f32x4 max with NaN propagation. temp1 and temp2 are clobbered: temp1 ends
-+// up as the "both operands ordered" mask and temp2 as lhs + rhs.
-+void MacroAssembler::maxFloat32x4(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest, FloatRegister temp1,
-+ FloatRegister temp2) {
-+ // Wasm max with NaN propagation, using temp registers.
-+ // x == x is false only for NaN, so the AND of both self-compares is the
-+ // "neither lane is NaN" mask.
-+ as_xvcmpeqsp(temp1, lhs, lhs);
-+ as_xvcmpeqsp(temp2, rhs, rhs);
-+ as_xxland(temp1, temp1, temp2);
-+ // The sum is computed before the max because dest may alias lhs or rhs.
-+ as_xvaddsp(temp2, lhs, rhs);
-+ as_xvmaxsp(dest, lhs, rhs);
-+ // Keep the max where the mask is set; elsewhere take the sum from temp2.
-+ as_xxsel(dest, temp2, dest, temp1);
-+}
-+
-+// Two-operand form: forwards to the three-operand overload with lhsDest as
-+// both the left operand and the destination.
-+void MacroAssembler::maxFloat64x2(FloatRegister rhs, FloatRegister lhsDest) {
-+ maxFloat64x2(lhsDest, rhs, lhsDest);
-+}
-+
-+// Three-operand f64x2 max: a bare xvmaxdp with no NaN fix-up. The overload
-+// taking two temps adds the NaN-propagation sequence.
-+void MacroAssembler::maxFloat64x2(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ as_xvmaxdp(dest, lhs, rhs);
-+}
-+
-+// f64x2 max with NaN propagation. temp1 and temp2 are clobbered: temp1 ends
-+// up as the "both operands ordered" mask and temp2 as lhs + rhs.
-+void MacroAssembler::maxFloat64x2(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest, FloatRegister temp1,
-+ FloatRegister temp2) {
-+ // x == x is false only for NaN, so the AND of both self-compares is the
-+ // "neither lane is NaN" mask.
-+ as_xvcmpeqdp(temp1, lhs, lhs);
-+ as_xvcmpeqdp(temp2, rhs, rhs);
-+ as_xxland(temp1, temp1, temp2);
-+ // The sum is computed before the max because dest may alias lhs or rhs.
-+ as_xvadddp(temp2, lhs, rhs);
-+ as_xvmaxdp(dest, lhs, rhs);
-+ // Keep the max where the mask is set; elsewhere take the sum from temp2.
-+ as_xxsel(dest, temp2, dest, temp1);
-+}
-+
-+// f32x4 -> u32x4 truncating conversion, emitted as a single xvcvspuxws.
-+// NOTE(review): relies on xvcvspuxws itself saturating out-of-range lanes
-+// and mapping NaN to 0, as the wasm trunc_sat semantics require -- confirm
-+// against the ISA.
-+void MacroAssembler::unsignedTruncSatFloat32x4ToInt32x4(FloatRegister src,
-+ FloatRegister dest) {
-+ as_xvcvspuxws(dest, src);
-+}
-+
-+// Copy 64-bit lane |lane| (0 or 1) of |src| into the GPR dest.reg.
-+void MacroAssembler::extractLaneInt64x2(uint32_t lane, FloatRegister src,
-+                                        Register64 dest) {
-+  MOZ_ASSERT(lane < 2);
-+
-+  // Lane 1 = BE dword 0 = register bits[0:63]; mfvsrd reads it directly.
-+  if (lane == 1) {
-+    as_mfvsrd(dest.reg, src);
-+    return;
-+  }
-+
-+  // Lane 0 = BE dword 1. POWER9 can read it in one instruction.
-+  if (HasPOWER9()) {
-+    as_mfvsrld(dest.reg, src);
-+    return;
-+  }
-+
-+  // Older cores: swap the doublewords into the scratch VSR, then read the
-+  // (now upper) doubleword with mfvsrd.
-+  ScratchSimd128Scope swapped(*this);
-+  as_xxpermdi(swapped, src, src, 2);
-+  as_mfvsrd(dest.reg, swapped);
-+}
-+
-+// Overwrite 64-bit lane |lane| (0 or 1) of |lhsDest| with the GPR rhs.reg,
-+// leaving the other lane untouched.
-+void MacroAssembler::replaceLaneInt64x2(unsigned lane, Register64 rhs,
-+                                        FloatRegister lhsDest) {
-+  MOZ_ASSERT(lane < 2);
-+
-+  if (HasPOWER10()) {
-+    // One vinsd, no scratch VSR. The UIM byte offset is 8 for lane 0 and 0
-+    // for lane 1.
-+    const unsigned byteOffset = (1 - lane) * 8;
-+    as_vinsd(lhsDest, rhs.reg, byteOffset);
-+    return;
-+  }
-+
-+  // Stage the new value in dw0 of the scratch VSR, then merge with xxpermdi.
-+  ScratchSimd128Scope staged(*this);
-+  as_mtvsrd(staged, rhs.reg);
-+
-+  if (lane != 0) {
-+    // Lane 1 lives in dw0 (LE high); keep dw1 (lane 0).
-+    // dm=0b01 yields [staged.dw0, lhsDest.dw1].
-+    as_xxpermdi(lhsDest, staged, lhsDest, 1);
-+    return;
-+  }
-+
-+  // Lane 0 lives in dw1 (LE low); keep dw0 (lane 1).
-+  // dm=0b00 yields [lhsDest.dw0, staged.dw0].
-+  as_xxpermdi(lhsDest, lhsDest, staged, 0);
-+}
-+
-+// SIMD 3-operand arithmetic (x86_shared-style signatures).
-+
-+// f32x4 add: dest = lhs + rhs via xvaddsp.
-+void MacroAssembler::addFloat32x4(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ as_xvaddsp(dest, lhs, rhs);
-+}
-+
-+// f64x2 add: dest = lhs + rhs via xvadddp.
-+void MacroAssembler::addFloat64x2(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ as_xvadddp(dest, lhs, rhs);
-+}
-+
-+// i16x8 wrapping add: vadduhm (halfword add, modulo) via EmitVmxBinary.
-+void MacroAssembler::addInt16x8(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vadduhm), lhs, rhs, dest);
-+}
-+
-+// i8x16 wrapping add: vaddubm (byte add, modulo) via EmitVmxBinary.
-+void MacroAssembler::addInt8x16(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vaddubm), lhs, rhs, dest);
-+}
-+
-+// f32x4 divide: dest = lhs / rhs via xvdivsp.
-+void MacroAssembler::divFloat32x4(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ as_xvdivsp(dest, lhs, rhs);
-+}
-+
-+// Extract 16-bit lane |lane| (0..7) of src into GPR dest, sign-extended
-+// from 16 bits by the trailing extsh on both paths.
-+// NOTE(review): the POWER9 path names ScratchSimd128Reg directly, whereas
-+// sibling helpers acquire it through ScratchSimd128Scope -- confirm that
-+// bypassing the scope here is intentional.
-+void MacroAssembler::extractLaneInt16x8(uint32_t lane, FloatRegister src,
-+ Register dest) {
-+ MOZ_ASSERT(lane < 8);
-+ if (HasPOWER9()) {
-+ // Byte offset 14 - 2*lane selects the halfword for the requested lane.
-+ as_vextractuh(ScratchSimd128Reg, src, 14 - 2 * lane);
-+ as_mfvsrd(dest, ScratchSimd128Reg);
-+ as_extsh(dest, dest);
-+ return;
-+ }
-+ // Pre-POWER9 fallback: generic helper with 2-byte / 16-bit lane geometry.
-+ ExtractLaneToGPR(*this, lane, src, dest, 2, 16);
-+ as_extsh(dest, dest);
-+}
-+
-+// Extract 32-bit lane |lane| (0..3) of src into GPR dest as a sign-extended
-+// i32, using ExtractLaneToGPR with 4-byte / 32-bit lane geometry.
-+void MacroAssembler::extractLaneInt32x4(uint32_t lane, FloatRegister src,
-+ Register dest) {
-+ MOZ_ASSERT(lane < 4);
-+ ExtractLaneToGPR(*this, lane, src, dest, 4, 32);
-+ // ExtractLaneToGPR leaves the adjacent lane in the high 32 bits for the
-+ // unshifted lanes (0 and 2); canonicalize to a sign-extended i32, as the
-+ // i8x16/i16x8 extracts do with extsb/extsh. A consumer that reads the full
-+ // 64-bit register -- e.g. the POWER8 i32.ctz emulation, whose 64-bit neg/and.
-+ // with a 32-bit cntlzw otherwise mis-handles a zero low word over nonzero
-+ // high garbage and returns -1 -- requires this.
-+ as_extsw(dest, dest);
-+}
-+
-+// Extract 8-bit lane |lane| (0..15) of src into GPR dest, sign-extended
-+// from 8 bits by the trailing extsb on both paths.
-+// NOTE(review): the POWER9 path names ScratchSimd128Reg directly, whereas
-+// sibling helpers acquire it through ScratchSimd128Scope -- confirm that
-+// bypassing the scope here is intentional.
-+void MacroAssembler::extractLaneInt8x16(uint32_t lane, FloatRegister src,
-+ Register dest) {
-+ MOZ_ASSERT(lane < 16);
-+ if (HasPOWER9()) {
-+ // Byte offset 15 - lane selects the byte for the requested lane.
-+ as_vextractub(ScratchSimd128Reg, src, 15 - lane);
-+ as_mfvsrd(dest, ScratchSimd128Reg);
-+ as_extsb(dest, dest);
-+ return;
-+ }
-+ // Pre-POWER9 fallback: generic helper with 1-byte / 8-bit lane geometry.
-+ ExtractLaneToGPR(*this, lane, src, dest, 1, 8);
-+ as_extsb(dest, dest);
-+}
-+
-+// i16x8 signed max: vmaxsh via EmitVmxBinary.
-+void MacroAssembler::maxInt16x8(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vmaxsh), lhs, rhs, dest);
-+}
-+
-+// i32x4 signed max: vmaxsw via EmitVmxBinary.
-+void MacroAssembler::maxInt32x4(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vmaxsw), lhs, rhs, dest);
-+}
-+
-+// i8x16 signed max: vmaxsb via EmitVmxBinary.
-+void MacroAssembler::maxInt8x16(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vmaxsb), lhs, rhs, dest);
-+}
-+
-+// i8x16 signed min: vminsb via EmitVmxBinary.
-+void MacroAssembler::minInt8x16(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vminsb), lhs, rhs, dest);
-+}
-+
-+// i32x4 wrapping multiply: vmuluwm (word multiply, modulo) via EmitVmxBinary.
-+void MacroAssembler::mulInt32x4(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vmuluwm), lhs, rhs, dest);
-+}
-+
-+// Narrow two i16x8 vectors to one i8x16 with signed saturation (vpkshss).
-+// Note that rhs is passed as the first pack operand and lhs as the second.
-+void MacroAssembler::narrowInt16x8(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ // On LE, VMX pack swaps operand order vs Wasm convention.
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vpkshss), rhs, lhs, dest);
-+}
-+
-+// Broadcast the 64-bit GPR src.reg into both doubleword lanes of |dest|.
-+void MacroAssembler::splatX2(Register64 src, FloatRegister dest) {
-+  if (!HasPOWER9()) {
-+    // Pre-POWER9: move the GPR into dw0, then duplicate dw0 into both
-+    // halves with xxpermdi (dm=0).
-+    as_mtvsrd(dest, src.reg);
-+    as_xxpermdi(dest, dest, dest, 0);
-+    return;
-+  }
-+  // POWER9: mtvsrdd fills both doublewords from GPRs in one instruction.
-+  as_mtvsrdd(dest, src.reg, src.reg);
-+}
-+
-+// i32x4 wrapping subtract: vsubuwm (word subtract, modulo) via EmitVmxBinary.
-+void MacroAssembler::subInt32x4(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsubuwm), lhs, rhs, dest);
-+}
-+
-+// Emit i8x16.swizzle: select bytes of lhs by the per-lane indices in rhs and
-+// write them to dest, zeroing lanes whose index is out of range. dest may
-+// alias lhs and/or rhs; the two code paths below differ only in when the
-+// out-of-range mask is computed relative to the permute.
-+void MacroAssembler::swizzleInt8x16(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ // Wasm i8x16.swizzle: result[i] = (rhs[i] < 16) ? lhs[rhs[i]] : 0.
-+ //
-+ // Strategy: build ctrl in ScratchSimd128 (which can't alias inputs
-+ // because v0 is non-allocatable). Use vsububs(splat(15), rhs) to
-+ // produce ctrl = max(0, 15 - rhs); the saturation clamps out-of-range
-+ // indices to 0, and those positions get masked off below.
-+ //
-+ // The mask is computed via vcmpgtub(rhs, splat(15)) + xxlnor — 0xFF
-+ // where rhs <= 15. Reformulating "rhs < 16" as "!(rhs > 15)" lets us
-+ // use vspltisb with a 5-bit signed immediate (P7+, 1 insn, no GPR
-+ // scratch) for both splat-of-15 sites, replacing the previous
-+ // movePtr(0x0F0F0F0F)/movePtr(0x10101010) + splatX4 dance.
-+ //
-+ // Aliasing: dest may equal lhs (wasm baseline calls swizzleInt8x16(
-+ // rsd, rs, rsd); Ion's useRegisterAtStart permits the same). When
-+ // dest != rhs, ctrl can be built in scratch and the mask computed
-+ // after the permute (rhs is still alive). When dest == rhs, the
-+ // permute would clobber rhs before we could compute the mask, so the
-+ // mask goes to the red zone first.
-+ //
-+ // NOTE(review): the paragraph above mentions xxlnor, but the code below
-+ // keeps the un-inverted mask and applies it with xxlandc instead.
-+ ScratchSimd128Scope scratch(*this);
-+ uint8_t s = scratch.encoding() & 31;
-+ uint8_t l = lhs.encoding() & 31;
-+ uint8_t r = rhs.encoding() & 31;
-+ uint8_t d = dest.encoding() & 31;
-+
-+ if (dest != rhs) {
-+ as_vspltisb(s, 15);
-+ as_vsububs(s, s, r); // scratch = ctrl
-+ as_vperm(d, l, l, s); // dest = vperm(lhs, lhs, ctrl)
-+ as_vspltisb(s, 15);
-+ as_vcmpgtub(s, r, s); // scratch = 0xFF where rhs > 15
-+ as_xxlandc(dest, dest, scratch); // dest &= ~scratch (= bytes-to-keep)
-+ return;
-+ }
-+
-+ // dest == rhs: vperm clobbers rhs, so build the bytes-to-zero mask first
-+ // and stash it. The xxlandc at the end consumes the un-inverted form.
-+ as_vspltisb(s, 15);
-+ as_vcmpgtub(s, r, s); // scratch = 0xFF where rhs > 15
-+ RedZoneStashSimd128(*this, scratch, 0);
-+ as_vspltisb(s, 15);
-+ as_vsububs(s, s, r); // scratch = ctrl
-+ as_vperm(d, l, l, s); // dest = vperm(lhs, lhs, ctrl)
-+ RedZoneRestoreSimd128(*this, 0, scratch);
-+ as_xxlandc(dest, dest, scratch); // dest &= ~scratch (= bytes-to-keep)
-+}
-+// SIMD 3-operand arithmetic (continued).
-+
-+// i32x4 wrapping add: vadduwm (word add, modulo) via EmitVmxBinary.
-+void MacroAssembler::addInt32x4(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vadduwm), lhs, rhs, dest);
-+}
-+
-+// i64x2 wrapping add: vaddudm (doubleword add, modulo) via EmitVmxBinary.
-+void MacroAssembler::addInt64x2(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vaddudm), lhs, rhs, dest);
-+}
-+
-+// i16x8 signed saturating add: vaddshs via EmitVmxBinary.
-+void MacroAssembler::addSatInt16x8(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vaddshs), lhs, rhs, dest);
-+}
-+
-+// i8x16 signed saturating add: vaddsbs via EmitVmxBinary.
-+void MacroAssembler::addSatInt8x16(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vaddsbs), lhs, rhs, dest);
-+}
-+
-+// f64x2 divide: dest = lhs / rhs via xvdivdp.
-+void MacroAssembler::divFloat64x2(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ as_xvdivdp(dest, lhs, rhs);
-+}
-+
-+// i16x8 signed min: vminsh via EmitVmxBinary.
-+void MacroAssembler::minInt16x8(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vminsh), lhs, rhs, dest);
-+}
-+
-+// i32x4 signed min: vminsw via EmitVmxBinary.
-+void MacroAssembler::minInt32x4(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vminsw), lhs, rhs, dest);
-+}
-+
-+// f32x4 multiply: dest = lhs * rhs via xvmulsp.
-+void MacroAssembler::mulFloat32x4(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ as_xvmulsp(dest, lhs, rhs);
-+}
-+
-+// f64x2 multiply: dest = lhs * rhs via xvmuldp.
-+void MacroAssembler::mulFloat64x2(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ as_xvmuldp(dest, lhs, rhs);
-+}
-+
-+// i16x8 multiply, built from the multiply-add instruction vmladduhm with a
-+// zeroed addend held in the SIMD scratch register.
-+void MacroAssembler::mulInt16x8(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ // Zero addend, so the multiply-add degenerates to a plain multiply.
-+ ZeroSimd128(*this, scratch);
-+ EmitVmxTernary(
-+ *this,
-+ [](Assembler& a, uint8_t vrt, uint8_t vra, uint8_t vrb, uint8_t vrc) {
-+ a.as_vmladduhm(vrt, vra, vrb, vrc);
-+ },
-+ lhs, rhs, scratch, dest);
-+}
-+
-+// Narrow two i32x4 vectors to one i16x8 with signed saturation (vpkswss).
-+// As in narrowInt16x8, rhs is passed as the first pack operand and lhs as
-+// the second.
-+void MacroAssembler::narrowInt32x4(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vpkswss), rhs, lhs, dest);
-+}
-+
-+// f32x4 subtract: dest = lhs - rhs via xvsubsp.
-+void MacroAssembler::subFloat32x4(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ as_xvsubsp(dest, lhs, rhs);
-+}
-+
-+// f64x2 subtract: dest = lhs - rhs via xvsubdp.
-+void MacroAssembler::subFloat64x2(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ as_xvsubdp(dest, lhs, rhs);
-+}
-+
-+// i16x8 wrapping subtract: vsubuhm (halfword subtract, modulo) via
-+// EmitVmxBinary.
-+void MacroAssembler::subInt16x8(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsubuhm), lhs, rhs, dest);
-+}
-+
-+// i64x2 wrapping subtract: vsubudm (doubleword subtract, modulo) via
-+// EmitVmxBinary.
-+void MacroAssembler::subInt64x2(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsubudm), lhs, rhs, dest);
-+}
-+
-+// i8x16 wrapping subtract: vsububm (byte subtract, modulo) via EmitVmxBinary.
-+void MacroAssembler::subInt8x16(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsububm), lhs, rhs, dest);
-+}
-+
-+// i16x8 signed saturating subtract: vsubshs via EmitVmxBinary.
-+void MacroAssembler::subSatInt16x8(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsubshs), lhs, rhs, dest);
-+}
-+
-+// i8x16 signed saturating subtract: vsubsbs via EmitVmxBinary.
-+void MacroAssembler::subSatInt8x16(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsubsbs), lhs, rhs, dest);
-+}
-+
-+// Emit i32x4.dot_i16x8_s of lhs and rhs into dest. The SIMD scratch
-+// register is zeroed and used as the accumulator operand of vmsumshm.
-+void MacroAssembler::widenDotInt16x8(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister dest) {
-+ // i32x4.dot_i16x8_s: result[k] = lhs[2k]*rhs[2k] + lhs[2k+1]*rhs[2k+1].
-+ // vmsumshm computes exactly that for each i32 lane plus an addend (VRC).
-+ // With VRC = 0, the addend disappears and we get the wasm spec result in
-+ // a single instruction. xxlxor zeros the scratch in 1 insn, so total is
-+ // 2 insns vs the old vmulesh/vmulosh/vadduwm trio.
-+ ScratchSimd128Scope scratch(*this);
-+ as_xxlxor(scratch, scratch, scratch);
-+ as_vmsumshm(dest.encoding() & 31, lhs.encoding() & 31, rhs.encoding() & 31,
-+ scratch.encoding() & 31);
-+}
-+
-+// SIMD variable-shift and FMA helpers.
-+// Pattern: splat the GPR shift count across all lanes of a scratch VSR,
-+// then issue a vector shift on lhs and the splat. Each vsl*/vsr*/vsra*
-+// form uses only the low log2(lane width) bits of each lane's shift count
-+// (3/4/5/6 bits for b/h/w/d), matching wasm modulo-N shift semantics.
-+
-+// i8x16 shift left by the count in GPR rhs: splat the count to every byte
-+// of the scratch register, then vslb.
-+void MacroAssembler::leftShiftInt8x16(FloatRegister lhs, Register rhs,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ splatX16(rhs, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vslb), lhs, scratch, dest);
-+}
-+
-+// i8x16 arithmetic (sign-propagating) shift right by the count in GPR rhs:
-+// splat the count to every byte of the scratch register, then vsrab.
-+void MacroAssembler::rightShiftInt8x16(FloatRegister lhs, Register rhs,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ splatX16(rhs, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsrab), lhs, scratch, dest);
-+}
-+
-+// i8x16 logical (zero-filling) shift right by the count in GPR rhs: splat
-+// the count to every byte of the scratch register, then vsrb.
-+void MacroAssembler::unsignedRightShiftInt8x16(FloatRegister lhs, Register rhs,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ splatX16(rhs, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsrb), lhs, scratch, dest);
-+}
-+
-+// i16x8 shift left by the count in GPR rhs: splat the count to every
-+// halfword of the scratch register, then vslh.
-+void MacroAssembler::leftShiftInt16x8(FloatRegister lhs, Register rhs,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ splatX8(rhs, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vslh), lhs, scratch, dest);
-+}
-+
-+// i16x8 arithmetic (sign-propagating) shift right by the count in GPR rhs:
-+// splat the count to every halfword of the scratch register, then vsrah.
-+void MacroAssembler::rightShiftInt16x8(FloatRegister lhs, Register rhs,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ splatX8(rhs, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsrah), lhs, scratch, dest);
-+}
-+
-+// i16x8 logical (zero-filling) shift right by the count in GPR rhs: splat
-+// the count to every halfword of the scratch register, then vsrh.
-+void MacroAssembler::unsignedRightShiftInt16x8(FloatRegister lhs, Register rhs,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ splatX8(rhs, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsrh), lhs, scratch, dest);
-+}
-+
-+// i32x4 shift left by the count in GPR rhs: splat the count to every word
-+// of the scratch register, then vslw.
-+void MacroAssembler::leftShiftInt32x4(FloatRegister lhs, Register rhs,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ splatX4(rhs, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vslw), lhs, scratch, dest);
-+}
-+
-+// i64x2 shift left by the count in GPR rhs, via vsld.
-+// NOTE(review): the count is splatted per 32-bit word (splatX4), not per
-+// doubleword. This presumably works because vsld only reads the low-order
-+// bits of each doubleword, which the word splat also fills -- confirm.
-+void MacroAssembler::leftShiftInt64x2(FloatRegister lhs, Register rhs,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ splatX4(rhs, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsld), lhs, scratch, dest);
-+}
-+
-+// i32x4 arithmetic (sign-propagating) shift right by the count in GPR rhs:
-+// splat the count to every word of the scratch register, then vsraw.
-+void MacroAssembler::rightShiftInt32x4(FloatRegister lhs, Register rhs,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ splatX4(rhs, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsraw), lhs, scratch, dest);
-+}
-+
-+// i64x2 arithmetic (sign-propagating) shift right by the count in GPR rhs,
-+// via vsrad.
-+// NOTE(review): the count is splatted per 32-bit word (splatX4), not per
-+// doubleword. This presumably works because vsrad only reads the low-order
-+// bits of each doubleword, which the word splat also fills -- confirm.
-+void MacroAssembler::rightShiftInt64x2(FloatRegister lhs, Register rhs,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ splatX4(rhs, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsrad), lhs, scratch, dest);
-+}
-+
-+// i32x4 logical (zero-filling) shift right by the count in GPR rhs: splat
-+// the count to every word of the scratch register, then vsrw.
-+void MacroAssembler::unsignedRightShiftInt32x4(FloatRegister lhs, Register rhs,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ splatX4(rhs, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsrw), lhs, scratch, dest);
-+}
-+
-+// i64x2 logical (zero-filling) shift right by the count in GPR rhs, via vsrd.
-+// NOTE(review): the count is splatted per 32-bit word (splatX4), not per
-+// doubleword. This presumably works because vsrd only reads the low-order
-+// bits of each doubleword, which the word splat also fills -- confirm.
-+void MacroAssembler::unsignedRightShiftInt64x2(FloatRegister lhs, Register rhs,
-+ FloatRegister dest) {
-+ ScratchSimd128Scope scratch(*this);
-+ splatX4(rhs, scratch);
-+ EmitVmxBinary(*this, VMX_BINARY_WRAPPER(vsrd), lhs, scratch, dest);
-+}
-+
-+// f32x4 fused multiply-add emitted as xvmaddasp, with srcDest as both the
-+// accumulator input and the result: srcDest = src1 * src2 + srcDest.
-+void MacroAssembler::fmaFloat32x4(FloatRegister src1, FloatRegister src2,
-+ FloatRegister srcDest) {
-+ as_xvmaddasp(srcDest, src1, src2);
-+}
-+
-+// f64x2 fused multiply-add emitted as xvmaddadp, with srcDest as both the
-+// accumulator input and the result: srcDest = src1 * src2 + srcDest.
-+void MacroAssembler::fmaFloat64x2(FloatRegister src1, FloatRegister src2,
-+ FloatRegister srcDest) {
-+ as_xvmaddadp(srcDest, src1, src2);
-+}
-+
-+//}}} check_macroassembler_style
-+
-+} // namespace jit
-+} // namespace js
-+
-+#endif /* jit_ppc64_MacroAssembler_ppc64_inl_h */
-diff --git a/js/src/jit/ppc64/MacroAssembler-ppc64.cpp b/js/src/jit/ppc64/MacroAssembler-ppc64.cpp
-new file mode 100644
-index 000000000000..702fb3cd4cba
---- /dev/null
-+++ b/js/src/jit/ppc64/MacroAssembler-ppc64.cpp
-@@ -0,0 +1,3467 @@
-+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
-+ * vim: set ts=8 sts=2 et sw=2 tw=80:
-+ * This Source Code Form is subject to the terms of the Mozilla Public
-+ * License, v. 2.0. If a copy of the MPL was not distributed with this
-+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-+
-+#include "jit/ppc64/MacroAssembler-ppc64.h"
-+
-+#include "jit/Bailouts.h"
-+#include "jit/BaselineFrame.h"
-+#include "jit/FlushICache.h"
-+#include "jit/JitFrames.h"
-+#include "jit/JitRuntime.h"
-+#include "jit/MacroAssembler.h"
-+#include "jit/MoveEmitter.h"
-+#include "jit/ppc64/SharedICRegisters-ppc64.h"
-+#include "vm/JitActivation.h"
-+#include "vm/JSContext.h"
-+#include "wasm/WasmStubs.h"
-+
-+#include "jit/MacroAssembler-inl.h"
-+
-+namespace js {
-+namespace jit {
-+
-+// Downcast this object to the MacroAssembler it is statically cast to;
-+// no runtime check is performed.
-+MacroAssembler& MacroAssemblerPPC64::asMasm() {
-+ return *static_cast<MacroAssembler*>(this);
-+}
-+
-+// Const overload of asMasm(): same static downcast, const-qualified.
-+const MacroAssembler& MacroAssemblerPPC64::asMasm() const {
-+ return *static_cast<const MacroAssembler*>(this);
-+}
-+
-+// ===============================================================
-+// Out-of-line fake exit frame
-+
-+// Push the three words of a fake exit frame, in order: an IonJS frame
-+// descriptor, |fakeReturnAddr|, and the current FramePointer.
-+// Always returns true.
-+bool MacroAssemblerPPC64Compat::buildOOLFakeExitFrame(void* fakeReturnAddr) {
-+ asMasm().Push(FrameDescriptor(FrameType::IonJS));
-+ asMasm().Push(ImmPtr(fakeReturnAddr));
-+ asMasm().Push(FramePointer);
-+ return true;
-+}
-+
-+// ===============================================================
-+// Load int32 or double from memory
-+
-+// Load the boxed Value at |src| into FPR |dest| as a double. If the Value's
-+// tag says int32, the payload is converted to double; otherwise the raw
-+// 64 bits already sitting in |dest| are left untouched.
-+void MacroAssemblerPPC64Compat::loadInt32OrDouble(const Address& src,
-+                                                  FloatRegister dest) {
-+  // Hold exactly one scratch GPR here: branchTestInt32 needs to acquire
-+  // the other one for its ImmTag constant.
-+  UseScratchRegisterScope temps(*this);
-+  Register bits = temps.Acquire();
-+  Label done;
-+
-+  // Park the boxed bits in the FPR first so the GPR can be recycled for the
-+  // tag test.
-+  loadPtr(src, bits);
-+  as_mtvsrd(dest, bits);
-+  x_srdi(bits, bits, JSVAL_TAG_SHIFT);
-+  asMasm().branchTestInt32(Assembler::NotEqual, bits, &done);
-+
-+  // Int32 case: fetch the boxed bits back, sign-extend the low word, and
-+  // convert the resulting integer to double.
-+  as_mfvsrd(bits, dest);
-+  as_extsw(bits, bits);
-+  as_mtvsrd(dest, bits);
-+  as_fcfid(dest, dest);
-+
-+  bind(&done);
-+}
-+
-+// BaseIndex flavour of loadInt32OrDouble: load the boxed Value at |addr|
-+// into FPR |dest| as a double, converting from int32 when the tag says so.
-+void MacroAssemblerPPC64Compat::loadInt32OrDouble(const BaseIndex& addr,
-+ FloatRegister dest) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ Label end;
-+
-+ // The scratch GPR first holds the scaled address, then the loaded Value.
-+ computeScaledAddress(addr, scratch);
-+ loadPtr(Address(scratch, addr.offset), scratch);
-+ // Stash the boxed bits in dest so scratch can be reused for the tag test.
-+ as_mtvsrd(dest, scratch);
-+ x_srdi(scratch, scratch, JSVAL_TAG_SHIFT);
-+ asMasm().branchTestInt32(Assembler::NotEqual, scratch, &end);
-+ // Int32: recover the bits, sign-extend the low word, convert to double.
-+ as_mfvsrd(scratch, dest);
-+ as_extsw(scratch, scratch);
-+ as_mtvsrd(dest, scratch);
-+ as_fcfid(dest, dest);
-+
-+ bind(&end);
-+}
-+
-+// ===============================================================
-+// Conversion functions
-+
-+// Convert the unsigned 32-bit value in GPR |src| to a double in |dest|.
-+void MacroAssemblerPPC64Compat::convertUInt32ToDouble(Register src,
-+ FloatRegister dest) {
-+ // mtvsrwz: VSR[dest].dw0 = zero_ext_64(src[32:63]); P8+ (ISA 2.07).
-+ // Replaces rldicl + mtvsrd (2 insns + scratch) with 1 insn.
-+ as_mtvsrwz(dest, src);
-+ // The zero-extended value is a non-negative int64; convert it to double.
-+ as_fcfid(dest, dest);
-+}
-+
-+// Convert the unsigned 32-bit value in GPR |src| to a float32 in |dest|.
-+void MacroAssemblerPPC64Compat::convertUInt32ToFloat32(Register src,
-+ FloatRegister dest) {
-+ // mtvsrwz + fcfids; same recipe as convertUInt32ToDouble.
-+ as_mtvsrwz(dest, src);
-+ as_fcfids(dest, dest);
-+}
-+
-+// Helper for the negative-zero check after a successful round-trip.
-+// Precondition: `dest` holds the integer round-trip result; if it equals
-+// zero, then `src` was either +0.0 or -0.0 (those are the only doubles
-+// that round-trip to int 0). Distinguish them by inspecting src's sign
-+// bit: -0.0 has its MSB set, so an mfvsrd-then-signed-cmp-against-zero
-+// branches to `fail` only for -0.0. Non-zero `dest` values (including
-+// every negative integer) skip the check entirely.
-+//
-+// One scratch GPR is acquired for the raw bits of `src`; `src` and `dest`
-+// themselves are left unmodified.
-+static void EmitNegativeZeroCheck(MacroAssemblerPPC64Compat& masm,
-+                                  FloatRegister src, Register dest,
-+                                  Label* fail) {
-+  Label notZero;
-+  masm.as_cmpdi(dest, 0);
-+  masm.ma_b(Assembler::NotEqual, &notZero);
-+  UseScratchRegisterScope temps(masm);
-+  Register scratch = temps.Acquire();
-+  // Raw bits of src as a signed 64-bit integer: negative iff the sign bit
-+  // is set.
-+  masm.as_mfvsrd(scratch, src);
-+  masm.as_cmpdi(scratch, 0);
-+  masm.ma_b(Assembler::LessThan, fail);
-+  masm.bind(&notZero);
-+}
-+
-+// Convert the double in |src| to an int32 in |dest|, branching to |fail|
-+// when the conversion is not exact. With |negativeZeroCheck| set, -0.0 is
-+// also sent to |fail|. ScratchDoubleReg is clobbered.
-+void MacroAssemblerPPC64Compat::convertDoubleToInt32(FloatRegister src,
-+ Register dest, Label* fail,
-+ bool negativeZeroCheck) {
-+ // Truncate to int32 (round toward zero), sign-extend, and verify
-+ // exactness via round-trip compare. fctiwz writes the int32 to BE
-+ // bits 32..63 of the FPR; mfvsrd extracts and extsw sign-extends.
-+ // The compare also catches NaN (unordered) and Inf (saturated to
-+ // INT32_{MIN,MAX}, won't round-trip equal).
-+ as_fctiwz(ScratchDoubleReg, src);
-+ as_mfvsrd(dest, ScratchDoubleReg);
-+ as_extsw(dest, dest);
-+ as_mtvsrd(ScratchDoubleReg, dest);
-+ as_fcfid(ScratchDoubleReg, ScratchDoubleReg);
-+ as_fcmpu(ScratchDoubleReg, src);
-+ ma_b(Assembler::DoubleNotEqualOrUnordered, fail);
-+
-+ if (negativeZeroCheck) {
-+ EmitNegativeZeroCheck(*this, src, dest, fail);
-+ }
-+}
-+
-+// Convert the double in |src| to a pointer-sized (64-bit) integer in |dest|,
-+// branching to |fail| when the round-trip compare does not match. With
-+// |negativeZeroCheck| set, -0.0 is also sent to |fail|. ScratchDoubleReg is
-+// clobbered.
-+// NOTE(review): for src == 2^63 exactly, fctidz presumably saturates to
-+// INT64_MAX, which fcfid would round back up to 2^63, so the compare may
-+// pass with dest == INT64_MAX -- confirm whether callers can reach this.
-+void MacroAssemblerPPC64Compat::convertDoubleToPtr(FloatRegister src,
-+ Register dest, Label* fail,
-+ bool negativeZeroCheck) {
-+ // Same pattern as convertDoubleToInt32 but to int64 (no sign-extend
-+ // needed since fctidz already produces a 64-bit result).
-+ as_fctidz(ScratchDoubleReg, src);
-+ as_mfvsrd(dest, ScratchDoubleReg);
-+ as_mtvsrd(ScratchDoubleReg, dest);
-+ as_fcfid(ScratchDoubleReg, ScratchDoubleReg);
-+ as_fcmpu(ScratchDoubleReg, src);
-+ ma_b(Assembler::DoubleNotEqualOrUnordered, fail);
-+
-+ if (negativeZeroCheck) {
-+ EmitNegativeZeroCheck(*this, src, dest, fail);
-+ }
-+}
-+
-+// Convert the float32 in |src| to an int32 in |dest|, branching to |fail|
-+// when the round-trip compare does not match. With |negativeZeroCheck| set,
-+// -0.0 is also sent to |fail|. ScratchDoubleReg is clobbered.
-+// NOTE(review): for src == 2^31 exactly, fctiwz presumably saturates to
-+// INT32_MAX, and fcfids would round 2147483647 back up to 2^31 in single
-+// precision, so the compare may pass with dest == INT32_MAX. The double
-+// variant does not have this problem because INT32_MAX is exactly
-+// representable as a double -- confirm and add a range check if needed.
-+void MacroAssemblerPPC64Compat::convertFloat32ToInt32(FloatRegister src,
-+ Register dest,
-+ Label* fail,
-+ bool negativeZeroCheck) {
-+ // Same as convertDoubleToInt32 but the round-trip uses fcfids so the
-+ // comparison happens at single precision (matches src's actual width).
-+ as_fctiwz(ScratchDoubleReg, src);
-+ as_mfvsrd(dest, ScratchDoubleReg);
-+ as_extsw(dest, dest);
-+ as_mtvsrd(ScratchDoubleReg, dest);
-+ as_fcfids(ScratchDoubleReg, ScratchDoubleReg);
-+ as_fcmpu(ScratchDoubleReg, src);
-+ ma_b(Assembler::DoubleNotEqualOrUnordered, fail);
-+
-+ if (negativeZeroCheck) {
-+ EmitNegativeZeroCheck(*this, src, dest, fail);
-+ }
-+}
-+
-+// Emit a fixed-size, patchable call to |target|. The sequence is a 64-bit
-+// address-load stanza followed by either mtctr + bctr-with-link (when
-+// |enabled|) or two nops of the same size (when disabled).
-+// Returns the offset of the start of the load stanza. The whole sequence is
-+// emitted inside a no-pool region, and its length is asserted to equal
-+// ToggledCallSize(nullptr).
-+CodeOffset MacroAssemblerPPC64Compat::toggledCall(JitCode* target,
-+ bool enabled) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ // stanza(8) + mtctr/bctrl(2) = 10 instructions.
-+ m_buffer.enterNoPool(kNoPoolPatchableBranchInsns);
-+ BufferOffset boLoad =
-+ emitLoad64Stanza(scratch, (uint64_t)uintptr_t(target->raw()));
-+ CodeOffset offset(boLoad.getOffset());
-+ // Record the embedded address as a JITCODE relocation.
-+ addPendingJump(boLoad, ImmPtr(target->raw()), RelocationKind::JITCODE);
-+ if (enabled) {
-+ xs_mtctr(scratch);
-+ as_bctr(LinkBit::LinkB);
-+ } else {
-+ // Disabled: two nops occupy the slots of the mtctr/bctr pair.
-+ writeInst(NopInst);
-+ writeInst(NopInst);
-+ }
-+ m_buffer.leaveNoPool();
-+ MOZ_ASSERT_IF(!oom(), nextOffset().getOffset() - offset.offset() ==
-+ ToggledCallSize(nullptr));
-+ return offset;
-+}
-+
-+// ===============================================================
-+// Exception handling
-+
-+void MacroAssemblerPPC64Compat::handleFailureWithHandlerTail(
-+ Label* profilerExitTail, Label* bailoutTail,
-+ uint32_t* returnValueCheckOffset) {
-+ // Round sizeof(ResumeFromException) up to ABIStackAlignment. The
-+ // canonical (sz + align - 1) & ~(align - 1) form is exact: when sz
-+ // is already a multiple of `align` the rounding is a no-op. The
-+ // previous (sz + align) & ~(align - 1) over-allocated by `align`
-+ // bytes whenever sz was already aligned.
-+ int size = (sizeof(ResumeFromException) + ABIStackAlignment - 1) &
-+ ~(ABIStackAlignment - 1);
-+ asMasm().subPtr(Imm32(size), StackPointer);
-+ // Use r3 (first argument register).
-+ mov(StackPointer, r3);
-+
-+ using Fn = void (*)(ResumeFromException* rfe);
-+ asMasm().setupUnalignedABICall(r4);
-+ asMasm().passABIArg(r3);
-+ asMasm().callWithABI<Fn, HandleException>(
-+ ABIType::General, CheckUnsafeCallWithABI::DontCheckHasExitFrame);
-+
-+ *returnValueCheckOffset = asMasm().currentOffset();
-+
-+ Label entryFrame;
-+ Label catch_;
-+ Label finally;
-+ Label returnBaseline;
-+ Label returnIon;
-+ Label bailout;
-+ Label wasmInterpEntry;
-+ Label wasmCatch;
-+
-+ load32(Address(StackPointer, ResumeFromException::offsetOfKind()), r3);
-+ asMasm().branch32(Assembler::Equal, r3,
-+ Imm32(ExceptionResumeKind::EntryFrame), &entryFrame);
-+ asMasm().branch32(Assembler::Equal, r3, Imm32(ExceptionResumeKind::Catch),
-+ &catch_);
-+ asMasm().branch32(Assembler::Equal, r3, Imm32(ExceptionResumeKind::Finally),
-+ &finally);
-+ asMasm().branch32(Assembler::Equal, r3,
-+ Imm32(ExceptionResumeKind::ForcedReturnBaseline),
-+ &returnBaseline);
-+ asMasm().branch32(Assembler::Equal, r3,
-+ Imm32(ExceptionResumeKind::ForcedReturnIon), &returnIon);
-+ asMasm().branch32(Assembler::Equal, r3, Imm32(ExceptionResumeKind::Bailout),
-+ &bailout);
-+ asMasm().branch32(Assembler::Equal, r3,
-+ Imm32(ExceptionResumeKind::WasmInterpEntry),
-+ &wasmInterpEntry);
-+ asMasm().branch32(Assembler::Equal, r3, Imm32(ExceptionResumeKind::WasmCatch),
-+ &wasmCatch);
-+
-+ breakpoint(); // Invalid kind.
-+
-+ // No exception handler. Return error from entry frame.
-+ bind(&entryFrame);
-+ asMasm().moveValue(MagicValue(JS_ION_ERROR), JSReturnOperand);
-+ loadPtr(Address(StackPointer, ResumeFromException::offsetOfFramePointer()),
-+ FramePointer);
-+ loadPtr(Address(StackPointer, ResumeFromException::offsetOfStackPointer()),
-+ StackPointer);
-+ ret();
-+
-+ // Catch handler.
-+ bind(&catch_);
-+ loadPtr(Address(StackPointer, ResumeFromException::offsetOfTarget()), r3);
-+ loadPtr(Address(StackPointer, ResumeFromException::offsetOfFramePointer()),
-+ FramePointer);
-+ loadPtr(Address(StackPointer, ResumeFromException::offsetOfStackPointer()),
-+ StackPointer);
-+ jump(r3);
-+
-+ // Finally block.
-+ bind(&finally);
-+ ValueOperand exception = ValueOperand(r4);
-+ loadValue(Address(StackPointer, ResumeFromException::offsetOfException()),
-+ exception);
-+
-+ ValueOperand exceptionStack = ValueOperand(r5);
-+ loadValue(
-+ Address(StackPointer, ResumeFromException::offsetOfExceptionStack()),
-+ exceptionStack);
-+
-+ loadPtr(Address(StackPointer, ResumeFromException::offsetOfTarget()), r3);
-+ loadPtr(Address(StackPointer, ResumeFromException::offsetOfFramePointer()),
-+ FramePointer);
-+ loadPtr(Address(StackPointer, ResumeFromException::offsetOfStackPointer()),
-+ StackPointer);
-+
-+ pushValue(exception);
-+ pushValue(exceptionStack);
-+ pushValue(BooleanValue(true));
-+ jump(r3);
-+
-+ // Forced return from baseline.
-+ Label profilingInstrumentation;
-+ bind(&returnBaseline);
-+ loadPtr(Address(StackPointer, ResumeFromException::offsetOfFramePointer()),
-+ FramePointer);
-+ loadPtr(Address(StackPointer, ResumeFromException::offsetOfStackPointer()),
-+ StackPointer);
-+ loadValue(Address(FramePointer, BaselineFrame::reverseOffsetOfReturnValue()),
-+ JSReturnOperand);
-+ jump(&profilingInstrumentation);
-+
-+ // Forced return from Ion.
-+ bind(&returnIon);
-+ loadValue(Address(StackPointer, ResumeFromException::offsetOfException()),
-+ JSReturnOperand);
-+ loadPtr(Address(StackPointer, ResumeFromException::offsetOfFramePointer()),
-+ FramePointer);
-+ loadPtr(Address(StackPointer, ResumeFromException::offsetOfStackPointer()),
-+ StackPointer);
-+
-+ bind(&profilingInstrumentation);
-+ {
-+ Label skipProfilingInstrumentation;
-+ AbsoluteAddress addressOfEnabled(
-+ asMasm().runtime()->geckoProfiler().addressOfEnabled());
-+ asMasm().branch32(Assembler::Equal, addressOfEnabled, Imm32(0),
-+ &skipProfilingInstrumentation);
-+ jump(profilerExitTail);
-+ bind(&skipProfilingInstrumentation);
-+ }
-+
-+ xs_mr(StackPointer, FramePointer);
-+ // Pop FP from stack, then return (pop LR + blr).
-+ loadPtr(Address(StackPointer, 0), FramePointer);
-+ asMasm().addPtr(Imm32(sizeof(void*)), StackPointer);
-+ ret();
-+
-+ // Bailout.
-+ bind(&bailout);
-+ loadPtr(Address(StackPointer, ResumeFromException::offsetOfBailoutInfo()),
-+ r5);
-+ loadPtr(Address(StackPointer, ResumeFromException::offsetOfStackPointer()),
-+ StackPointer);
-+ xs_li(ReturnReg, 1);
-+ jump(bailoutTail);
-+
-+ // Wasm interp entry.
-+ bind(&wasmInterpEntry);
-+ loadPtr(Address(StackPointer, ResumeFromException::offsetOfFramePointer()),
-+ FramePointer);
-+ loadPtr(Address(StackPointer, ResumeFromException::offsetOfStackPointer()),
-+ StackPointer);
-+ movePtr(ImmWord(wasm::InterpFailInstanceReg), InstanceReg);
-+ ret();
-+
-+ // Wasm catch.
-+ bind(&wasmCatch);
-+ wasm::GenerateJumpToCatchHandler(asMasm(), StackPointer, r4, r5, r6);
-+}
-+
-+ // Clamp the double in |input| to an integer in [0, 255] and leave it in
-+ // |output|. NaN and values <= 0 produce 0, values that convert above 255
-+ // produce 255, and everything else is converted with fctid. Two code shapes
-+ // are emitted depending on HasPOWER9().
-+ void MacroAssembler::clampDoubleToUint8(FloatRegister input, Register output) {
-+ ScratchDoubleScope fpscratch(asMasm());
-+
-+ if (HasPOWER9()) {
-+ // P9 xsmaxjdp uses Java/JS semantics (ISA v3.0B): any NaN
-+ // is treated as "less than any number that is not a NaN", so
-+ // xsmaxjdp(input, 0) collapses {NaN, -Inf, ≤ 0} to 0 in one insn —
-+ // the "≤ 0 or NaN → 0" branch dance disappears.
-+ //
-+ // After the max, fctid (round-to-nearest-even per FPSCR default,
-+ // matches ECMA Uint8ClampedArray's round-half-to-even) saturates
-+ // out-of-int64 values to INT64_MAX. Remaining upper clamp
-+ // (output > 255 → 255) is one cmpdi + isel.
-+ zeroDouble(fpscratch);
-+ as_xsmaxjdp(fpscratch, input, fpscratch);
-+ as_fctid(fpscratch, fpscratch);
-+ as_mfvsrd(output, fpscratch);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register max255 = temps.Acquire();
-+ xs_li(max255, 255);
-+ as_cmpdi(output, 255);
-+ as_isel(output, max255, output, GreaterThan);
-+ return;
-+ }
-+
-+ // POWER8 fallback: xsmaxjdp is unavailable, so filter NaN explicitly
-+ // before fctid. Per Power ISA, fctid maps NaN to INT64_MAX, which
-+ // would clamp to 255 instead of the spec-required 0.
-+ Label positive, below255, done;
-+ zeroDouble(fpscratch);
-+ // Falls through (result 0) unless input > 0.
-+ branchDouble(DoubleGreaterThan, input, fpscratch, &positive);
-+ {
-+ move32(Imm32(0), output);
-+ jump(&done);
-+ }
-+
-+ bind(&positive);
-+
-+ // Falls through (result 255) unless input < 255.0.
-+ loadConstantDouble(255.0, fpscratch);
-+ branchDouble(DoubleLessThan, input, fpscratch, &below255);
-+ {
-+ move32(Imm32(255), output);
-+ jump(&done);
-+ }
-+
-+ bind(&below255);
-+
-+ // 0 < input < 255: convert through the FP scratch and move to the GPR.
-+ as_fctid(fpscratch, input);
-+ as_mfvsrd(output, fpscratch);
-+ bind(&done);
-+ }
-+
-+ // Lower the stack pointer by |imm32| bytes. A zero amount emits no code.
-+ void MacroAssembler::subFromStackPtr(Imm32 imm32) {
-+   if (!imm32.value) {
-+     return;
-+   }
-+   asMasm().subPtr(imm32, StackPointer);
-+ }
-+
-+//{{{ check_macroassembler_style
-+
-+ // Sign-extend the 32-bit value held in |r| to 64 bits, in place.
-+ void MacroAssembler::widenInt32(Register r) {
-+   Register64 widened(r);
-+   move32To64SignExtend(r, widened);
-+ }
-+
-+// Stack operations.
-+ // Push a GPR and account for one pointer-sized slot in framePushed.
-+ void MacroAssembler::Push(Register reg) {
-+   const int32_t slotBytes = int32_t(sizeof(intptr_t));
-+   push(reg);
-+   adjustFrame(slotBytes);
-+ }
-+
-+ // Push a 32-bit immediate; it occupies one pointer-sized slot.
-+ void MacroAssembler::Push(const Imm32 imm) {
-+   const int32_t slotBytes = int32_t(sizeof(intptr_t));
-+   push(imm);
-+   adjustFrame(slotBytes);
-+ }
-+
-+ // Push a word-sized immediate.
-+ void MacroAssembler::Push(const ImmWord imm) {
-+   const int32_t slotBytes = int32_t(sizeof(intptr_t));
-+   push(imm);
-+   adjustFrame(slotBytes);
-+ }
-+
-+ // Push a raw pointer immediate by forwarding to the ImmWord overload.
-+ void MacroAssembler::Push(const ImmPtr imm) {
-+   ImmWord asWord(reinterpret_cast<uintptr_t>(imm.value));
-+   Push(asWord);
-+ }
-+
-+ // Push a GC pointer immediate.
-+ void MacroAssembler::Push(const ImmGCPtr ptr) {
-+   const int32_t slotBytes = int32_t(sizeof(intptr_t));
-+   push(ptr);
-+   adjustFrame(slotBytes);
-+ }
-+
-+ // Reserve one double-sized stack slot, box |reg| into it, and record the
-+ // slot in framePushed.
-+ void MacroAssembler::PushBoxed(FloatRegister reg) {
-+   subFromStackPtr(Imm32(sizeof(double)));
-+   const Address slot(getStackPointer(), 0);
-+   boxDouble(reg, slot);
-+   adjustFrame(sizeof(double));
-+ }
-+
-+ // Pop one pointer-sized slot into |reg| and shrink framePushed to match.
-+ void MacroAssembler::Pop(Register reg) {
-+   const int32_t slotBytes = int32_t(sizeof(intptr_t));
-+   pop(reg);
-+   adjustFrame(-slotBytes);
-+ }
-+ // Spill every register in |set| to freshly reserved stack. GPRs are stored
-+ // first, walking the set backwards from the top of the reserved area; the
-+ // reduced FPU set follows below them, each register in a slot of its own
-+ // size.
-+ void MacroAssembler::PushRegsInMask(LiveRegisterSet set) {
-+   const int32_t totalBytes =
-+       set.gprs().size() * sizeof(intptr_t) + set.fpus().getPushSizeInBytes();
-+   int32_t offset = totalBytes;
-+
-+   reserveStack(totalBytes);
-+   for (GeneralRegisterBackwardIterator gpr(set.gprs()); gpr.more(); ++gpr) {
-+     offset -= sizeof(intptr_t);
-+     storePtr(*gpr, Address(StackPointer, offset));
-+   }
-+
-+   // Natural per-kind slot — 8 bytes for Single/Double via stfd, 16 bytes
-+   // for Simd128 via stxvx. RegisterDump::FPUArray is sized 32 × 8 = 256
-+   // bytes (sizeof(RegisterContent) is 8 — no v128 in the union), so
-+   // f_K's stfd slot lands at the right offset. Bailout AllRegs excludes
-+   // Simd128 (Ion has no SIMD live), so the FP region in bailout frames
-+   // is strictly Float-only.
-+   for (FloatRegisterBackwardIterator fpr(set.fpus().reduceSetForPush());
-+        fpr.more(); ++fpr) {
-+     FloatRegister current = *fpr;
-+     offset -= current.size();
-+     const Address slot(StackPointer, offset);
-+     if (current.isSimd128()) {
-+       storeUnalignedSimd128(current, slot);
-+     } else {
-+       storeDouble(current.asDouble(), slot);
-+     }
-+   }
-+   // Every reserved byte must have been assigned to a register.
-+   MOZ_ASSERT(offset == 0);
-+ }
-+ // Reload the registers in |set| from the layout written by PushRegsInMask,
-+ // skipping any register that is also in |ignore|, then release the whole
-+ // area. Skipped registers still consume their slot so offsets stay in sync.
-+ void MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set,
-+                                          LiveRegisterSet ignore) {
-+   const int32_t totalBytes =
-+       set.gprs().size() * sizeof(intptr_t) + set.fpus().getPushSizeInBytes();
-+   int32_t offset = totalBytes;
-+
-+   for (GeneralRegisterBackwardIterator gpr(set.gprs()); gpr.more(); ++gpr) {
-+     offset -= sizeof(intptr_t);
-+     if (ignore.has(*gpr)) {
-+       continue;
-+     }
-+     loadPtr(Address(StackPointer, offset), *gpr);
-+   }
-+
-+   // Natural per-kind slot. See PushRegsInMask comment.
-+   for (FloatRegisterBackwardIterator fpr(set.fpus().reduceSetForPush());
-+        fpr.more(); ++fpr) {
-+     FloatRegister current = *fpr;
-+     offset -= current.size();
-+     if (ignore.has(current)) {
-+       continue;
-+     }
-+     const Address slot(StackPointer, offset);
-+     if (current.isSimd128()) {
-+       loadUnalignedSimd128(slot, current);
-+     } else {
-+       loadDouble(slot, current.asDouble());
-+     }
-+   }
-+   MOZ_ASSERT(offset == 0);
-+   freeStack(totalBytes);
-+ }
-+
-+// Call operations.
-+ // Indirect call through |reg|: the target is first copied into CallReg if it
-+ // is not already there, then called via CTR. Returns the buffer offset
-+ // immediately after the emitted branch-and-link.
-+ CodeOffset MacroAssembler::call(Register reg) {
-+ // ELFv2 ABI: r12 must hold the target address at function entry
-+ // so the callee can compute its TOC pointer from r12.
-+ if (reg != CallReg) {
-+ movePtr(reg, CallReg);
-+ }
-+ xs_mtctr(CallReg);
-+ as_bctr(LinkB);
-+ return CodeOffset(currentOffset());
-+ }
-+ // Call a label. Every shape emitted here is exactly 10 instruction words so
-+ // that the site can be rewritten later:
-+ //   - unbound label: tagged trap + chain word + 8 nops;
-+ //   - bound, in bl range: 9 nops + bl;
-+ //   - bound, out of range: 8-word load64 stanza + mtctr + bctrl.
-+ // Returns the offset just past the stanza.
-+ CodeOffset MacroAssembler::call(Label* label) {
-+   if (!label->bound()) {
-+     // Emit a CallTag stanza: trap + chain + 8 nops (10 instructions total).
-+     m_buffer.enterNoPool(kNoPoolPatchableBranchInsns);
-+     BufferOffset tagOffset = xs_trap_tagged(CallTag);
-+     // Chain word: previous use of this label, or the end-of-chain marker.
-+     writeInst(label->used() ? label->offset() : LabelBase::INVALID_OFFSET);
-+     for (int pad = 0; pad < 8; pad++) {
-+       writeInst(NopInst);
-+     }
-+     m_buffer.leaveNoPool();
-+     if (!oom()) {
-+       label->use(tagOffset.getOffset());
-+     }
-+     return CodeOffset(currentOffset());
-+   }
-+
-+   // Open the no-pool window BEFORE computing the displacement.
-+   // enterNoPool() can itself trigger a pending pool flush, advancing
-+   // currentOffset(). A pre-flush displacement emitted at the post-flush
-+   // position would overshoot the target by poolSize bytes.
-+   m_buffer.enterNoPool(kNoPoolPatchableBranchInsns);
-+   const int32_t fromStanzaStart = label->offset() - currentOffset();
-+   // Call instruction goes at inst[9] in the 10-word stanza.
-+   const int32_t fromBranchSlot =
-+       fromStanzaStart - 9 * (int32_t)sizeof(uint32_t);
-+
-+   if (JOffImm26::IsInRange(fromBranchSlot)) {
-+     // Short: 9 nops + bl = 10 instructions.
-+     for (int pad = 0; pad < 9; pad++) {
-+       writeInst(NopInst);
-+     }
-+     as_b(JOffImm26(fromBranchSlot), RelativeBranch, LinkB);
-+     m_buffer.leaveNoPool();
-+     return CodeOffset(currentOffset());
-+   }
-+
-+   // Long call to bound label: stanza(8) + mtctr + bctrl = 10 instructions.
-+   BufferOffset stanzaOffset =
-+       emitLoad64Stanza(SecondScratchReg, LabelBase::INVALID_OFFSET);
-+   xs_mtctr(SecondScratchReg);
-+   as_bctr(LinkB);
-+   m_buffer.leaveNoPool();
-+   addLongJump(stanzaOffset, BufferOffset(label->offset()));
-+   return CodeOffset(currentOffset());
-+ }
-+ // Load the call target from memory at |addr| into CallReg and call through
-+ // it. Returns whatever call(Register) returns.
-+ CodeOffset MacroAssembler::call(const Address& addr) {
-+ loadPtr(addr, CallReg);
-+ return call(CallReg);
-+ }
-+
-+ // Call an absolute address. The address is materialized into CallReg with a
-+ // patchable load64 stanza, registered as a HARDCODED pending jump, and
-+ // called via CTR. The whole sequence is emitted inside a no-pool window.
-+ void MacroAssembler::call(ImmPtr target) {
-+ uint64_t addr = uintptr_t(target.value);
-+ // stanza(8) + mtctr + bctrl = 10 instructions.
-+ m_buffer.enterNoPool(kNoPoolPatchableBranchInsns);
-+ BufferOffset bo = emitLoad64Stanza(CallReg, addr);
-+ addPendingJump(bo, target, RelocationKind::HARDCODED);
-+ xs_mtctr(CallReg);
-+ as_bctr(LinkB);
-+ m_buffer.leaveNoPool();
-+ }
-+
-+ // Call a wasm symbolic address: move it into CallReg, then call through the
-+ // register.
-+ CodeOffset MacroAssembler::call(wasm::SymbolicAddress target) {
-+ movePtr(target, CallReg);
-+ return call(CallReg);
-+ }
-+
-+ // ABI call to a function pointer stored in memory at |fun|. The pointer is
-+ // loaded into a scratch register before callWithABIPre runs, and that
-+ // scratch stays acquired until after the call.
-+ void MacroAssembler::callWithABINoProfiler(const Address& fun, ABIType result) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ loadPtr(fun, scratch);
-+
-+ uint32_t stackAdjust;
-+ callWithABIPre(&stackAdjust);
-+ call(scratch);
-+ callWithABIPost(stackAdjust, result);
-+ }
-+
-+ // Prepare the stack for an ABI call: reserve the outgoing-argument bytes
-+ // plus one slot for LR (padded to ABIStackAlignment), save LR in the top
-+ // slot of that area, and emit the resolved argument moves. The reserved
-+ // size is written to |*stackAdjust| for callWithABIPost to undo.
-+ void MacroAssembler::callWithABIPre(uint32_t* stackAdjust, bool callFromWasm) {
-+ MOZ_ASSERT(inCall_);
-+ uint32_t stackForCall = abiArgs_.stackBytesConsumedSoFar();
-+
-+ // Reserve place for LR save.
-+ stackForCall += sizeof(intptr_t);
-+
-+ if (dynamicAlignment_) {
-+ // SP was already aligned by setupUnalignedABICall; only pad the size.
-+ stackForCall += ComputeByteAlignment(stackForCall, ABIStackAlignment);
-+ } else {
-+ // Static alignment: account for what has been pushed so far (and the
-+ // wasm::Frame when called from wasm).
-+ uint32_t alignmentAtPrologue = callFromWasm ? sizeof(wasm::Frame) : 0;
-+ stackForCall += ComputeByteAlignment(
-+ stackForCall + framePushed() + alignmentAtPrologue, ABIStackAlignment);
-+ }
-+
-+ *stackAdjust = stackForCall;
-+ reserveStack(stackForCall);
-+
-+ // Save LR. Restore it in callWithABIPost.
-+ {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ xs_mflr(scratch);
-+ storePtr(scratch, Address(StackPointer, stackForCall - sizeof(intptr_t)));
-+ }
-+
-+ // Position all arguments.
-+ {
-+ enoughMemory_ &= moveResolver_.resolve();
-+ if (!enoughMemory_) {
-+ return;
-+ }
-+
-+ MoveEmitter emitter(*this);
-+ emitter.emit(moveResolver_);
-+ emitter.finish();
-+ }
-+
-+ assertStackAlignment(ABIStackAlignment);
-+ }
-+
-+ // Undo callWithABIPre after the call returns: reload LR from the slot where
-+ // it was saved, then release the reserved stack (or, for dynamically aligned
-+ // calls, reload the original SP from the stack).
-+ void MacroAssembler::callWithABIPost(uint32_t stackAdjust, ABIType result) {
-+ {
-+ // LR lives in the top slot of the area reserved by callWithABIPre.
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ loadPtr(Address(StackPointer, stackAdjust - sizeof(intptr_t)), scratch);
-+ xs_mtlr(scratch);
-+ }
-+
-+ if (dynamicAlignment_) {
-+ // Restore SP from stack (as stored in setupUnalignedABICall).
-+ loadPtr(Address(StackPointer, stackAdjust), StackPointer);
-+ adjustFrame(-stackAdjust);
-+ } else {
-+ freeStack(stackAdjust);
-+ }
-+
-+ #ifdef DEBUG
-+ MOZ_ASSERT(inCall_);
-+ inCall_ = false;
-+ #endif
-+ }
-+
-+// Value operations.
-+ // Copy a boxed Value between registers; emits nothing when source and
-+ // destination are the same register.
-+ void MacroAssembler::moveValue(const ValueOperand& src,
-+                                const ValueOperand& dest) {
-+   if (src.valueReg() == dest.valueReg()) {
-+     return;
-+   }
-+   movePtr(src.valueReg(), dest.valueReg());
-+ }
-+ // Materialize a constant Value in |dest|. GC things are loaded with a
-+ // patchable move and recorded as a data relocation; anything else is a plain
-+ // immediate load of the raw bits.
-+ void MacroAssembler::moveValue(const Value& src, const ValueOperand& dest) {
-+   const ImmWord rawBits(src.asRawBits());
-+   if (src.isGCThing()) {
-+     CodeOffset patchAt = movWithPatch(rawBits, dest.valueReg());
-+     writeDataRelocation(patchAt, src);
-+     return;
-+   }
-+   movePtr(rawBits, dest.valueReg());
-+ }
-+
-+// Branch operations.
-+ // Branch to |label| when the boxed Value in |lhs| is (Equal) or is not
-+ // (NotEqual) bit-identical to the constant |rhs|. |rhs| must not be NaN.
-+ void MacroAssembler::branchTestValue(Condition cond, const ValueOperand& lhs,
-+                                      const Value& rhs, Label* label) {
-+   MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+   MOZ_ASSERT(!rhs.isNaN());
-+
-+   UseScratchRegisterScope temps(asMasm());
-+   Register constant = temps.Acquire();
-+   MOZ_ASSERT(lhs.valueReg() != constant);
-+   // moveValue(const Value&, ...) picks the plain immediate load for non-GC
-+   // constants and the patchable, relocated load for GC things.
-+   moveValue(rhs, ValueOperand(constant));
-+   branchPtr(cond, lhs.valueReg(), constant, label);
-+ }
-+ // Branch to |label| when |val|, with its top (sign) bit cleared, is (Equal)
-+ // or is not (NotEqual) bit-identical to the canonical NaN with its top bit
-+ // cleared. |temp| receives the masked copy of |val|; a scratch GPR holds the
-+ // masked NaN constant.
-+ void MacroAssembler::branchTestNaNValue(Condition cond, const ValueOperand& val,
-+ Register temp, Label* label) {
-+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ MOZ_ASSERT(val.valueReg() != scratch);
-+
-+ // Strip the IEEE sign bit (LSB-numbering bit 63 = PPC-numbering bit 0)
-+ // with rldicl SH=0, MB=1: rotate by zero (no-op) then keep bits 1..63 of
-+ // PPC-numbering, clearing bit 0. Rotating by 1 instead would also shift
-+ // the quiet-NaN bit out of position and cause 1.5 (0x3FF8...) and NaN
-+ // (0x7FF8...) to collide after masking — bug 1943704 PPC64 regression.
-+ as_rldicl(temp, val.valueReg(), 0, 1);
-+
-+ // Load canonical NaN (with sign bit 0) and strip its sign bit too.
-+ static_assert(JS::detail::CanonicalizedNaNSignBit == 0);
-+ moveValue(DoubleValue(JS::GenericNaN()), ValueOperand(scratch));
-+ as_rldicl(scratch, scratch, 0, 1);
-+
-+ branchPtr(cond, temp, scratch, label);
-+ }
-+
-+ // Mask |ptr| down to its chunk base in |temp| and test the word at
-+ // gc::ChunkStoreBufferOffset in that chunk. For cond == Equal the branch is
-+ // taken when that word is non-zero; for NotEqual, when it is zero (the
-+ // comparison against 0 uses the inverted condition). |ptr| is preserved.
-+ void MacroAssembler::branchPtrInNurseryChunk(Condition cond, Register ptr,
-+ Register temp, Label* label) {
-+ MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
-+ MOZ_ASSERT(ptr != temp);
-+ MOZ_ASSERT(temp != InvalidReg);
-+
-+ andPtr(Imm32(int32_t(~gc::ChunkMask)), ptr, temp);
-+ branchPtr(InvertCondition(cond), Address(temp, gc::ChunkStoreBufferOffset),
-+ ImmWord(0), label);
-+ }
-+ // Thin forwarder to the shared branchValueIsNurseryCellImpl helper.
-+ void MacroAssembler::branchValueIsNurseryCell(Condition cond,
-+ ValueOperand value, Register temp,
-+ Label* label) {
-+ branchValueIsNurseryCellImpl(cond, value, temp, label);
-+ }
-+
-+// Patching / near address operations.
-+ // Reserve a 10-word run of nops that patchNopToCall can later overwrite
-+ // with a full call stanza (8-word load64 + mtctr + bctrl). The returned
-+ // offset is the end of the run, i.e. the would-be return address.
-+ CodeOffset MacroAssembler::nopPatchableToCall() {
-+   m_buffer.enterNoPool(kNoPoolPatchableBranchInsns);
-+   for (int slot = 0; slot < 10; slot++) {
-+     writeInst(NopInst);
-+   }
-+   m_buffer.leaveNoPool();
-+   return CodeOffset(currentOffset());
-+ }
-+ // Emit a load64 stanza into |dest| with a placeholder value of 0 and return
-+ // the offset recorded just before it, for patchNearAddressMove to fill in.
-+ // NOTE(review): unlike call(ImmPtr) and farJumpWithPatch, this does not open
-+ // a no-pool window around the stanza, and the offset is sampled before
-+ // emission. If emitLoad64Stanza can trigger a pool flush, the returned
-+ // offset would not point at the stanza — confirm.
-+ CodeOffset MacroAssembler::moveNearAddressWithPatch(Register dest) {
-+ CodeOffset offset(currentOffset());
-+ emitLoad64Stanza(dest, 0);
-+ return offset;
-+ }
-+ // static
-+ // Rewrite the load64 stanza at |loc| so that it loads the address |target|.
-+ void MacroAssembler::patchNearAddressMove(CodeLocationLabel loc,
-+                                           CodeLocationLabel target) {
-+   auto* stanza = reinterpret_cast<Instruction*>(loc.raw());
-+   const uint64_t newValue = reinterpret_cast<uint64_t>(target.raw());
-+   UpdateLoad64Value(stanza, newValue);
-+ }
-+
-+// Return address operations (link register architectures).
-+//
-+// Note: these MUST decrement SP by exactly 8 bytes. wasm::Frame is 16 bytes
-+// (callerFP_ + returnAddress_) and GenerateCallablePrologue pairs this with
-+// push(FramePointer) to match that layout exactly — a 16-byte decrement here
-+// would insert 8 bytes of padding and break FP-chain unwinding. The 8-byte
-+// intermediate misalignment between this save and the following push(FP) is
-+// never observed by a C call (no intervening transition), and any caller that
-+// does make a C call after pushReturnAddress routes through
-+// setupUnalignedABICall which re-aligns.
-+ // Copy LR into a scratch GPR and push it with push(Register). framePushed
-+ // is not adjusted here.
-+ void MacroAssembler::pushReturnAddress() {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ xs_mflr(scratch);
-+ push(scratch);
-+ }
-+ // Inverse of pushReturnAddress: pop one slot into a scratch GPR and move it
-+ // into LR.
-+ void MacroAssembler::popReturnAddress() {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ pop(scratch);
-+ xs_mtlr(scratch);
-+ }
-+
-+// ABI setup.
-+ // Begin an ABI call when the current stack alignment is unknown. The
-+ // incoming SP is copied to |scratch|, SP is moved down by one word and
-+ // rounded down to ABIStackAlignment, and the original SP is stored at the
-+ // new top of stack, where callWithABIPost reloads it.
-+ void MacroAssembler::setupUnalignedABICall(Register scratch) {
-+ MOZ_ASSERT(!IsCompilingWasm(), "wasm should only use aligned ABI calls");
-+ setupNativeABICall();
-+ dynamicAlignment_ = true;
-+
-+ movePtr(StackPointer, scratch);
-+
-+ // Force sp to be aligned.
-+ // The extra word guarantees room for the saved SP below the old SP.
-+ subPtr(Imm32(sizeof(uintptr_t)), StackPointer);
-+ andPtr(Imm32(~(ABIStackAlignment - 1)), StackPointer);
-+ storePtr(scratch, Address(StackPointer, 0));
-+ }
-+
-+// ===============================================================
-+// Arithmetic helpers.
-+
-+ // Compute the 32-bit quotient (|divOutput|) and remainder (|remOutput|) of
-+ // lhs / rhs, signed or unsigned per |isUnsigned|. Both inputs are preserved
-+ // and must not alias either output. Both results are sign-extended to 64
-+ // bits with extsw. For signed INT32_MIN / -1 the quotient is INT32_MIN and
-+ // the remainder is 0. No check for a zero divisor is emitted here.
-+ void MacroAssembler::flexibleDivMod32(Register lhs, Register rhs,
-+                                       Register divOutput, Register remOutput,
-+                                       bool isUnsigned, const LiveRegisterSet&) {
-+   MOZ_ASSERT(lhs != divOutput && lhs != remOutput, "lhs is preserved");
-+   MOZ_ASSERT(rhs != divOutput && rhs != remOutput, "rhs is preserved");
-+
-+   // PPC64 has no modulus instruction. Compute: rem = lhs - (lhs/rhs)*rhs
-+   // PPC64 divw(INT32_MIN, -1) is undefined; quotient=INT32_MIN, remainder=0.
-+   Label done;
-+   if (!isUnsigned) {
-+     // Only the exact pair (INT32_MIN, -1) takes the fixed-result path.
-+     Label notMinOverflow;
-+     branchPtr(Assembler::NotEqual, lhs, ImmWord(INT32_MIN), &notMinOverflow);
-+     branchPtr(Assembler::NotEqual, rhs, ImmWord(-1), &notMinOverflow);
-+     move32(Imm32(INT32_MIN), divOutput);
-+     move32(Imm32(0), remOutput);
-+     jump(&done);
-+     bind(&notMinOverflow);
-+   }
-+   if (isUnsigned) {
-+     as_divwu(divOutput, lhs, rhs);
-+   } else {
-+     as_divw(divOutput, lhs, rhs);
-+   }
-+   as_extsw(divOutput, divOutput);
-+   if (HasPOWER9()) {
-+     // POWER9 path: use the modulo instructions directly.
-+     if (isUnsigned) {
-+       as_moduw(remOutput, lhs, rhs);
-+     } else {
-+       as_modsw(remOutput, lhs, rhs);
-+     }
-+   } else {
-+     // rem = lhs - quotient * rhs.
-+     as_mullw(remOutput, divOutput, rhs);
-+     as_subf(remOutput, remOutput, lhs);
-+   }
-+   as_extsw(remOutput, remOutput);
-+   bind(&done);
-+ }
-+
-+ // pointer += indexTemp32 << shift. When the shift can be expressed as an
-+ // addressing scale, a single effective-address computation is used and
-+ // |indexTemp32| is left untouched; otherwise |indexTemp32| is shifted in
-+ // place and then added.
-+ void MacroAssembler::shiftIndex32AndAdd(Register indexTemp32, int shift,
-+                                         Register pointer) {
-+   if (!IsShiftInScaleRange(shift)) {
-+     lshift32(Imm32(shift), indexTemp32);
-+     addPtr(indexTemp32, pointer);
-+     return;
-+   }
-+   const BaseIndex scaled(pointer, indexTemp32, ShiftToScale(shift));
-+   computeEffectiveAddress(scaled, pointer);
-+ }
-+
-+ // Move the 64-bit GPR in |src| into |dest| (mtvsrd), then convert it to a
-+ // double in place with fcfid.
-+ void MacroAssembler::convertInt64ToDouble(Register64 src, FloatRegister dest) {
-+ as_mtvsrd(dest, src.reg);
-+ as_fcfid(dest, dest);
-+ }
-+
-+ // Round the double in |src| to an integral double in |dest| using |mode|.
-+ // TowardsZero, Up and Down map to a single friz / frip / frim. The
-+ // NearestTiesToEven case uses an fctid + fcfid round trip guarded by an
-+ // exponent check, and crashes on any other mode.
-+ void MacroAssembler::nearbyIntDouble(RoundingMode mode, FloatRegister src,
-+ FloatRegister dest) {
-+ switch (mode) {
-+ case RoundingMode::NearestTiesToEven: {
-+ // PPC64's frin rounds ties away from zero, NOT to even (ISA v3.1).
-+ // Use fctid+fcfid which uses FPSCR RN (default = round-to-nearest-even).
-+ // Guard: if |src| >= 2^52, value is already integral (or NaN/Inf) —
-+ // just copy src. This preserves NaN, Inf, and -0.
-+ // Check via integer exponent extraction to avoid FP temp conflicts.
-+ Label done;
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ // Keep a copy of the input in ScratchDoubleReg: it is still needed for
-+ // the final fcpsgn after |dest| has been overwritten.
-+ moveDouble(src, ScratchDoubleReg);
-+ if (src != dest) {
-+ moveDouble(src, dest);
-+ }
-+ if (HasPOWER9()) {
-+ // xsxexpdp lays the 11-bit biased exponent in XT.dw0 with the
-+ // rest zeroed, so mfvsrd reads it directly — drops the
-+ // srdi+andi. masking pair.
-+ // NOTE(review): this assumes ScratchSimd128Scope hands out a register
-+ // that does not alias ScratchDoubleReg; otherwise the saved input
-+ // would be clobbered before the fctid below — confirm.
-+ ScratchSimd128Scope expScratch(*this);
-+ as_xsxexpdp(expScratch, ScratchDoubleReg);
-+ as_mfvsrd(scratch, expScratch);
-+ } else {
-+ as_mfvsrd(scratch, ScratchDoubleReg);
-+ x_srdi(scratch, scratch, 52);
-+ as_andi_rc(scratch, scratch, 0x7FF);
-+ }
-+ // Biased exponent >= 1075 (= 1023+52) means |val| >= 2^52.
-+ // Also catches Inf (exp=2047) and NaN (exp=2047).
-+ ma_cmp(scratch, Imm32(1075), Assembler::GreaterThanOrEqual);
-+ ma_b(Assembler::GreaterThanOrEqual, &done);
-+ as_fctid(dest, ScratchDoubleReg);
-+ as_fcfid(dest, dest);
-+ // Copy the input's sign onto the result.
-+ as_fcpsgn(dest, ScratchDoubleReg, dest);
-+ bind(&done);
-+ break;
-+ }
-+ case RoundingMode::TowardsZero:
-+ as_friz(dest, src);
-+ break;
-+ case RoundingMode::Up:
-+ as_frip(dest, src);
-+ break;
-+ case RoundingMode::Down:
-+ as_frim(dest, src);
-+ break;
-+ default:
-+ MOZ_CRASH("Unexpected rounding mode");
-+ }
-+ }
-+
-+ // Single-precision variant of nearbyIntDouble: rounds via the double path
-+ // and then narrows the result in |dest| with frsp.
-+ void MacroAssembler::nearbyIntFloat32(RoundingMode mode, FloatRegister src,
-+ FloatRegister dest) {
-+ // PPC FP rounding instructions operate on double-precision.
-+ // For single-precision, we round as double then round back to single.
-+ // The frsp instruction handles the double->single conversion.
-+ nearbyIntDouble(mode, src, dest);
-+ as_frsp(dest, dest);
-+ }
-+
-+// ===============================================================
-+// Far jump support.
-+
-+ // Emit a patchable far jump: an 8-word load64 stanza (placeholder value 0)
-+ // into a scratch register, followed by mtctr + bctr. Returns the offset of
-+ // the first stanza word, which is what patchFarJump expects.
-+ CodeOffset MacroAssembler::farJumpWithPatch() {
-+   UseScratchRegisterScope temps(asMasm());
-+   Register scratch = temps.Acquire();
-+
-+   // stanza(8) + mtctr + bctr = 10 instructions.
-+   // Open the no-pool window BEFORE sampling the offset: enterNoPool() can
-+   // itself flush a pending pool and advance currentOffset(), in which case
-+   // an offset sampled earlier would no longer point at the stanza.
-+   m_buffer.enterNoPool(kNoPoolPatchableBranchInsns);
-+   CodeOffset loadOffset(currentOffset());
-+   emitLoad64Stanza(scratch, 0);
-+   xs_mtctr(scratch);
-+   as_bctr();
-+   m_buffer.leaveNoPool();
-+
-+   return loadOffset;
-+ }
-+
-+// ===============================================================
-+ // Forwards to the underlying Assembler's flush().
-+ void MacroAssembler::flush() { Assembler::flush(); }
-+
-+// Wasm support.
-+
-+ // Emit a trap instruction and return its offset. Any pending pool is
-+ // flushed first so the recorded offset is the trap itself.
-+ FaultingCodeOffset MacroAssembler::wasmTrapInstruction() {
-+   m_buffer.flushPool();  // see comment in wasmLoadImpl
-+   FaultingCodeOffset trapOffset(currentOffset());
-+   xs_trap();
-+   return trapOffset;
-+ }
-+
-+ // PPC64 SlowCallMarker: `ori r12, r12, 0` (0x618C0000), a NOP-like
-+ // instruction that won't appear in normal code generation. The canonical
-+ // nop `ori r0, r0, 0` (0x60000000) cannot serve as the marker because it
-+ // would be indistinguishable from ordinary nops.
-+ static const int32_t SlowCallMarker = 0x618C0000;
-+
-+ // Emit the slow-call marker instruction at the current position.
-+ void MacroAssembler::wasmMarkCallAsSlow() {
-+ // Emit: ori r12, r12, 0
-+ as_ori(CallReg, CallReg, 0);
-+ }
-+
-+ // Load the 32-bit word at address |ra_| into |temp2| and branch to
-+ // |notSlow| unless it equals SlowCallMarker. |temp1| is not used by this
-+ // implementation.
-+ void MacroAssembler::wasmCheckSlowCallsite(Register ra_, Label* notSlow,
-+ Register temp1, Register temp2) {
-+ MOZ_ASSERT(ra_ != temp2);
-+ load32(Address(ra_, 0), temp2);
-+ branch32(Assembler::NotEqual, temp2, Imm32(SlowCallMarker), notSlow);
-+ }
-+
-+ // Emit a call through |reg| immediately followed by the slow-call marker,
-+ // and return the offset produced by the call itself.
-+ CodeOffset MacroAssembler::wasmMarkedSlowCall(const wasm::CallSiteDesc& desc,
-+ const Register reg) {
-+ CodeOffset offset = call(desc, reg);
-+ wasmMarkCallAsSlow();
-+ return offset;
-+ }
-+
-+// ===============================================================
-+// Additional stack operations.
-+
-+ // Push a float register and record sizeof(double) bytes in framePushed.
-+ void MacroAssembler::Push(FloatRegister f) {
-+ push(f);
-+ adjustFrame(int32_t(sizeof(double)));
-+ }
-+ // Pop a float register and remove sizeof(double) bytes from framePushed.
-+ void MacroAssembler::Pop(FloatRegister f) {
-+ pop(f);
-+ adjustFrame(-int32_t(sizeof(double)));
-+ }
-+ // Pop a boxed Value into |val| and remove sizeof(Value) bytes from
-+ // framePushed.
-+ void MacroAssembler::Pop(const ValueOperand& val) {
-+ popValue(val);
-+ adjustFrame(-int32_t(sizeof(Value)));
-+ }
-+
-+ // static
-+ // Number of stack bytes PushRegsInMask reserves for |set|: one pointer-sized
-+ // slot per GPR plus the FPU set's own push size.
-+ size_t MacroAssembler::PushRegsInMaskSizeInBytes(LiveRegisterSet set) {
-+   const size_t gprBytes = set.gprs().size() * sizeof(intptr_t);
-+   const size_t fpuBytes = set.fpus().getPushSizeInBytes();
-+   return gprBytes + fpuBytes;
-+ }
-+
-+ // Store the registers in |set| to memory ending at |dest|, walking downwards
-+ // from dest.offset: GPRs first, then the reduced FPU set, each register in a
-+ // slot of its own size. |dest| is taken by value, so the caller's Address
-+ // is not modified. |scratch| is not used by this implementation.
-+ // NOTE(review): numFpu is decremented per FPU register but never asserted;
-+ // consider checking it reaches zero if fpuSet.size() is expected to match
-+ // the iteration count.
-+ void MacroAssembler::storeRegsInMask(LiveRegisterSet set, Address dest,
-+ Register scratch) {
-+ FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());
-+ mozilla::DebugOnly<unsigned> numFpu = fpuSet.size();
-+ mozilla::DebugOnly<int32_t> diffF = fpuSet.getPushSizeInBytes();
-+ mozilla::DebugOnly<int32_t> diffG = set.gprs().size() * sizeof(intptr_t);
-+
-+ // The destination offset must leave room for everything stored below it.
-+ MOZ_ASSERT(dest.offset >= diffG + diffF);
-+
-+ for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more(); ++iter) {
-+ diffG -= sizeof(intptr_t);
-+ dest.offset -= sizeof(intptr_t);
-+ storePtr(*iter, dest);
-+ }
-+ MOZ_ASSERT(diffG == 0);
-+
-+ // Natural per-kind slot. See PushRegsInMask comment.
-+ for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) {
-+ FloatRegister reg = *iter;
-+ diffF -= reg.size();
-+ numFpu -= 1;
-+ dest.offset -= reg.size();
-+ if (reg.isSimd128()) {
-+ storeUnalignedSimd128(reg, dest);
-+ } else {
-+ storeDouble(reg.asDouble(), dest);
-+ }
-+ }
-+ MOZ_ASSERT(diffF == 0);
-+ }
-+
-+ // Reset the stack so that exactly |framePushed| bytes remain below the
-+ // frame pointer (SP = FP - framePushed), and update the tracked frame depth
-+ // to match. The new depth may not exceed the current one.
-+ void MacroAssembler::freeStackTo(uint32_t framePushed) {
-+   MOZ_ASSERT(framePushed <= framePushed_);
-+   movePtr(FramePointer, StackPointer);
-+   if (framePushed != 0) {
-+     subPtr(Imm32(framePushed), StackPointer);
-+   }
-+   framePushed_ = framePushed;
-+ }
-+
-+// ===============================================================
-+// Additional call / patch operations.
-+
-+ // Call a JitCode object: its raw entry address is loaded into a scratch
-+ // register with a patchable load64 stanza, registered as a JITCODE pending
-+ // jump, and then called through callJitNoProfiler.
-+ // NOTE(review): the stanza is emitted without the enterNoPool/leaveNoPool
-+ // bracket that call(ImmPtr) uses — confirm a pool cannot be flushed into
-+ // the middle of it.
-+ void MacroAssembler::call(JitCode* c) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+
-+ uint64_t addr = uintptr_t(c->raw());
-+ BufferOffset bo = emitLoad64Stanza(scratch, addr);
-+ addPendingJump(bo, ImmPtr(c->raw()), RelocationKind::JITCODE);
-+
-+ callJitNoProfiler(scratch);
-+ }
-+
-+ // Reserve a 10-word, CallTag-sized run of nops for patchCall to fill in.
-+ // The returned offset is the end of the run, which is the return address
-+ // once the patched call executes.
-+ CodeOffset MacroAssembler::callWithPatch() {
-+   m_buffer.enterNoPool(kNoPoolPatchableBranchInsns);
-+   for (int slot = 0; slot < 10; slot++) {
-+     writeInst(NopInst);
-+   }
-+   m_buffer.leaveNoPool();
-+   return CodeOffset(currentOffset());
-+ }
-+
-+ // Fill in the 10-word stanza reserved by callWithPatch so that it calls
-+ // |calleeOffset|. |callerOffset| is the return address, i.e. one past the
-+ // stanza's last word. If the callee is within bl range the stanza becomes
-+ // 9 nops + bl; otherwise it becomes load64 + mtctr + bctrl and a long jump
-+ // is registered.
-+ void MacroAssembler::patchCall(uint32_t callerOffset, uint32_t calleeOffset) {
-+   const uint32_t stanzaOffset = callerOffset - 10 * sizeof(uint32_t);
-+   Instruction* stanza =
-+       (Instruction*)(m_buffer.getInst(BufferOffset(stanzaOffset)));
-+   // The bl occupies the last slot (inst[9]), so its displacement is measured
-+   // from stanzaOffset + 36.
-+   const intptr_t branchSlot =
-+       (intptr_t)stanzaOffset + 9 * (intptr_t)sizeof(uint32_t);
-+   const intptr_t displacement = (intptr_t)calleeOffset - branchSlot;
-+
-+   if (!JOffImm26::IsInRange(displacement)) {
-+     // Too far for bl: fall back to an absolute call through CTR.
-+     addLongJump(BufferOffset(stanzaOffset), BufferOffset(calleeOffset));
-+     WriteLoad64Instructions(stanza, SecondScratchReg,
-+                             LabelBase::INVALID_OFFSET);
-+     stanza[8].makeOp_mtctr(SecondScratchReg);
-+     stanza[9].makeOp_bctr(LinkB);
-+     return;
-+   }
-+
-+   for (int slot = 0; slot < 9; slot++) {
-+     stanza[slot].makeNop();
-+   }
-+   stanza[9].setData(PPC_b | JOffImm26(displacement).encode() | LinkB);
-+ }
-+
-+ // Assembly-time patch of a far jump emitted by farJumpWithPatch: rewrites
-+ // the load64 stanza at |farJump| with a placeholder value (keeping the
-+ // register it already targets) and registers a long jump to |targetOffset|.
-+ void MacroAssembler::patchFarJump(CodeOffset farJump, uint32_t targetOffset) {
-+ Instruction* inst =
-+ (Instruction*)m_buffer.getInst(BufferOffset(farJump.offset()));
-+ // Extract the destination register from the existing stanza. Both shapes
-+ // encode rD at LE bits [21..25] of their first "register-touching" slot:
-+ // P8 = mflr rD at [2], P9+ = addpcis rD at [0]. Major opcode of slot [0]
-+ // distinguishes (31 = mfspr, 19 = addpcis).
-+ uint32_t i0 = inst[0].encode();
-+ uint32_t regCode = (((i0 >> 26) & 0x3f) == 19)
-+ ? ((i0 >> 21) & 0x1f)
-+ : ((inst[2].encode() >> 21) & 0x1f);
-+ Register reg = Register::FromCode(regCode);
-+ WriteLoad64Instructions(inst, reg, LabelBase::INVALID_OFFSET);
-+ addLongJump(BufferOffset(farJump.offset()), BufferOffset(targetOffset));
-+ }
-+
-+ // static
-+ // Patch of a far jump given raw addresses: updates the value loaded by the
-+ // stanza at |farJump| to |target| and flushes the instruction cache for the
-+ // 8 stanza words.
-+ void MacroAssembler::patchFarJump(uint8_t* farJump, uint8_t* target) {
-+ UpdateLoad64Value((Instruction*)farJump, (uint64_t)(uintptr_t)target);
-+ FlushICache(farJump, 8 * sizeof(Instruction));
-+ }
-+
-+ // static
-+ // Turn the 10-word nop run that ends at |callsite| into a call to |target|:
-+ // load64 into SecondScratchReg, mtctr, branch-and-link via CTR. The
-+ // instruction cache is flushed for the whole run.
-+ void MacroAssembler::patchNopToCall(uint8_t* callsite, uint8_t* target) {
-+ // callsite points AFTER the 10-instruction stanza. Subtract to find start.
-+ Instruction* inst = (Instruction*)callsite - 10;
-+ WriteLoad64Instructions(inst, SecondScratchReg, (uint64_t)(uintptr_t)target);
-+ inst[8].makeOp_mtctr(SecondScratchReg);
-+ inst[9].makeOp_bctr(LinkB);
-+ FlushICache(inst, 10 * sizeof(Instruction));
-+ }
-+
-+ // static
-+ // Inverse of patchNopToCall: overwrite the 10-word stanza that ends at
-+ // |callsite| with nops and flush the instruction cache for it.
-+ void MacroAssembler::patchCallToNop(uint8_t* callsite) {
-+ // callsite points AFTER the 10-instruction stanza. Subtract to find start.
-+ Instruction* inst = (Instruction*)callsite - 10;
-+ for (int i = 0; i < 10; i++) {
-+ inst[i].makeNop();
-+ }
-+ FlushICache(inst, 10 * sizeof(Instruction));
-+ }
-+
-+ // Update the load64 stanza at |offset| in the assembler buffer so that it
-+ // loads |n|, sign-extended from 32 to 64 bits.
-+ void MacroAssembler::patchMove32(CodeOffset offset, Imm32 n) {
-+ // Patch an 8-instruction load64 sequence with a 32-bit value.
-+ Instruction* inst =
-+ (Instruction*)m_buffer.getInst(BufferOffset(offset.offset()));
-+ UpdateLoad64Value(inst, uint64_t(int64_t(n.value)));
-+ }
-+
-+ // Push the address of the instruction that follows this sequence, as if it
-+ // were a return address. |scratch| is clobbered with that address. Returns
-+ // the buffer offset the pushed address refers to.
-+ uint32_t MacroAssembler::pushFakeReturnAddress(Register scratch) {
-+ CodeLabel cl;
-+
-+ // Use mov(CodeLabel*, Register) which always emits a full 8-instruction
-+ // load64 sequence (via NOPs + WriteLoad64Instructions). This is critical
-+ // because movePtr(ImmWord(0)) would optimize to a single li instruction,
-+ // but processCodeLabels->Bind->UpdateLoad64Value expects the full
-+ // 8-instruction literal pool sequence at the patchAt offset.
-+ mov(&cl, scratch);
-+
-+ Push(scratch);
-+
-+ // The label is bound right after the push; that is the fake return point.
-+ bind(&cl);
-+ uint32_t retAddr = currentOffset();
-+
-+ addCodeLabel(cl);
-+ return retAddr;
-+ }
-+
-+ // ABI call through a register. The target is copied to a scratch register
-+ // before the argument moves are emitted, and the call goes through that
-+ // copy.
-+ void MacroAssembler::callWithABINoProfiler(Register fun, ABIType result) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ // Save fun to scratch since fun might be clobbered by callWithABIPre.
-+ movePtr(fun, scratch);
-+
-+ uint32_t stackAdjust;
-+ callWithABIPre(&stackAdjust);
-+ call(scratch);
-+ callWithABIPost(stackAdjust, result);
-+ }
-+
-+// ===============================================================
-+// Additional arithmetic helpers.
-+
-+ // dest = lhs % rhs for 32-bit operands, signed or unsigned per
-+ // |isUnsigned|; the result is sign-extended to 64 bits with extsw. For
-+ // signed INT32_MIN % -1 the result is 0. No check for a zero divisor is
-+ // emitted here.
-+ void MacroAssembler::flexibleRemainder32(Register lhs, Register rhs,
-+                                          Register dest, bool isUnsigned,
-+                                          const LiveRegisterSet&) {
-+   // rem = lhs - (lhs/rhs)*rhs
-+   // PPC64 divw(INT32_MIN, -1) is undefined; result is 0.
-+   Label done;
-+   if (!isUnsigned) {
-+     // Only the exact pair (INT32_MIN, -1) takes the fixed-result path.
-+     Label notMinOverflow;
-+     branchPtr(Assembler::NotEqual, lhs, ImmWord(INT32_MIN), &notMinOverflow);
-+     branchPtr(Assembler::NotEqual, rhs, ImmWord(-1), &notMinOverflow);
-+     move32(Imm32(0), dest);
-+     jump(&done);
-+     bind(&notMinOverflow);
-+   }
-+   if (HasPOWER9()) {
-+     // POWER9 path: use the modulo instructions directly.
-+     if (isUnsigned) {
-+       as_moduw(dest, lhs, rhs);
-+     } else {
-+       as_modsw(dest, lhs, rhs);
-+     }
-+   } else {
-+     // The quotient goes through a scratch GPR so that |dest| may alias an
-+     // input.
-+     UseScratchRegisterScope temps(asMasm());
-+     Register scratch = temps.Acquire();
-+     if (isUnsigned) {
-+       as_divwu(scratch, lhs, rhs);
-+     } else {
-+       as_divw(scratch, lhs, rhs);
-+     }
-+     as_mullw(scratch, scratch, rhs);
-+     as_subf(dest, scratch, lhs);
-+   }
-+   as_extsw(dest, dest);
-+   bind(&done);
-+ }
-+
-+ // dest = lhs / rhs for pointer-sized (64-bit) operands, signed or unsigned
-+ // per |isUnsigned|. For signed INT64_MIN / -1 the result is INT64_MIN. No
-+ // check for a zero divisor is emitted here.
-+ void MacroAssembler::flexibleQuotientPtr(Register lhs, Register rhs,
-+                                          Register dest, bool isUnsigned,
-+                                          const LiveRegisterSet&) {
-+   // PPC64 divd(INT64_MIN, -1) is undefined; return INT64_MIN to match
-+   // ARM64/LoongArch64 hardware sdiv behavior.
-+   Label done;
-+   if (!isUnsigned) {
-+     // Only the exact pair (INT64_MIN, -1) takes the fixed-result path.
-+     Label notMinOverflow;
-+     branchPtr(Assembler::NotEqual, lhs, ImmWord(INT64_MIN), &notMinOverflow);
-+     branchPtr(Assembler::NotEqual, rhs, ImmWord(-1), &notMinOverflow);
-+     movePtr(ImmWord(INT64_MIN), dest);
-+     jump(&done);
-+     bind(&notMinOverflow);
-+   }
-+   if (isUnsigned) {
-+     as_divdu(dest, lhs, rhs);
-+   } else {
-+     as_divd(dest, lhs, rhs);
-+   }
-+   bind(&done);
-+ }
-+
-+// Emits dest = lhs % rhs on pointer-sized (64-bit) operands, signed unless
-+// |isUnsigned|. The LiveRegisterSet parameter is unnamed and unused.
-+void MacroAssembler::flexibleRemainderPtr(Register lhs, Register rhs,
-+                                          Register dest, bool isUnsigned,
-+                                          const LiveRegisterSet&) {
-+  // rem = lhs - (lhs/rhs)*rhs
-+  // PPC64 divd(INT64_MIN, -1) is undefined; result is 0.
-+  Label done;
-+  if (!isUnsigned) {
-+    Label notMinOverflow;
-+    branchPtr(Assembler::NotEqual, lhs, ImmWord(INT64_MIN), &notMinOverflow);
-+    branchPtr(Assembler::NotEqual, rhs, ImmWord(-1), &notMinOverflow);
-+    movePtr(ImmWord(0), dest);
-+    jump(&done);
-+    bind(&notMinOverflow);
-+  }
-+  if (HasPOWER9()) {
-+    // POWER9 has native modulo instructions.
-+    if (isUnsigned) {
-+      as_modud(dest, lhs, rhs);
-+    } else {
-+      as_modsd(dest, lhs, rhs);
-+    }
-+  } else {
-+    // Older CPUs: divide, multiply back, subtract. |dest| is only written
-+    // by the final subf, so it may alias |lhs| or |rhs|.
-+    UseScratchRegisterScope temps(asMasm());
-+    Register scratch = temps.Acquire();
-+    if (isUnsigned) {
-+      as_divdu(scratch, lhs, rhs);
-+    } else {
-+      as_divd(scratch, lhs, rhs);
-+    }
-+    as_mulld(scratch, scratch, rhs);
-+    as_subf(dest, scratch, lhs);
-+  }
-+  bind(&done);
-+}
-+
-+// ===============================================================
-+// Rounding helpers.
-+
-+// Emits dest = floor(src) as an int32. Branches to |fail| when the floored
-+// value does not fit in int32, or when the result is zero but the input has
-+// either of its top two bits set (which covers -0).
-+void MacroAssembler::floorDoubleToInt32(FloatRegister src, Register dest,
-+                                        Label* fail) {
-+  ScratchDoubleScope fpscratch(asMasm());
-+  UseScratchRegisterScope temps(asMasm());
-+  Register scratch = temps.Acquire();
-+
-+  // Round toward negative infinity, then convert to int64.
-+  as_frim(fpscratch, src);
-+  as_fctidz(fpscratch, fpscratch);
-+  as_mfvsrd(dest, fpscratch);
-+
-+  // Check if result fits in int32.
-+  as_extsw(scratch, dest);
-+  as_cmpd(dest, scratch);
-+  ma_b(NotEqual, fail);
-+
-+  // Check for -0 and NaN when result is zero.
-+  Label notZero;
-+  as_cmpdi(dest, 0);
-+  ma_b(NotEqual, &notZero);
-+  {
-+    // If top 2 bits of src are set, it's negative or NaN.
-+    as_mfvsrd(dest, src);
-+    // rldicl. = x_srdi + record form: dest = top 2 bits, CR0[eq]=(dest==0).
-+    // Folds the explicit cmpdi src,0 that would otherwise drive the branch.
-+    // On the fall-through path dest is 0 again, which is the result.
-+    as_rldicl_rc(dest, dest, 2, 62);
-+    ma_b(NotEqual, fail);
-+  }
-+  bind(&notZero);
-+}
-+
-+// Float32 counterpart of floorDoubleToInt32: emits dest = floor(src) as an
-+// int32 and branches to |fail| when the value does not fit in int32 or when a
-+// zero result comes from an input with either of its top two bits set.
-+void MacroAssembler::floorFloat32ToInt32(FloatRegister src, Register dest,
-+                                         Label* fail) {
-+  ScratchDoubleScope fpscratch(asMasm());
-+  UseScratchRegisterScope temps(asMasm());
-+  Register scratch = temps.Acquire();
-+
-+  // PPC FP rounding works on doubles. Single-precision FPRs are
-+  // already in double-width registers, so frim works fine.
-+  as_frim(fpscratch, src);
-+  as_fctidz(fpscratch, fpscratch);
-+  as_mfvsrd(dest, fpscratch);
-+
-+  // Check if result fits in int32.
-+  as_extsw(scratch, dest);
-+  as_cmpd(dest, scratch);
-+  ma_b(NotEqual, fail);
-+
-+  // Check for -0 and NaN when result is zero.
-+  Label notZero;
-+  as_cmpdi(dest, 0);
-+  ma_b(NotEqual, &notZero);
-+  {
-+    // src is held in the FPR as a 64-bit double (lfs widens float32 to
-+    // double on load), so the same top-2-bits check used for doubles
-+    // applies: bit 63 = sign, bit 62 = exponent MSB. Nonzero means -0,
-+    // ±Inf, NaN, or a large magnitude — none of which is +0.
-+    as_mfvsrd(dest, src);
-+    // rldicl. = x_srdi + record form: dest = top 2 bits, CR0[eq]=(dest==0).
-+    // Folds the explicit cmpdi src,0 that would otherwise drive the branch.
-+    as_rldicl_rc(dest, dest, 2, 62);
-+    ma_b(NotEqual, fail);
-+  }
-+  bind(&notZero);
-+}
-+
-+// Emits dest = ceil(src) as an int32. Branches to |fail| when the value does
-+// not fit in int32, or when the result is zero and the raw bits of |src| are
-+// not all zero (i.e. the input was anything other than +0).
-+void MacroAssembler::ceilDoubleToInt32(FloatRegister src, Register dest,
-+                                       Label* fail) {
-+  ScratchDoubleScope fpscratch(asMasm());
-+  UseScratchRegisterScope temps(asMasm());
-+  Register scratch = temps.Acquire();
-+
-+  // Round toward positive infinity, then convert to int64.
-+  as_frip(fpscratch, src);
-+  as_fctidz(fpscratch, fpscratch);
-+  as_mfvsrd(dest, fpscratch);
-+
-+  // Check if result fits in int32.
-+  as_extsw(scratch, dest);
-+  as_cmpd(dest, scratch);
-+  ma_b(NotEqual, fail);
-+
-+  // Check for (-1, -0] and NaN when result is zero.
-+  Label notZero;
-+  as_cmpdi(dest, 0);
-+  ma_b(NotEqual, &notZero);
-+  {
-+    // If binary value is not zero, input was not 0 (could be -0 or NaN).
-+    // On the fall-through path dest holds the bits of +0, i.e. 0.
-+    as_mfvsrd(dest, src);
-+    as_cmpdi(dest, 0);
-+    ma_b(NotEqual, fail);
-+  }
-+  bind(&notZero);
-+}
-+
-+// Float32 counterpart of ceilDoubleToInt32: emits dest = ceil(src) as an
-+// int32 and branches to |fail| when the value does not fit in int32 or when a
-+// zero result comes from an input whose raw register bits are not all zero.
-+void MacroAssembler::ceilFloat32ToInt32(FloatRegister src, Register dest,
-+                                        Label* fail) {
-+  ScratchDoubleScope fpscratch(asMasm());
-+  UseScratchRegisterScope temps(asMasm());
-+  Register scratch = temps.Acquire();
-+
-+  // Round toward positive infinity, then convert to int64.
-+  as_frip(fpscratch, src);
-+  as_fctidz(fpscratch, fpscratch);
-+  as_mfvsrd(dest, fpscratch);
-+
-+  // Check if result fits in int32.
-+  as_extsw(scratch, dest);
-+  as_cmpd(dest, scratch);
-+  ma_b(NotEqual, fail);
-+
-+  // Check for (-1, -0] and NaN when result is zero.
-+  Label notZero;
-+  as_cmpdi(dest, 0);
-+  ma_b(NotEqual, &notZero);
-+  {
-+    // Only an all-zero bit pattern (+0) may produce an int32 zero.
-+    as_mfvsrd(dest, src);
-+    as_cmpdi(dest, 0);
-+    ma_b(NotEqual, fail);
-+  }
-+  bind(&notZero);
-+}
-+
-+// Emits dest = trunc(src) as an int32. Branches to |fail| when the value does
-+// not fit in int32, or when the result is zero but the input has either of
-+// its top two bits set (which covers -0 and negative fractions).
-+void MacroAssembler::truncDoubleToInt32(FloatRegister src, Register dest,
-+                                        Label* fail) {
-+  ScratchDoubleScope fpscratch(asMasm());
-+  UseScratchRegisterScope temps(asMasm());
-+  Register scratch = temps.Acquire();
-+
-+  // fctidz converts to int64 rounding toward zero.
-+  as_fctidz(fpscratch, src);
-+  as_mfvsrd(dest, fpscratch);
-+
-+  // Check if result fits in int32.
-+  as_extsw(scratch, dest);
-+  as_cmpd(dest, scratch);
-+  ma_b(NotEqual, fail);
-+
-+  // Check for -0 and NaN when result is zero.
-+  Label notZero;
-+  as_cmpdi(dest, 0);
-+  ma_b(NotEqual, &notZero);
-+  {
-+    as_mfvsrd(dest, src);
-+    // rldicl. = x_srdi + record form: dest = top 2 bits, CR0[eq]=(dest==0).
-+    // Folds the explicit cmpdi src,0 that would otherwise drive the branch.
-+    as_rldicl_rc(dest, dest, 2, 62);
-+    ma_b(NotEqual, fail);
-+  }
-+  bind(&notZero);
-+}
-+
-+// Float32 counterpart of truncDoubleToInt32: emits dest = trunc(src) as an
-+// int32 and branches to |fail| when the value does not fit in int32 or when a
-+// zero result comes from an input with either of its top two bits set.
-+void MacroAssembler::truncFloat32ToInt32(FloatRegister src, Register dest,
-+                                         Label* fail) {
-+  ScratchDoubleScope fpscratch(asMasm());
-+  UseScratchRegisterScope temps(asMasm());
-+  Register scratch = temps.Acquire();
-+
-+  // fctidz converts to int64 rounding toward zero.
-+  as_fctidz(fpscratch, src);
-+  as_mfvsrd(dest, fpscratch);
-+
-+  // Check if result fits in int32.
-+  as_extsw(scratch, dest);
-+  as_cmpd(dest, scratch);
-+  ma_b(NotEqual, fail);
-+
-+  // Check for -0 and NaN when result is zero.
-+  Label notZero;
-+  as_cmpdi(dest, 0);
-+  ma_b(NotEqual, &notZero);
-+  {
-+    as_mfvsrd(dest, src);
-+    // rldicl. = x_srdi + record form: dest = top 2 bits, CR0[eq]=(dest==0).
-+    // Folds the explicit cmpdi src,0 that would otherwise drive the branch.
-+    as_rldicl_rc(dest, dest, 2, 62);
-+    ma_b(NotEqual, fail);
-+  }
-+  bind(&notZero);
-+}
-+
-+// Emits dest = Math.round(src) as an int32, computed as
-+// floor(src + biggest-double-below-0.5). Branches to |fail| when the result
-+// does not fit in int32 or would be -0 (inputs in [-0.5, -0]). |temp| is
-+// clobbered with the rounding constant.
-+void MacroAssembler::roundDoubleToInt32(FloatRegister src, Register dest,
-+                                        FloatRegister temp, Label* fail) {
-+  ScratchDoubleScope fpscratch(asMasm());
-+  UseScratchRegisterScope temps(asMasm());
-+  Register scratch = temps.Acquire();
-+
-+  Label performRound;
-+
-+  // Non-negative inputs (and -0, which compares equal to +0) go straight to
-+  // the rounding sequence.
-+  zeroDouble(fpscratch);
-+  branchDouble(DoubleGreaterThanOrEqual, src, fpscratch, &performRound);
-+
-+  // Input is negative (or NaN). Anything in [-0.5, 0) rounds to -0, which an
-+  // int32 cannot represent. NaN falls through and is rejected by the int32
-+  // range check below.
-+  loadConstantDouble(-0.5, fpscratch);
-+  branchDouble(DoubleGreaterThanOrEqual, src, fpscratch, fail);
-+
-+  bind(&performRound);
-+  {
-+    loadConstantDouble(GetBiggestNumberLessThan(0.5), temp);
-+    as_fadd(fpscratch, src, temp);
-+    as_frim(fpscratch, fpscratch);
-+    as_fctidz(fpscratch, fpscratch);
-+    as_mfvsrd(dest, fpscratch);
-+
-+    // Check if result fits in int32.
-+    as_extsw(scratch, dest);
-+    as_cmpd(dest, scratch);
-+    ma_b(NotEqual, fail);
-+  }
-+
-+  // A zero result can only come from -0 or from an input in [+0, 0.5).
-+  // Only -0 must fail; comparing the raw bits against 0 would also reject
-+  // every input in (0, 0.5), so test the top two bits instead, as the
-+  // floor/trunc helpers do.
-+  Label notZero;
-+  as_cmpdi(dest, 0);
-+  ma_b(NotEqual, &notZero);
-+  {
-+    as_mfvsrd(dest, src);
-+    // rldicl. leaves dest = top 2 bits and sets CR0[eq] = (dest == 0); on the
-+    // fall-through path dest is 0 again, which is the result.
-+    as_rldicl_rc(dest, dest, 2, 62);
-+    ma_b(NotEqual, fail);
-+  }
-+  bind(&notZero);
-+}
-+
-+// Float32 counterpart of roundDoubleToInt32: emits dest = Math.round(src) as
-+// an int32, computed as floor(src + biggest-float-below-0.5). Branches to
-+// |fail| when the result does not fit in int32 or would be -0. |temp| is
-+// clobbered with the rounding constant.
-+void MacroAssembler::roundFloat32ToInt32(FloatRegister src, Register dest,
-+                                         FloatRegister temp, Label* fail) {
-+  ScratchDoubleScope fpscratch(asMasm());
-+  UseScratchRegisterScope temps(asMasm());
-+  Register scratch = temps.Acquire();
-+
-+  Label performRound;
-+
-+  // Non-negative inputs (and -0, which compares equal to +0) go straight to
-+  // the rounding sequence.
-+  loadConstantFloat32(0.0f, fpscratch);
-+  branchFloat(DoubleGreaterThanOrEqual, src, fpscratch, &performRound);
-+
-+  // Input is negative (or NaN). Anything in [-0.5, 0) rounds to -0, which an
-+  // int32 cannot represent. NaN falls through and is rejected by the int32
-+  // range check below.
-+  loadConstantFloat32(-0.5f, fpscratch);
-+  branchFloat(DoubleGreaterThanOrEqual, src, fpscratch, fail);
-+
-+  bind(&performRound);
-+  {
-+    // The constant must be the largest *float* below 0.5. Narrowing the
-+    // double-precision predecessor of 0.5 to float rounds it to exactly
-+    // 0.5f, which makes 0.49999997f + 0.5f round up to 1.0f.
-+    loadConstantFloat32(GetBiggestNumberLessThan(0.5f), temp);
-+    as_fadds(fpscratch, src, temp);
-+    as_frim(fpscratch, fpscratch);
-+    as_fctidz(fpscratch, fpscratch);
-+    as_mfvsrd(dest, fpscratch);
-+
-+    // Check if result fits in int32.
-+    as_extsw(scratch, dest);
-+    as_cmpd(dest, scratch);
-+    ma_b(NotEqual, fail);
-+  }
-+
-+  // A zero result can only come from -0 or from an input in [+0, 0.5).
-+  // Only -0 must fail; comparing the raw bits against 0 would also reject
-+  // every input in (0, 0.5), so test the top two bits instead, as the
-+  // floor/trunc helpers do.
-+  Label notZero;
-+  as_cmpdi(dest, 0);
-+  ma_b(NotEqual, &notZero);
-+  {
-+    as_mfvsrd(dest, src);
-+    // rldicl. leaves dest = top 2 bits and sets CR0[eq] = (dest == 0); on the
-+    // fall-through path dest is 0 again, which is the result.
-+    as_rldicl_rc(dest, dest, 2, 62);
-+    ma_b(NotEqual, fail);
-+  }
-+  bind(&notZero);
-+}
-+
-+// ===============================================================
-+// FP conversion / copy-sign.
-+
-+// Converts the pointer-sized integer in |src| to a double in |dest| by
-+// wrapping it in a Register64 and delegating to convertInt64ToDouble.
-+void MacroAssembler::convertIntPtrToDouble(Register src, FloatRegister dest) {
-+ convertInt64ToDouble(Register64(src), dest);
-+}
-+
-+// Writes to |output| the magnitude of |lhs| combined with the sign of |rhs|.
-+void MacroAssembler::copySignDouble(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister output) {
-+ // fcpsgn frt, fra, frb: copies sign of fra to magnitude of frb.
-+ // lhs = magnitude source, rhs = sign source.
-+ as_fcpsgn(output, rhs, lhs);
-+}
-+
-+// Float32 variant of copySignDouble; emits the same fcpsgn with |rhs| as the
-+// sign source and |lhs| as the magnitude source.
-+void MacroAssembler::copySignFloat32(FloatRegister lhs, FloatRegister rhs,
-+ FloatRegister output) {
-+ as_fcpsgn(output, rhs, lhs);
-+}
-+
-+// ===============================================================
-+// GC / nursery helpers.
-+
-+// Masks |ptr| with ~gc::ChunkMask to get the start of its chunk, then loads
-+// the pointer stored at gc::ChunkStoreBufferOffset in that chunk into
-+// |buffer|.
-+void MacroAssembler::loadStoreBuffer(Register ptr, Register buffer) {
-+ // NOTE(review): the mask is narrowed to int32_t; this relies on andPtr
-+ // sign-extending the Imm32 so the upper pointer bits survive — confirm.
-+ andPtr(Imm32(int32_t(~gc::ChunkMask)), ptr, buffer);
-+ loadPtr(Address(buffer, gc::ChunkStoreBufferOffset), buffer);
-+}
-+
-+// Address overload: forwards to the shared branchValueIsNurseryCellImpl
-+// template.
-+void MacroAssembler::branchValueIsNurseryCell(Condition cond,
-+ const Address& address,
-+ Register temp, Label* label) {
-+ branchValueIsNurseryCellImpl(cond, address, temp, label);
-+}
-+
-+// Branches to |label| when the boxed value at |value| is (cond == Equal) or
-+// is not (cond == NotEqual) a GC thing whose chunk has a non-null store
-+// buffer pointer. |temp| is clobbered.
-+template <typename T>
-+void MacroAssembler::branchValueIsNurseryCellImpl(Condition cond,
-+                                                  const T& value, Register temp,
-+                                                  Label* label) {
-+  MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
-+  MOZ_ASSERT(temp != InvalidReg);
-+
-+  Label done;
-+
-+  // Values that are not GC things are never nursery cells: for Equal that
-+  // means "don't branch" (skip to done), for NotEqual it means "branch".
-+  Label* notGCThing = (cond == Assembler::Equal) ? &done : label;
-+  branchTestGCThing(Assembler::NotEqual, value, notGCThing);
-+
-+  // Load the chunk's store buffer pointer and test it against null.
-+  getGCThingValueChunk(value, temp);
-+  loadPtr(Address(temp, gc::ChunkStoreBufferOffset), temp);
-+  branchPtr(InvertCondition(cond), temp, ImmWord(0), label);
-+
-+  bind(&done);
-+}
-+
-+// ===============================================================
-+// Template instantiations.
-+
-+// Boxes |value| (of static type |valueType|) and stores it to |dest|.
-+// Doubles are boxed from their FPU register; constants are stored directly;
-+// everything else is tagged from its GPR.
-+template <typename T>
-+void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value,
-+                                       MIRType valueType, const T& dest) {
-+  MOZ_ASSERT(valueType < MIRType::Value);
-+
-+  if (valueType == MIRType::Double) {
-+    boxDouble(value.reg().typedReg().fpu(), dest);
-+    return;
-+  }
-+
-+  if (value.constant()) {
-+    storeValue(value.value(), dest);
-+    return;
-+  }
-+
-+  Register payload = value.reg().typedReg().gpr();
-+  storeValue(ValueTypeFromMIRType(valueType), payload, dest);
-+}
-+
-+// Explicit instantiations for the destination kinds used by callers.
-+template void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value,
-+                                                MIRType valueType,
-+                                                const Address& dest);
-+template void MacroAssembler::storeUnboxedValue(
-+    const ConstantOrRegister& value, MIRType valueType,
-+    const BaseObjectElementIndex& dest);
-+
-+// ===============================================================
-+// Misc stubs.
-+
-+// Intentionally empty: |msg| is ignored and nothing is emitted.
-+void MacroAssembler::comment(const char* msg) {}
-+
-+// Emits a single isync instruction as the speculation barrier.
-+void MacroAssembler::speculationBarrier() {
-+ // isync provides execution synchronization: discards prefetched
-+ // instructions and forces a refetch+reexecute past the barrier.
-+ // No instruction following isync may begin (architecturally) until
-+ // isync completes, blocking speculative bypass — exactly the
-+ // Spectre v1 guarantee needed after a C call returns a value that
-+ // may influence subsequent loads. Reachable from shared
-+ // CodeGenerator under JitOptions.spectreJitToCxxCalls.
-+ // NOTE(review): confirm against the Power ISA that isync alone is a
-+ // sufficient speculation barrier on all supported CPUs.
-+ as_isync();
-+}
-+
-+// Spin-wait hint; this port emits a plain nop.
-+void MacroAssembler::atomicPause() { nop(); }
-+
-+// Wasm entry point for fake exit frames; forwards all arguments unchanged to
-+// enterFakeExitFrame.
-+void MacroAssembler::enterFakeExitFrameForWasm(Register cxreg, Register scratch,
-+ ExitFrameType type) {
-+ enterFakeExitFrame(cxreg, scratch, type);
-+}
-+
-+// Compares |index| with |boundsCheckLimit| and branches to |label| when
-+// |cond| holds.
-+// NOTE(review): ma_cmp is called without the is32bit flag that the atomic
-+// helpers in this file pass for 32-bit compares; confirm both registers are
-+// guaranteed to hold identically extended 32-bit values.
-+void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index,
-+ Register boundsCheckLimit,
-+ Label* label) {
-+ ma_cmp(index, boundsCheckLimit, cond);
-+ ma_b(cond, label);
-+}
-+
-+// Memory-operand overload: loads the 32-bit limit from |boundsCheckLimit|
-+// into a scratch register, compares |index| against it and branches to
-+// |label| when |cond| holds.
-+void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index,
-+                                       Address boundsCheckLimit, Label* label) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register limit = temps.Acquire();
-+
-+  load32(boundsCheckLimit, limit);
-+  ma_cmp(index, limit, cond);
-+  ma_b(cond, label);
-+}
-+
-+// 64-bit bounds check: compares the registers backing |index| and
-+// |boundsCheckLimit| and branches to |label| when |cond| holds.
-+void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index,
-+ Register64 boundsCheckLimit,
-+ Label* label) {
-+ ma_cmp(index.reg, boundsCheckLimit.reg, cond);
-+ ma_b(cond, label);
-+}
-+
-+// Memory-operand overload: loads the pointer-sized limit from
-+// |boundsCheckLimit| into a scratch register, compares |index| against it and
-+// branches to |label| when |cond| holds.
-+void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index,
-+                                       Address boundsCheckLimit, Label* label) {
-+  UseScratchRegisterScope temps(asMasm());
-+  Register limit = temps.Acquire();
-+
-+  loadPtr(boundsCheckLimit, limit);
-+  ma_cmp(index.reg, limit, cond);
-+  ma_b(cond, label);
-+}
-+
-+// Emits a patchable load of the placeholder value 0 into |dest| and returns
-+// the code offset at which that load sequence begins.
-+CodeOffset MacroAssembler::move32WithPatch(Register dest) {
-+ // Record the offset before emitting so it points at the stanza's start.
-+ CodeOffset offset(currentOffset());
-+ emitLoad64Stanza(dest, 0);
-+ return offset;
-+}
-+
-+// Emits: load the 32-bit value at |address|, add a patchable immediate, store
-+// the result back, and branch to |label| if the result is negative. Returns
-+// the offset immediately after the addi so that
-+// patchSub32FromMemAndBranchIfNegative can locate and rewrite it.
-+CodeOffset MacroAssembler::sub32FromMemAndBranchIfNegativeWithPatch(
-+ Address address, Label* label) {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ MOZ_ASSERT(scratch != address.base);
-+ load32(address, scratch);
-+ // Subtract a placeholder value (will be patched).
-+ // Use addi with positive placeholder (128), which will be patched to
-+ // addi with negative value. The immediate is in the addi instruction.
-+ // NOTE(review): addi reads RA=r0 as the literal 0; presumably the scratch
-+ // allocator never hands out r0 here — confirm.
-+ as_addi(scratch, scratch, 128);
-+ // The patch point is the offset just past the addi.
-+ CodeOffset patchPoint = CodeOffset(currentOffset());
-+ store32(scratch, address);
-+ // Branch if result is negative (signed).
-+ as_cmpwi(scratch, 0);
-+ ma_b(LessThan, label);
-+ return patchPoint;
-+}
-+
-+// convertUInt64ToDouble on this port asserts that no temp register is
-+// passed, so callers never need to allocate one.
-+bool MacroAssembler::convertUInt64ToDoubleNeedsTemp() { return false; }
-+
-+// Calls the absolute address carried by |imm| by re-wrapping it as an ImmPtr
-+// and using the ImmPtr overload.
-+void MacroAssembler::call(ImmWord imm) {
-+  call(ImmPtr(reinterpret_cast<void*>(imm.value)));
-+}
-+
-+// Converts the unsigned 64-bit integer in |src| to a double in |dest|.
-+// |temp| must be Register::Invalid(); no GPR temporary is used.
-+void MacroAssembler::convertUInt64ToDouble(Register64 src, FloatRegister dest,
-+ Register temp) {
-+ MOZ_ASSERT(temp == Register::Invalid());
-+ // POWER7+ has fcfidu (unsigned i64 → f64) as a single instruction; no
-+ // sign-split / branch / GPR scratch needed.
-+ // Move the GPR bits into the FPR, then convert in place.
-+ as_mtvsrd(dest, src.reg);
-+ as_fcfidu(dest, dest);
-+}
-+
-+// Converts the 64-bit integer in |src| to a float32 in |dest|: moves the GPR
-+// bits into the FPR with mtvsrd, then converts in place with fcfids.
-+void MacroAssembler::convertInt64ToFloat32(Register64 src, FloatRegister dest) {
-+ as_mtvsrd(dest, src.reg);
-+ as_fcfids(dest, dest);
-+}
-+
-+// Converts the unsigned 64-bit integer in |src| to a float32 in |dest|.
-+// |temp| must be Register::Invalid(); no GPR temporary is used.
-+void MacroAssembler::convertUInt64ToFloat32(Register64 src, FloatRegister dest,
-+ Register temp) {
-+ MOZ_ASSERT(temp == Register::Invalid());
-+ // POWER7+ has fcfidus (unsigned i64 → f32) as a single instruction.
-+ as_mtvsrd(dest, src.reg);
-+ as_fcfidus(dest, dest);
-+}
-+
-+// Emits dest = lhs / rhs on 32-bit operands (signed unless |isUnsigned|).
-+// The result is sign-extended into the full register with extsw.
-+// |volatileLiveRegs| is unused.
-+void MacroAssembler::flexibleQuotient32(
-+    Register lhs, Register rhs, Register dest, bool isUnsigned,
-+    const LiveRegisterSet& volatileLiveRegs) {
-+  // PPC64 divw(INT32_MIN, -1) is undefined; return INT32_MIN to match
-+  // ARM64/LoongArch64 hardware sdiv behavior.
-+  Label done;
-+  if (!isUnsigned) {
-+    Label notMinOverflow;
-+    // Compare as 32-bit values: divw only consumes the low words, so the
-+    // guard must not depend on whatever the upper 32 bits of the registers
-+    // happen to hold.
-+    branch32(Assembler::NotEqual, lhs, Imm32(INT32_MIN), &notMinOverflow);
-+    branch32(Assembler::NotEqual, rhs, Imm32(-1), &notMinOverflow);
-+    move32(Imm32(INT32_MIN), dest);
-+    jump(&done);
-+    bind(&notMinOverflow);
-+  }
-+  if (isUnsigned) {
-+    as_divwu(dest, lhs, rhs);
-+  } else {
-+    as_divw(dest, lhs, rhs);
-+  }
-+  as_extsw(dest, dest);
-+  bind(&done);
-+}
-+
-+// Out-of-line check for float32 -> int32 truncation; forwards to
-+// outOfLineWasmTruncateToInt32Check with MIRType::Float32.
-+void MacroAssembler::oolWasmTruncateCheckF32ToI32(
-+ FloatRegister input, Register output, TruncFlags flags,
-+ const wasm::TrapSiteDesc& trapSiteDesc, Label* rejoin) {
-+ outOfLineWasmTruncateToInt32Check(input, output, MIRType::Float32, flags,
-+ rejoin, trapSiteDesc);
-+}
-+
-+// Out-of-line check for float32 -> int64 truncation; forwards to
-+// outOfLineWasmTruncateToInt64Check with MIRType::Float32.
-+void MacroAssembler::oolWasmTruncateCheckF32ToI64(
-+ FloatRegister input, Register64 output, TruncFlags flags,
-+ const wasm::TrapSiteDesc& trapSiteDesc, Label* rejoin) {
-+ outOfLineWasmTruncateToInt64Check(input, output, MIRType::Float32, flags,
-+ rejoin, trapSiteDesc);
-+}
-+
-+// Out-of-line check for double -> int32 truncation; forwards to
-+// outOfLineWasmTruncateToInt32Check with MIRType::Double.
-+void MacroAssembler::oolWasmTruncateCheckF64ToI32(
-+ FloatRegister input, Register output, TruncFlags flags,
-+ const wasm::TrapSiteDesc& trapSiteDesc, Label* rejoin) {
-+ outOfLineWasmTruncateToInt32Check(input, output, MIRType::Double, flags,
-+ rejoin, trapSiteDesc);
-+}
-+
-+// Out-of-line check for double -> int64 truncation; forwards to
-+// outOfLineWasmTruncateToInt64Check with MIRType::Double.
-+void MacroAssembler::oolWasmTruncateCheckF64ToI64(
-+ FloatRegister input, Register64 output, TruncFlags flags,
-+ const wasm::TrapSiteDesc& trapSiteDesc, Label* rejoin) {
-+ outOfLineWasmTruncateToInt64Check(input, output, MIRType::Double, flags,
-+ rejoin, trapSiteDesc);
-+}
-+
-+// Out-of-line path taken when an inline wasm float -> (u)int32 truncation
-+// flagged its input. In saturating mode it materializes the clamped result
-+// in |output| and jumps back to |rejoin| (which must already be bound).
-+// Otherwise it traps: InvalidConversionToInteger for NaN, IntegerOverflow
-+// for everything else.
-+void MacroAssemblerPPC64Compat::outOfLineWasmTruncateToInt32Check(
-+    FloatRegister input, Register output, MIRType fromType, TruncFlags flags,
-+    Label* rejoin, const wasm::TrapSiteDesc& trapSiteDesc) {
-+  bool isUnsigned = flags & TRUNC_UNSIGNED;
-+  bool isSaturating = flags & TRUNC_SATURATING;
-+
-+  if (isSaturating) {
-+    // fpscratch holds 0.0 in the input's precision for the sign tests below.
-+    ScratchDoubleScope fpscratch(asMasm());
-+    if (fromType == MIRType::Double) {
-+      asMasm().loadConstantDouble(0.0, fpscratch);
-+    } else {
-+      asMasm().loadConstantFloat32(0.0f, fpscratch);
-+    }
-+
-+    if (isUnsigned) {
-+      // If input < 0 or NaN, output = 0; else output = UINT32_MAX.
-+      Label notNegOrNaN;
-+      if (fromType == MIRType::Double) {
-+        asMasm().branchDouble(Assembler::DoubleGreaterThanOrEqual, input,
-+                              fpscratch, &notNegOrNaN);
-+      } else {
-+        asMasm().branchFloat(Assembler::DoubleGreaterThanOrEqual, input,
-+                             fpscratch, &notNegOrNaN);
-+      }
-+      asMasm().move32(Imm32(0), output);
-+      asMasm().jump(rejoin);
-+      asMasm().bind(&notNegOrNaN);
-+      asMasm().move32(Imm32(UINT32_MAX), output);
-+    } else {
-+      // Signed: NaN -> 0, negative overflow -> INT32_MIN,
-+      // positive overflow already saturated to INT32_MAX.
-+      Label notNaN;
-+      if (fromType == MIRType::Double) {
-+        asMasm().branchDouble(Assembler::DoubleOrdered, input, input, &notNaN);
-+      } else {
-+        asMasm().branchFloat(Assembler::DoubleOrdered, input, input, &notNaN);
-+      }
-+      asMasm().move32(Imm32(0), output);
-+      asMasm().jump(rejoin);
-+
-+      asMasm().bind(&notNaN);
-+      // Non-negative input: keep the value the inline conversion produced.
-+      if (fromType == MIRType::Double) {
-+        asMasm().branchDouble(Assembler::DoubleGreaterThanOrEqual, input,
-+                              fpscratch, rejoin);
-+      } else {
-+        asMasm().branchFloat(Assembler::DoubleGreaterThanOrEqual, input,
-+                             fpscratch, rejoin);
-+      }
-+      asMasm().move32(Imm32(INT32_MIN), output);
-+    }
-+
-+    MOZ_ASSERT(rejoin->bound());
-+    asMasm().jump(rejoin);
-+    return;
-+  }
-+
-+  // Trapping mode: distinguish NaN from out-of-range.
-+  Label inputIsNaN;
-+  if (fromType == MIRType::Double) {
-+    asMasm().branchDouble(Assembler::DoubleUnordered, input, input,
-+                          &inputIsNaN);
-+  } else {
-+    asMasm().branchFloat(Assembler::DoubleUnordered, input, input, &inputIsNaN);
-+  }
-+
-+  asMasm().wasmTrap(wasm::Trap::IntegerOverflow, trapSiteDesc);
-+  asMasm().bind(&inputIsNaN);
-+  asMasm().wasmTrap(wasm::Trap::InvalidConversionToInteger, trapSiteDesc);
-+}
-+
-+// Out-of-line path taken when an inline wasm float -> (u)int64 truncation
-+// flagged its input. In saturating mode it materializes the clamped result
-+// in |output_| and jumps back to |rejoin| (which must already be bound).
-+// Otherwise it traps: InvalidConversionToInteger for NaN, IntegerOverflow
-+// for everything else.
-+void MacroAssemblerPPC64Compat::outOfLineWasmTruncateToInt64Check(
-+    FloatRegister input, Register64 output_, MIRType fromType, TruncFlags flags,
-+    Label* rejoin, const wasm::TrapSiteDesc& trapSiteDesc) {
-+  bool isUnsigned = flags & TRUNC_UNSIGNED;
-+  bool isSaturating = flags & TRUNC_SATURATING;
-+
-+  if (isSaturating) {
-+    // fpscratch holds 0.0 in the input's precision for the sign tests below.
-+    ScratchDoubleScope fpscratch(asMasm());
-+    Register output = output_.reg;
-+
-+    if (fromType == MIRType::Double) {
-+      asMasm().loadConstantDouble(0.0, fpscratch);
-+    } else {
-+      asMasm().loadConstantFloat32(0.0f, fpscratch);
-+    }
-+
-+    if (isUnsigned) {
-+      // If input < 0 or NaN, output = 0; else output = UINT64_MAX.
-+      Label notNegOrNaN;
-+      if (fromType == MIRType::Double) {
-+        asMasm().branchDouble(Assembler::DoubleGreaterThanOrEqual, input,
-+                              fpscratch, &notNegOrNaN);
-+      } else {
-+        asMasm().branchFloat(Assembler::DoubleGreaterThanOrEqual, input,
-+                             fpscratch, &notNegOrNaN);
-+      }
-+      asMasm().movePtr(ImmWord(0), output);
-+      asMasm().jump(rejoin);
-+      asMasm().bind(&notNegOrNaN);
-+      asMasm().movePtr(ImmWord(UINT64_MAX), output);
-+    } else {
-+      // Signed: NaN -> 0, negative input -> INT64_MIN, non-negative input
-+      // keeps the value the inline conversion produced.
-+      Label notNaN;
-+      if (fromType == MIRType::Double) {
-+        asMasm().branchDouble(Assembler::DoubleOrdered, input, input, &notNaN);
-+      } else {
-+        asMasm().branchFloat(Assembler::DoubleOrdered, input, input, &notNaN);
-+      }
-+      asMasm().movePtr(ImmWord(0), output);
-+      asMasm().jump(rejoin);
-+
-+      asMasm().bind(&notNaN);
-+      if (fromType == MIRType::Double) {
-+        asMasm().branchDouble(Assembler::DoubleGreaterThanOrEqual, input,
-+                              fpscratch, rejoin);
-+      } else {
-+        asMasm().branchFloat(Assembler::DoubleGreaterThanOrEqual, input,
-+                             fpscratch, rejoin);
-+      }
-+      asMasm().movePtr(ImmWord(INT64_MIN), output);
-+    }
-+
-+    MOZ_ASSERT(rejoin->bound());
-+    asMasm().jump(rejoin);
-+    return;
-+  }
-+
-+  // Trapping mode: distinguish NaN from out-of-range.
-+  Label inputIsNaN;
-+  if (fromType == MIRType::Double) {
-+    asMasm().branchDouble(Assembler::DoubleUnordered, input, input,
-+                          &inputIsNaN);
-+  } else {
-+    asMasm().branchFloat(Assembler::DoubleUnordered, input, input, &inputIsNaN);
-+  }
-+
-+  asMasm().wasmTrap(wasm::Trap::IntegerOverflow, trapSiteDesc);
-+  asMasm().bind(&inputIsNaN);
-+  asMasm().wasmTrap(wasm::Trap::InvalidConversionToInteger, trapSiteDesc);
-+}
-+
-+// Replaces the stack pointer with the word stored at the top of the stack
-+// and adjusts the tracked frame size by one pointer-sized slot.
-+void MacroAssembler::PopStackPtr() {
-+ loadPtr(Address(StackPointer, 0), StackPointer);
-+ adjustFrame(-int32_t(sizeof(intptr_t)));
-+}
-+
-+// Rewrites the placeholder addi emitted by
-+// sub32FromMemAndBranchIfNegativeWithPatch so that it adds -imm.value.
-+// |offset| is the value that function returned; |imm| must be in [1, 127].
-+void MacroAssembler::patchSub32FromMemAndBranchIfNegative(CodeOffset offset,
-+                                                          Imm32 imm) {
-+  int32_t amount = imm.value;
-+  MOZ_RELEASE_ASSERT(amount >= 1 && amount <= 127);
-+
-+  // |offset| points just past the addi, so step back one 4-byte instruction
-+  // to reach it.
-+  BufferOffset addiOffset(offset.offset() - 4);
-+  Instruction* addi = (Instruction*)m_buffer.getInst(addiOffset);
-+
-+  // PPC addi: opcode(6) | RT(5) | RA(5) | SI(16). Keep the upper half of the
-+  // word and replace the 16-bit SI field with -amount.
-+  uint32_t upperHalf = addi->encode() & 0xffff0000;
-+  uint32_t negatedImm = (uint16_t)(-amount & 0xffff);
-+  addi->setData(upperHalf | negatedImm);
-+}
-+
-+// Inline double -> int32 truncation: converts with fctiwz, sign-extends the
-+// result into |output|, and branches to |oolEntry| when the conversion raised
-+// VXCVI. |isSaturating| is not consulted here.
-+void MacroAssembler::wasmTruncateDoubleToInt32(FloatRegister input,
-+ Register output,
-+ bool isSaturating,
-+ Label* oolEntry) {
-+ ScratchDoubleScope fpscratch(asMasm());
-+ // Clear VXCVI (bit 23) before the conversion so we can detect overflow.
-+ as_mtfsb0(23);
-+ as_fctiwz(fpscratch, input);
-+ as_mfvsrd(output, fpscratch);
-+ as_extsw(output, output);
-+ // Move FPSCR field 5 (which contains VXCVI) to CR0.
-+ // If the conversion was invalid (NaN or out-of-range), VXCVI=1 → SO set.
-+ as_mcrfs(cr0, 5);
-+ ma_b(SOBit, oolEntry);
-+}
-+
-+// Inline double -> uint32 truncation: converts to int64 with fctidz and
-+// branches to |oolEntry| for NaN inputs or when the upper 32 bits of the
-+// result are non-zero. |isSaturating| is not consulted here.
-+void MacroAssembler::wasmTruncateDoubleToUInt32(FloatRegister input,
-+ Register output,
-+ bool isSaturating,
-+ Label* oolEntry) {
-+ ScratchDoubleScope fpscratch(asMasm());
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ // Always check for NaN — the ool handler clamps for saturating mode.
-+ as_fcmpu(input, input);
-+ ma_b(DoubleUnordered, oolEntry);
-+ as_fctidz(fpscratch, input);
-+ as_mfvsrd(output, fpscratch);
-+ // scratch = upper 32 bits of the 64-bit result; non-zero means the value
-+ // is outside the uint32 range (this includes negative results).
-+ x_srdi(scratch, output, 32);
-+ as_extsw(output, output);
-+ as_cmpdi(scratch, 0);
-+ ma_b(NotEqual, oolEntry);
-+}
-+
-+// Inline float32 -> int32 truncation; same sequence as the double variant:
-+// clear FPSCR bit 23, convert with fctiwz, sign-extend into |output|, then
-+// branch to |oolEntry| if FPSCR field 5 copied into CR0 has SO set.
-+// |isSaturating| is not consulted here.
-+void MacroAssembler::wasmTruncateFloat32ToInt32(FloatRegister input,
-+ Register output,
-+ bool isSaturating,
-+ Label* oolEntry) {
-+ ScratchDoubleScope fpscratch(asMasm());
-+ as_mtfsb0(23);
-+ as_fctiwz(fpscratch, input);
-+ as_mfvsrd(output, fpscratch);
-+ as_extsw(output, output);
-+ as_mcrfs(cr0, 5);
-+ ma_b(SOBit, oolEntry);
-+}
-+
-+// Inline float32 -> uint32 truncation; same sequence as the double variant:
-+// branch to |oolEntry| for NaN, convert to int64 with fctidz, and branch to
-+// |oolEntry| when the upper 32 bits of the result are non-zero.
-+// |isSaturating| is not consulted here.
-+void MacroAssembler::wasmTruncateFloat32ToUInt32(FloatRegister input,
-+ Register output,
-+ bool isSaturating,
-+ Label* oolEntry) {
-+ ScratchDoubleScope fpscratch(asMasm());
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+ // NaN compares unordered with itself.
-+ as_fcmpu(input, input);
-+ ma_b(DoubleUnordered, oolEntry);
-+ as_fctidz(fpscratch, input);
-+ as_mfvsrd(output, fpscratch);
-+ // scratch = upper 32 bits of the 64-bit result.
-+ x_srdi(scratch, output, 32);
-+ as_extsw(output, output);
-+ as_cmpdi(scratch, 0);
-+ ma_b(NotEqual, oolEntry);
-+}
-+
-+// Inline double -> int64 truncation: converts with fctidz into |output| and
-+// branches to |oolEntry| when FPSCR field 5 copied into CR0 has SO set.
-+// |tempDouble| must be invalid. In saturating mode |oolRejoin| is bound
-+// right after the branch.
-+void MacroAssembler::wasmTruncateDoubleToInt64(
-+ FloatRegister input, Register64 output, bool isSaturating, Label* oolEntry,
-+ Label* oolRejoin, FloatRegister tempDouble) {
-+ MOZ_ASSERT(tempDouble.isInvalid());
-+ ScratchDoubleScope fpscratch(asMasm());
-+ // Clear FPSCR bit 23 so a stale flag cannot trigger the branch below.
-+ as_mtfsb0(23);
-+ as_fctidz(fpscratch, input);
-+ as_mfvsrd(output.reg, fpscratch);
-+ as_mcrfs(cr0, 5);
-+ ma_b(SOBit, oolEntry);
-+ if (isSaturating) {
-+ bind(oolRejoin);
-+ }
-+}
-+
-+// Inline float32 -> int64 truncation: converts with fctidz into |output| and
-+// branches to |oolEntry| when FPSCR field 5 copied into CR0 has SO set.
-+// |tempFloat| must be invalid. In saturating mode |oolRejoin| is bound right
-+// after the branch.
-+void MacroAssembler::wasmTruncateFloat32ToInt64(
-+ FloatRegister input, Register64 output, bool isSaturating, Label* oolEntry,
-+ Label* oolRejoin, FloatRegister tempFloat) {
-+ MOZ_ASSERT(tempFloat.isInvalid());
-+ ScratchDoubleScope fpscratch(asMasm());
-+ // Clear FPSCR bit 23 so a stale flag cannot trigger the branch below.
-+ as_mtfsb0(23);
-+ as_fctidz(fpscratch, input);
-+ as_mfvsrd(output.reg, fpscratch);
-+ as_mcrfs(cr0, 5);
-+ ma_b(SOBit, oolEntry);
-+ if (isSaturating) {
-+ bind(oolRejoin);
-+ }
-+}
-+
-+// Inline double -> uint64 truncation: converts with fctiduz into |output|
-+// and branches to |oolEntry| when FPSCR field 5 copied into CR0 has SO set.
-+// |tempDouble| must be invalid. In saturating mode |oolRejoin| is bound
-+// right after the branch.
-+void MacroAssembler::wasmTruncateDoubleToUInt64(
-+ FloatRegister input, Register64 output, bool isSaturating, Label* oolEntry,
-+ Label* oolRejoin, FloatRegister tempDouble) {
-+ MOZ_ASSERT(tempDouble.isInvalid());
-+ ScratchDoubleScope fpscratch(asMasm());
-+ // Clear FPSCR bit 23 so a stale flag cannot trigger the branch below.
-+ as_mtfsb0(23);
-+ as_fctiduz(fpscratch, input);
-+ as_mfvsrd(output.reg, fpscratch);
-+ as_mcrfs(cr0, 5);
-+ ma_b(SOBit, oolEntry);
-+ if (isSaturating) {
-+ bind(oolRejoin);
-+ }
-+}
-+
-+// Inline float32 -> uint64 truncation: converts with fctiduz into |output|
-+// and branches to |oolEntry| when FPSCR field 5 copied into CR0 has SO set.
-+// |tempFloat| must be invalid. In saturating mode |oolRejoin| is bound right
-+// after the branch.
-+void MacroAssembler::wasmTruncateFloat32ToUInt64(
-+ FloatRegister input, Register64 output, bool isSaturating, Label* oolEntry,
-+ Label* oolRejoin, FloatRegister tempFloat) {
-+ MOZ_ASSERT(tempFloat.isInvalid());
-+ ScratchDoubleScope fpscratch(asMasm());
-+ // Clear FPSCR bit 23 so a stale flag cannot trigger the branch below.
-+ as_mtfsb0(23);
-+ as_fctiduz(fpscratch, input);
-+ as_mfvsrd(output.reg, fpscratch);
-+ as_mcrfs(cr0, 5);
-+ ma_b(SOBit, oolEntry);
-+ if (isSaturating) {
-+ bind(oolRejoin);
-+ }
-+}
-+
-+// Records |framePtr| as the last profiling frame of the context's profiling
-+// activation and clears its last profiling call site. |scratch| is
-+// clobbered.
-+void MacroAssemblerPPC64Compat::profilerEnterFrame(Register framePtr,
-+ Register scratch) {
-+ // scratch = cx->profilingActivation_
-+ asMasm().loadJSContext(scratch);
-+ loadPtr(Address(scratch, offsetof(JSContext, profilingActivation_)), scratch);
-+ storePtr(framePtr,
-+ Address(scratch, JitActivation::offsetOfLastProfilingFrame()));
-+ storePtr(ImmPtr(nullptr),
-+ Address(scratch, JitActivation::offsetOfLastProfilingCallSite()));
-+}
-+
-+// Jumps to the JIT runtime's shared profiler exit-frame tail.
-+void MacroAssemblerPPC64Compat::profilerExitFrame() {
-+ jump(asMasm().runtime()->jitRuntime()->getProfilerExitFrameTail());
-+}
-+
-+// Emits dest = src % ((1 << shift) - 1) for a signed 32-bit |src|, with the
-+// result taking the sign of |src|. |hold| receives +1/-1 for the input sign
-+// and |remain| is used as the working copy; both are clobbered. If |negZero|
-+// is non-null, control branches there when the input is negative and the
-+// remainder is zero.
-+void MacroAssemblerPPC64Compat::ma_mod_mask(Register src, Register dest,
-+ Register hold, Register remain,
-+ int32_t shift, Label* negZero) {
-+ // Compute x % ((1<<shift) - 1) by digit-summing in base b = 1<<shift.
-+ // Since b % (b-1) == 1, x % (b-1) == sum of base-b digits of x, mod (b-1).
-+ // NOTE(review): 1 << shift overflows int for shift >= 31; presumably
-+ // callers only pass smaller shifts — confirm.
-+ int32_t mask = (1 << shift) - 1;
-+ Label head, negative, sumSigned, done;
-+
-+ as_or_(remain, src, src); // move src -> remain
-+ xs_li(dest, 0);
-+
-+ // Check sign (32-bit signed comparison)
-+ as_cmpwi(remain, 0);
-+ ma_b(Assembler::LessThan, &negative);
-+ xs_li(hold, 1);
-+ jump(&head);
-+
-+ bind(&negative);
-+ xs_li(hold, -1);
-+ // Work on |remain|'s magnitude, zero-extended to 32 bits.
-+ as_neg(remain, remain);
-+ as_rldicl(remain, remain, 0, 32);
-+
-+ bind(&head);
-+ {
-+ UseScratchRegisterScope temps(asMasm());
-+ Register scratch = temps.Acquire();
-+
-+ // Extract bottom 'shift' bits: scratch = remain & mask
-+ move32(Imm32(mask), scratch);
-+ as_and_(scratch, remain, scratch);
-+
-+ // Add to accumulator
-+ as_add(dest, dest, scratch);
-+
-+ // Trial subtraction: scratch = dest - mask
-+ move32(Imm32(mask), scratch);
-+ as_subf(scratch, scratch, dest); // scratch = dest - scratch
-+
-+ // If (dest - mask) >= 0, keep the subtracted value
-+ as_cmpwi(scratch, 0);
-+ ma_b(Assembler::LessThan, &sumSigned);
-+ as_or_(dest, scratch, scratch); // dest = scratch
-+ bind(&sumSigned);
-+
-+ // Shift out the bits we just processed
-+ x_srwi(remain, remain, shift);
-+
-+ // Continue if remain != 0
-+ as_cmpwi(remain, 0);
-+ ma_b(Assembler::NotEqual, &head);
-+ }
-+
-+ // If input was negative, negate result
-+ as_cmpwi(hold, 0);
-+ ma_b(Assembler::GreaterThanOrEqual, &done);
-+
-+ // Negative input with a zero remainder: let the caller handle it.
-+ if (negZero != nullptr) {
-+ as_cmpwi(dest, 0);
-+ ma_b(Assembler::Equal, negZero);
-+ }
-+
-+ as_neg(dest, dest);
-+ as_extsw(dest, dest);
-+
-+ bind(&done);
-+}
-+
-+// ========================================================================
-+// Atomic operations.
-+
-+// Emits an atomic compare-exchange of a 1-, 2- or 4-byte element at |mem|
-+// using a load-reserve / store-conditional loop. |output| receives the value
-+// that was loaded; |newval| is stored only if that value equals |oldval|.
-+// When |access| is non-null, the load-reserve instruction is registered as a
-+// wasm trap site. For 4-byte accesses the three temp registers must be
-+// InvalidReg; for sub-word accesses only |valueTemp| is used.
-+template <typename T>
-+static void CompareExchange(MacroAssembler& masm,
-+ const wasm::MemoryAccessDesc* access,
-+ Scalar::Type type, Synchronization sync,
-+ const T& mem, Register oldval, Register newval,
-+ Register valueTemp, Register offsetTemp,
-+ Register maskTemp, Register output) {
-+ UseScratchRegisterScope temps(masm);
-+ bool signExtend = Scalar::isSignedIntType(type);
-+ unsigned nbytes = Scalar::byteSize(type);
-+
-+ // Validate the temp-register contract for the element size.
-+ switch (nbytes) {
-+ case 1:
-+ case 2:
-+ break;
-+ case 4:
-+ MOZ_ASSERT(valueTemp == InvalidReg);
-+ MOZ_ASSERT(offsetTemp == InvalidReg);
-+ MOZ_ASSERT(maskTemp == InvalidReg);
-+ break;
-+ default:
-+ MOZ_CRASH();
-+ }
-+
-+ Label again, end;
-+
-+ // scratch = effective address of |mem|.
-+ Register scratch = temps.Acquire();
-+ masm.computeEffectiveAddress(mem, scratch);
-+
-+ if (nbytes == 4) {
-+ masm.memoryBarrierBefore(sync);
-+ masm.bind(&again);
-+
-+ if (access) {
-+ masm.flushBuffer(); // see comment in wasmLoadImpl
-+ masm.append(*access, wasm::TrapMachineInsn::Atomic,
-+ FaultingCodeOffset(masm.currentOffset()));
-+ }
-+
-+ masm.as_lwarx(output, r0, scratch);
-+ // ma_cmp(..., is32bit=true) emits cmpw, which compares only bits
-+ // 32:63 (low 32) of both operands per ISA v3.0B. The upper
-+ // 32 bits of oldval are ignored, so no canonicalising extsw needed.
-+ masm.ma_cmp(output, oldval, Assembler::NotEqual, /* is32bit */ true);
-+ masm.ma_b(Assembler::NotEqual, &end);
-+ masm.as_stwcx(newval, r0, scratch);
-+ // Retry if the conditional store did not succeed.
-+ masm.ma_b(Assembler::NotEqual, &again);
-+
-+ masm.memoryBarrierAfter(sync);
-+ // The mismatch path lands here, after the trailing barrier.
-+ masm.bind(&end);
-+ // lwarx zero-extends; sign-extend for 32-bit canonical form.
-+ masm.as_extsw(output, output);
-+
-+ return;
-+ }
-+
-+ // Sub-word (1 or 2 byte) compare-exchange via native lbarx/lharx +
-+ // stbcx./sthcx. POWER7+ (well below our POWER8 baseline). Replaces the
-+ // prior round-down-to-word + mask + RMW dance. lXarx zero-extends the
-+ // loaded byte/half; stXcx. stores only the low 8/16 bits of RS, so no
-+ // pre-masking is needed on the store side. offsetTemp / maskTemp are still
-+ // allocated by the lowering but unused here.
-+ (void)offsetTemp;
-+ (void)maskTemp;
-+
-+ masm.memoryBarrierBefore(sync);
-+
-+ masm.bind(&again);
-+
-+ if (access) {
-+ masm.flushBuffer(); // see comment in wasmLoadImpl
-+ masm.append(*access, wasm::TrapMachineInsn::Atomic,
-+ FaultingCodeOffset(masm.currentOffset()));
-+ }
-+
-+ // Load-reserve the element and bring both the loaded value and |oldval|
-+ // (copied into valueTemp) to the same extension before comparing.
-+ switch (nbytes) {
-+ case 1:
-+ masm.as_lbarx(output, r0, scratch);
-+ if (signExtend) {
-+ masm.as_extsb(valueTemp, oldval);
-+ masm.as_extsb(output, output);
-+ } else {
-+ masm.as_andi_rc(valueTemp, oldval, 0xff);
-+ }
-+ break;
-+ case 2:
-+ masm.as_lharx(output, r0, scratch);
-+ if (signExtend) {
-+ masm.as_extsh(valueTemp, oldval);
-+ masm.as_extsh(output, output);
-+ } else {
-+ masm.as_rlwinm(valueTemp, oldval, 0, 16, 31);
-+ }
-+ break;
-+ }
-+
-+ masm.ma_cmp(output, valueTemp, Assembler::NotEqual, /* is32bit */ true);
-+ masm.ma_b(Assembler::NotEqual, &end);
-+
-+ if (nbytes == 1) {
-+ masm.as_stbcx(newval, r0, scratch);
-+ } else {
-+ masm.as_sthcx(newval, r0, scratch);
-+ }
-+ // Retry if the conditional store did not succeed.
-+ masm.ma_b(Assembler::NotEqual, &again);
-+
-+ masm.memoryBarrierAfter(sync);
-+
-+ masm.bind(&end);
-+}
-+
-+// Emits a 64-bit atomic compare-exchange at |mem| using an ldarx / stdcx.
-+// loop. |output| receives the loaded value; |replace| is stored only if that
-+// value equals |expect|. |output| must not alias |expect| or |replace|. When
-+// |access| is non-null, the ldarx is registered as a wasm trap site.
-+template <typename T>
-+static void CompareExchange64(MacroAssembler& masm,
-+                              const wasm::MemoryAccessDesc* access,
-+                              Synchronization sync, const T& mem,
-+                              Register64 expect, Register64 replace,
-+                              Register64 output) {
-+  MOZ_ASSERT(expect != output && replace != output);
-+
-+  UseScratchRegisterScope temps(masm);
-+  Register addr = temps.Acquire();
-+  masm.computeEffectiveAddress(mem, addr);
-+
-+  Label retry;
-+  Label mismatch;
-+
-+  masm.memoryBarrierBefore(sync);
-+
-+  masm.bind(&retry);
-+
-+  if (access) {
-+    masm.flushBuffer();  // see comment in wasmLoadImpl
-+    masm.append(*access, wasm::TrapMachineInsn::Atomic,
-+                FaultingCodeOffset(masm.currentOffset()));
-+  }
-+
-+  // Load-reserve the current value and bail out if it differs from |expect|.
-+  masm.as_ldarx(output.reg, r0, addr);
-+  masm.ma_cmp(output.reg, expect.reg, Assembler::NotEqual);
-+  masm.ma_b(Assembler::NotEqual, &mismatch);
-+
-+  // Try to publish |replace|; loop if the conditional store did not succeed.
-+  masm.as_stdcx(replace.reg, r0, addr);
-+  masm.ma_b(Assembler::NotEqual, &retry);
-+
-+  masm.memoryBarrierAfter(sync);
-+
-+  masm.bind(&mismatch);
-+}
-+
-+template <typename T> // Emits an atomic exchange of a 1-, 2- or 4-byte value at `mem`.
-+static void AtomicExchange(MacroAssembler& masm,
-+ const wasm::MemoryAccessDesc* access, // when non-null, a wasm trap site is recorded
-+ Scalar::Type type, Synchronization sync,
-+ const T& mem, Register value, Register valueTemp,
-+ Register offsetTemp, Register maskTemp,
-+ Register output) { // output receives the previous memory contents
-+ UseScratchRegisterScope temps(masm);
-+ bool signExtend = Scalar::isSignedIntType(type);
-+ unsigned nbytes = Scalar::byteSize(type);
-+
-+ switch (nbytes) { // validate the access size and the temps supplied for it
-+ case 1:
-+ case 2:
-+ break;
-+ case 4:
-+ MOZ_ASSERT(valueTemp == InvalidReg); // 4-byte accesses must not be handed temps
-+ MOZ_ASSERT(offsetTemp == InvalidReg);
-+ MOZ_ASSERT(maskTemp == InvalidReg);
-+ break;
-+ default:
-+ MOZ_CRASH(); // any other size is not handled by this helper
-+ }
-+
-+ Label again;
-+
-+ Register memTemp = temps.Acquire();
-+ masm.computeEffectiveAddress(mem, memTemp); // memTemp = address used by the load/store pair
-+
-+ if (nbytes == 4) { // word-sized path: lwarx / stwcx.
-+ masm.memoryBarrierBefore(sync);
-+ masm.bind(&again);
-+
-+ if (access) {
-+ masm.flushBuffer(); // flush before recording the trap site; see comment in wasmLoadImpl
-+ masm.append(*access, wasm::TrapMachineInsn::Atomic,
-+ FaultingCodeOffset(masm.currentOffset())); // offset of the lwarx emitted next
-+ }
-+
-+ masm.as_lwarx(output, r0, memTemp); // NOTE(review): no assert that value != output (AtomicExchange64 has one) - confirm callers
-+ masm.as_stwcx(value, r0, memTemp);
-+ masm.ma_b(Assembler::NotEqual, &again); // conditional store did not succeed: retry
-+
-+ masm.memoryBarrierAfter(sync);
-+ // lwarx zero-extends; sign-extend for 32-bit canonical form.
-+ masm.as_extsw(output, output);
-+
-+ return;
-+ }
-+
-+ // Sub-word exchange via native lbarx/lharx + stbcx./sthcx. (POWER7+).
-+ // valueTemp / offsetTemp / maskTemp are still allocated by the lowering but
-+ // unused here.
-+ (void)valueTemp;
-+ (void)offsetTemp;
-+ (void)maskTemp;
-+
-+ masm.memoryBarrierBefore(sync);
-+
-+ masm.bind(&again);
-+
-+ if (access) {
-+ masm.flushBuffer(); // flush before recording the trap site; see comment in wasmLoadImpl
-+ masm.append(*access, wasm::TrapMachineInsn::Atomic,
-+ FaultingCodeOffset(masm.currentOffset())); // offset of the lbarx/lharx emitted next
-+ }
-+
-+ if (nbytes == 1) {
-+ masm.as_lbarx(output, r0, memTemp);
-+ masm.as_stbcx(value, r0, memTemp);
-+ } else {
-+ masm.as_lharx(output, r0, memTemp);
-+ masm.as_sthcx(value, r0, memTemp);
-+ }
-+ masm.ma_b(Assembler::NotEqual, &again); // conditional store did not succeed: retry
-+
-+ if (signExtend) { // signed element types: widen the loaded byte/half with sign
-+ if (nbytes == 1) {
-+ masm.as_extsb(output, output);
-+ } else {
-+ masm.as_extsh(output, output);
-+ }
-+ }
-+ // Unsigned: lbarx/lharx already zero-extend; output is canonical.
-+
-+ masm.memoryBarrierAfter(sync);
-+}
-+
-+template <typename T> // Emits a 64-bit atomic exchange loop (ldarx / stdcx.) on `mem`.
-+static void AtomicExchange64(MacroAssembler& masm,
-+ const wasm::MemoryAccessDesc* access, // when non-null, a wasm trap site is recorded
-+ Synchronization sync, const T& mem,
-+ Register64 value, Register64 output) { // output receives the previous memory contents
-+ MOZ_ASSERT(value != output); // output is loaded before value is stored
-+ UseScratchRegisterScope temps(masm);
-+
-+ Register scratch = temps.Acquire();
-+ masm.computeEffectiveAddress(mem, scratch); // scratch = address used by ldarx and stdcx.
-+
-+ Label tryAgain;
-+
-+ masm.memoryBarrierBefore(sync);
-+
-+ masm.bind(&tryAgain);
-+
-+ if (access) {
-+ masm.flushBuffer(); // flush before recording the trap site; see comment in wasmLoadImpl
-+ masm.append(*access, wasm::TrapMachineInsn::Atomic,
-+ FaultingCodeOffset(masm.currentOffset())); // offset of the ldarx emitted next
-+ }
-+
-+ masm.as_ldarx(output.reg, r0, scratch);
-+
-+ masm.as_stdcx(value.reg, r0, scratch);
-+ masm.ma_b(Assembler::NotEqual, &tryAgain); // conditional store did not succeed: retry
-+
-+ masm.memoryBarrierAfter(sync);
-+}
-+
-+template <typename T> // Emits an atomic fetch-and-<op> on a 1-, 2- or 4-byte value at `mem`.
-+static void AtomicFetchOp(MacroAssembler& masm,
-+ const wasm::MemoryAccessDesc* access, // when non-null, a wasm trap site is recorded
-+ Scalar::Type type, Synchronization sync, AtomicOp op,
-+ const T& mem, Register value, Register valueTemp,
-+ Register offsetTemp, Register maskTemp,
-+ Register output) { // output receives the value loaded before the op is applied
-+ UseScratchRegisterScope temps(masm);
-+ bool signExtend = Scalar::isSignedIntType(type);
-+ unsigned nbytes = Scalar::byteSize(type);
-+
-+ switch (nbytes) { // validate the access size and the temps supplied for it
-+ case 1:
-+ case 2:
-+ break;
-+ case 4:
-+ MOZ_ASSERT(valueTemp == InvalidReg); // 4-byte accesses must not be handed temps
-+ MOZ_ASSERT(offsetTemp == InvalidReg);
-+ MOZ_ASSERT(maskTemp == InvalidReg);
-+ break;
-+ default:
-+ MOZ_CRASH(); // any other size is not handled by this helper
-+ }
-+
-+ Label again;
-+
-+ Register memTemp = temps.Acquire();
-+ masm.computeEffectiveAddress(mem, memTemp); // memTemp = address used by the load/store pair
-+
-+ Register scratch = temps.Acquire(); // holds the post-op value on the 4-byte path
-+
-+ if (nbytes == 4) { // word-sized path: lwarx / op / stwcx.
-+ masm.memoryBarrierBefore(sync);
-+ masm.bind(&again);
-+
-+ if (access) {
-+ masm.flushBuffer(); // flush before recording the trap site; see comment in wasmLoadImpl
-+ masm.append(*access, wasm::TrapMachineInsn::Atomic,
-+ FaultingCodeOffset(masm.currentOffset())); // offset of the lwarx emitted next
-+ }
-+
-+ masm.as_lwarx(output, r0, memTemp);
-+
-+ switch (op) { // scratch = output <op> value
-+ case AtomicOp::Add:
-+ masm.as_add(scratch, output, value);
-+ break;
-+ case AtomicOp::Sub:
-+ masm.as_subf(scratch, value, output); // subf takes the subtrahend first: output - value
-+ break;
-+ case AtomicOp::And:
-+ masm.as_and_(scratch, output, value);
-+ break;
-+ case AtomicOp::Or:
-+ masm.as_or_(scratch, output, value);
-+ break;
-+ case AtomicOp::Xor:
-+ masm.as_xor_(scratch, output, value);
-+ break;
-+ default:
-+ MOZ_CRASH(); // no other AtomicOp is handled
-+ }
-+
-+ masm.as_stwcx(scratch, r0, memTemp);
-+ masm.ma_b(Assembler::NotEqual, &again); // conditional store did not succeed: retry
-+
-+ masm.memoryBarrierAfter(sync);
-+ // lwarx zero-extends; sign-extend for 32-bit canonical form.
-+ masm.as_extsw(output, output);
-+
-+ return;
-+ }
-+
-+ // Sub-word fetch-and-op via native lbarx/lharx + stbcx./sthcx. (POWER7+).
-+ // `output` holds the pre-op loaded value (returned to caller); `valueTemp`
-+ // is the post-op value we condition-store. stXcx. only stores low 8/16 bits
-+ // of RS, so no pre-mask of valueTemp is needed.
-+ // offsetTemp / maskTemp are still allocated by the lowering but unused; the
-+ // local `scratch` is only used in the 4-byte branch above.
-+ (void)offsetTemp;
-+ (void)maskTemp;
-+
-+ masm.memoryBarrierBefore(sync);
-+
-+ masm.bind(&again);
-+
-+ if (access) {
-+ masm.flushBuffer(); // flush before recording the trap site; see comment in wasmLoadImpl
-+ masm.append(*access, wasm::TrapMachineInsn::Atomic,
-+ FaultingCodeOffset(masm.currentOffset())); // offset of the lbarx/lharx emitted next
-+ }
-+
-+ if (nbytes == 1) {
-+ masm.as_lbarx(output, r0, memTemp);
-+ } else {
-+ masm.as_lharx(output, r0, memTemp);
-+ }
-+
-+ switch (op) { // valueTemp = output <op> value
-+ case AtomicOp::Add:
-+ masm.as_add(valueTemp, output, value);
-+ break;
-+ case AtomicOp::Sub:
-+ masm.as_subf(valueTemp, value, output); // subf takes the subtrahend first: output - value
-+ break;
-+ case AtomicOp::And:
-+ masm.as_and_(valueTemp, output, value);
-+ break;
-+ case AtomicOp::Or:
-+ masm.as_or_(valueTemp, output, value);
-+ break;
-+ case AtomicOp::Xor:
-+ masm.as_xor_(valueTemp, output, value);
-+ break;
-+ default:
-+ MOZ_CRASH(); // no other AtomicOp is handled
-+ }
-+
-+ if (nbytes == 1) {
-+ masm.as_stbcx(valueTemp, r0, memTemp);
-+ } else {
-+ masm.as_sthcx(valueTemp, r0, memTemp);
-+ }
-+ masm.ma_b(Assembler::NotEqual, &again); // conditional store did not succeed: retry
-+
-+ if (signExtend) { // signed element types: widen the loaded byte/half with sign
-+ if (nbytes == 1) {
-+ masm.as_extsb(output, output);
-+ } else {
-+ masm.as_extsh(output, output);
-+ }
-+ }
-+ // Unsigned: lbarx/lharx already zero-extend; output is canonical.
-+
-+ masm.memoryBarrierAfter(sync);
-+}
-+
-+template <typename T> // Emits a 64-bit atomic fetch-and-<op> loop (ldarx / op / stdcx.) on `mem`.
-+static void AtomicFetchOp64(MacroAssembler& masm,
-+ const wasm::MemoryAccessDesc* access, // when non-null, a wasm trap site is recorded
-+ Synchronization sync, AtomicOp op, Register64 value,
-+ const T& mem, Register64 temp, Register64 output) { // temp = post-op value, output = pre-op value
-+ MOZ_ASSERT(value != output); // value is read after output has been loaded
-+ MOZ_ASSERT(value != temp); // temp == output is not rejected; atomicEffectOp64 passes the same register
-+ UseScratchRegisterScope temps(masm);
-+ Register scratch = temps.Acquire();
-+ masm.computeEffectiveAddress(mem, scratch); // scratch = address used by ldarx and stdcx.
-+
-+ Label tryAgain;
-+
-+ masm.memoryBarrierBefore(sync);
-+
-+ masm.bind(&tryAgain);
-+
-+ if (access) {
-+ masm.flushBuffer(); // flush before recording the trap site; see comment in wasmLoadImpl
-+ masm.append(*access, wasm::TrapMachineInsn::Atomic,
-+ FaultingCodeOffset(masm.currentOffset())); // offset of the ldarx emitted next
-+ }
-+
-+ masm.as_ldarx(output.reg, r0, scratch);
-+
-+ switch (op) { // temp = output <op> value
-+ case AtomicOp::Add:
-+ masm.as_add(temp.reg, output.reg, value.reg);
-+ break;
-+ case AtomicOp::Sub:
-+ masm.as_subf(temp.reg, value.reg, output.reg); // subf takes the subtrahend first: output - value
-+ break;
-+ case AtomicOp::And:
-+ masm.as_and_(temp.reg, output.reg, value.reg);
-+ break;
-+ case AtomicOp::Or:
-+ masm.as_or_(temp.reg, output.reg, value.reg);
-+ break;
-+ case AtomicOp::Xor:
-+ masm.as_xor_(temp.reg, output.reg, value.reg);
-+ break;
-+ default:
-+ MOZ_CRASH(); // no other AtomicOp is handled
-+ }
-+
-+ masm.as_stdcx(temp.reg, r0, scratch);
-+ masm.ma_b(Assembler::NotEqual, &tryAgain); // conditional store did not succeed: retry
-+
-+ masm.memoryBarrierAfter(sync);
-+}
-+
-+template <typename T> // Emits an atomic read-modify-write on `mem` whose old value is not returned.
-+static void AtomicEffectOp(MacroAssembler& masm,
-+ const wasm::MemoryAccessDesc* access, // when non-null, a wasm trap site is recorded
-+ Scalar::Type type, Synchronization sync, AtomicOp op,
-+ const T& mem, Register value, Register valueTemp,
-+ Register offsetTemp, Register maskTemp) {
-+ UseScratchRegisterScope temps(masm);
-+ unsigned nbytes = Scalar::byteSize(type);
-+
-+ switch (nbytes) { // validate the access size and the temps supplied for it
-+ case 1:
-+ case 2:
-+ break;
-+ case 4:
-+ MOZ_ASSERT(valueTemp == InvalidReg); // 4-byte accesses must not be handed temps
-+ MOZ_ASSERT(offsetTemp == InvalidReg);
-+ MOZ_ASSERT(maskTemp == InvalidReg);
-+ break;
-+ default:
-+ MOZ_CRASH(); // any other size is not handled by this helper
-+ }
-+
-+ Label again;
-+
-+ Register scratch = temps.Acquire();
-+ masm.computeEffectiveAddress(mem, scratch); // scratch = address used by the load/store pair
-+
-+ Register scratch2 = temps.Acquire(); // holds the loaded value, then the post-op value
-+
-+ if (nbytes == 4) { // word-sized path: lwarx / op / stwcx.
-+ masm.memoryBarrierBefore(sync);
-+ masm.bind(&again);
-+
-+ if (access) {
-+ masm.flushBuffer(); // flush before recording the trap site; see comment in wasmLoadImpl
-+ masm.append(*access, wasm::TrapMachineInsn::Atomic,
-+ FaultingCodeOffset(masm.currentOffset())); // offset of the lwarx emitted next
-+ }
-+
-+ masm.as_lwarx(scratch2, r0, scratch);
-+
-+ switch (op) { // scratch2 = scratch2 <op> value, in place
-+ case AtomicOp::Add:
-+ masm.as_add(scratch2, scratch2, value);
-+ break;
-+ case AtomicOp::Sub:
-+ masm.as_subf(scratch2, value, scratch2); // subf takes the subtrahend first: scratch2 - value
-+ break;
-+ case AtomicOp::And:
-+ masm.as_and_(scratch2, scratch2, value);
-+ break;
-+ case AtomicOp::Or:
-+ masm.as_or_(scratch2, scratch2, value);
-+ break;
-+ case AtomicOp::Xor:
-+ masm.as_xor_(scratch2, scratch2, value);
-+ break;
-+ default:
-+ MOZ_CRASH(); // no other AtomicOp is handled
-+ }
-+
-+ masm.as_stwcx(scratch2, r0, scratch);
-+ masm.ma_b(Assembler::NotEqual, &again); // conditional store did not succeed: retry
-+
-+ masm.memoryBarrierAfter(sync);
-+
-+ return;
-+ }
-+
-+ // Sub-word effect-only op via native lbarx/lharx + stbcx./sthcx. (POWER7+).
-+ // No output to return; scratch2 holds the load+op+store value.
-+ // valueTemp / offsetTemp / maskTemp are still allocated by the lowering but
-+ // unused here.
-+ (void)valueTemp;
-+ (void)offsetTemp;
-+ (void)maskTemp;
-+
-+ masm.memoryBarrierBefore(sync);
-+
-+ masm.bind(&again);
-+
-+ if (access) {
-+ masm.flushBuffer(); // flush before recording the trap site; see comment in wasmLoadImpl
-+ masm.append(*access, wasm::TrapMachineInsn::Atomic,
-+ FaultingCodeOffset(masm.currentOffset())); // offset of the lbarx/lharx emitted next
-+ }
-+
-+ if (nbytes == 1) {
-+ masm.as_lbarx(scratch2, r0, scratch);
-+ } else {
-+ masm.as_lharx(scratch2, r0, scratch);
-+ }
-+
-+ switch (op) { // scratch2 = scratch2 <op> value, in place
-+ case AtomicOp::Add:
-+ masm.as_add(scratch2, scratch2, value);
-+ break;
-+ case AtomicOp::Sub:
-+ masm.as_subf(scratch2, value, scratch2); // subf takes the subtrahend first: scratch2 - value
-+ break;
-+ case AtomicOp::And:
-+ masm.as_and_(scratch2, scratch2, value);
-+ break;
-+ case AtomicOp::Or:
-+ masm.as_or_(scratch2, scratch2, value);
-+ break;
-+ case AtomicOp::Xor:
-+ masm.as_xor_(scratch2, scratch2, value);
-+ break;
-+ default:
-+ MOZ_CRASH(); // no other AtomicOp is handled
-+ }
-+
-+ if (nbytes == 1) {
-+ masm.as_stbcx(scratch2, r0, scratch);
-+ } else {
-+ masm.as_sthcx(scratch2, r0, scratch);
-+ }
-+ masm.ma_b(Assembler::NotEqual, &again); // conditional store did not succeed: retry
-+
-+ masm.memoryBarrierAfter(sync);
-+}
-+
-+// Public MacroAssembler methods.
-+
-+void MacroAssembler::compareExchange(Scalar::Type type, Synchronization sync,
-+ const Address& mem, Register oldval,
-+ Register newval, Register valueTemp,
-+ Register offsetTemp, Register maskTemp,
-+ Register output) { // Address overload; forwards with no wasm access descriptor.
-+ CompareExchange(*this, nullptr, type, sync, mem, oldval, newval, valueTemp,
-+ offsetTemp, maskTemp, output);
-+}
-+
-+void MacroAssembler::compareExchange(Scalar::Type type, Synchronization sync,
-+ const BaseIndex& mem, Register oldval,
-+ Register newval, Register valueTemp,
-+ Register offsetTemp, Register maskTemp,
-+ Register output) { // BaseIndex overload; forwards with no wasm access descriptor.
-+ CompareExchange(*this, nullptr, type, sync, mem, oldval, newval, valueTemp,
-+ offsetTemp, maskTemp, output);
-+}
-+
-+void MacroAssembler::compareExchange64(Synchronization sync, const Address& mem,
-+ Register64 expect, Register64 replace,
-+ Register64 output) { // Address overload; forwards with no wasm access descriptor.
-+ CompareExchange64(*this, nullptr, sync, mem, expect, replace, output);
-+}
-+
-+void MacroAssembler::compareExchange64(Synchronization sync,
-+ const BaseIndex& mem, Register64 expect,
-+ Register64 replace, Register64 output) { // BaseIndex overload; forwards with no wasm access descriptor.
-+ CompareExchange64(*this, nullptr, sync, mem, expect, replace, output);
-+}
-+
-+void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access,
-+ const Address& mem, Register oldval,
-+ Register newval, Register valueTemp,
-+ Register offsetTemp, Register maskTemp,
-+ Register output) { // Address overload; element type and sync are taken from `access`.
-+ CompareExchange(*this, &access, access.type(), access.sync(), mem, oldval,
-+ newval, valueTemp, offsetTemp, maskTemp, output);
-+}
-+
-+void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access,
-+ const BaseIndex& mem, Register oldval,
-+ Register newval, Register valueTemp,
-+ Register offsetTemp, Register maskTemp,
-+ Register output) { // BaseIndex overload; element type and sync are taken from `access`.
-+ CompareExchange(*this, &access, access.type(), access.sync(), mem, oldval,
-+ newval, valueTemp, offsetTemp, maskTemp, output);
-+}
-+
-+void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access,
-+ const Address& mem,
-+ Register64 expect,
-+ Register64 replace,
-+ Register64 output) { // Address overload; sync is taken from `access`.
-+ CompareExchange64(*this, &access, access.sync(), mem, expect, replace,
-+ output);
-+}
-+
-+void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access,
-+ const BaseIndex& mem,
-+ Register64 expect,
-+ Register64 replace,
-+ Register64 output) { // BaseIndex overload; sync is taken from `access`.
-+ CompareExchange64(*this, &access, access.sync(), mem, expect, replace,
-+ output);
-+}
-+
-+void MacroAssembler::atomicExchange(Scalar::Type type, Synchronization sync,
-+ const Address& mem, Register value,
-+ Register valueTemp, Register offsetTemp,
-+ Register maskTemp, Register output) { // Address overload; forwards with no wasm access descriptor.
-+ AtomicExchange(*this, nullptr, type, sync, mem, value, valueTemp, offsetTemp,
-+ maskTemp, output);
-+}
-+
-+void MacroAssembler::atomicExchange(Scalar::Type type, Synchronization sync,
-+ const BaseIndex& mem, Register value,
-+ Register valueTemp, Register offsetTemp,
-+ Register maskTemp, Register output) { // BaseIndex overload; forwards with no wasm access descriptor.
-+ AtomicExchange(*this, nullptr, type, sync, mem, value, valueTemp, offsetTemp,
-+ maskTemp, output);
-+}
-+
-+void MacroAssembler::atomicExchange64(Synchronization sync, const Address& mem,
-+ Register64 value, Register64 output) { // Address overload; forwards with no wasm access descriptor.
-+ AtomicExchange64(*this, nullptr, sync, mem, value, output);
-+}
-+
-+void MacroAssembler::atomicExchange64(Synchronization sync,
-+ const BaseIndex& mem, Register64 value,
-+ Register64 output) { // BaseIndex overload; forwards with no wasm access descriptor.
-+ AtomicExchange64(*this, nullptr, sync, mem, value, output);
-+}
-+
-+void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access,
-+ const Address& mem, Register value,
-+ Register valueTemp, Register offsetTemp,
-+ Register maskTemp, Register output) { // Address overload; element type and sync are taken from `access`.
-+ AtomicExchange(*this, &access, access.type(), access.sync(), mem, value,
-+ valueTemp, offsetTemp, maskTemp, output);
-+}
-+
-+void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access,
-+ const BaseIndex& mem, Register value,
-+ Register valueTemp, Register offsetTemp,
-+ Register maskTemp, Register output) { // BaseIndex overload; element type and sync are taken from `access`.
-+ AtomicExchange(*this, &access, access.type(), access.sync(), mem, value,
-+ valueTemp, offsetTemp, maskTemp, output);
-+}
-+
-+template <typename T> // Shared body of the two wasmAtomicExchange64 overloads.
-+static void WasmAtomicExchange64(MacroAssembler& masm,
-+ const wasm::MemoryAccessDesc& access,
-+ const T& mem, Register64 value,
-+ Register64 output) {
-+ AtomicExchange64(masm, &access, access.sync(), mem, value, output); // sync comes from `access`
-+}
-+
-+void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access,
-+ const Address& mem, Register64 src,
-+ Register64 output) { // Address overload; `src` is the value to store.
-+ WasmAtomicExchange64(*this, access, mem, src, output);
-+}
-+
-+void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access,
-+ const BaseIndex& mem, Register64 src,
-+ Register64 output) { // BaseIndex overload; `src` is the value to store.
-+ WasmAtomicExchange64(*this, access, mem, src, output);
-+}
-+
-+void MacroAssembler::atomicFetchOp(Scalar::Type type, Synchronization sync,
-+ AtomicOp op, Register value,
-+ const Address& mem, Register valueTemp,
-+ Register offsetTemp, Register maskTemp,
-+ Register output) { // Address overload; forwards with no wasm access descriptor.
-+ AtomicFetchOp(*this, nullptr, type, sync, op, mem, value, valueTemp,
-+ offsetTemp, maskTemp, output); // note: the helper takes mem before value
-+}
-+
-+void MacroAssembler::atomicFetchOp(Scalar::Type type, Synchronization sync,
-+ AtomicOp op, Register value,
-+ const BaseIndex& mem, Register valueTemp,
-+ Register offsetTemp, Register maskTemp,
-+ Register output) { // BaseIndex overload; forwards with no wasm access descriptor.
-+ AtomicFetchOp(*this, nullptr, type, sync, op, mem, value, valueTemp,
-+ offsetTemp, maskTemp, output); // note: the helper takes mem before value
-+}
-+
-+void MacroAssembler::atomicFetchOp64(Synchronization sync, AtomicOp op,
-+ Register64 value, const Address& mem,
-+ Register64 temp, Register64 output) { // Address overload; forwards with no wasm access descriptor.
-+ AtomicFetchOp64(*this, nullptr, sync, op, value, mem, temp, output);
-+}
-+
-+void MacroAssembler::atomicFetchOp64(Synchronization sync, AtomicOp op,
-+ Register64 value, const BaseIndex& mem,
-+ Register64 temp, Register64 output) { // BaseIndex overload; forwards with no wasm access descriptor.
-+ AtomicFetchOp64(*this, nullptr, sync, op, value, mem, temp, output);
-+}
-+
-+void MacroAssembler::atomicEffectOp64(Synchronization sync, AtomicOp op,
-+ Register64 value, const Address& mem,
-+ Register64 temp) { // Address overload; the old value is not returned to the caller.
-+ AtomicFetchOp64(*this, nullptr, sync, op, value, mem, temp, temp); // temp doubles as the helper's output register
-+}
-+
-+void MacroAssembler::atomicEffectOp64(Synchronization sync, AtomicOp op,
-+ Register64 value, const BaseIndex& mem,
-+ Register64 temp) { // BaseIndex overload; the old value is not returned to the caller.
-+ AtomicFetchOp64(*this, nullptr, sync, op, value, mem, temp, temp); // temp doubles as the helper's output register
-+}
-+
-+void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access,
-+ AtomicOp op, Register value,
-+ const Address& mem, Register valueTemp,
-+ Register offsetTemp, Register maskTemp,
-+ Register output) { // Address overload; element type and sync are taken from `access`.
-+ AtomicFetchOp(*this, &access, access.type(), access.sync(), op, mem, value,
-+ valueTemp, offsetTemp, maskTemp, output);
-+}
-+
-+void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access,
-+ AtomicOp op, Register value,
-+ const BaseIndex& mem, Register valueTemp,
-+ Register offsetTemp, Register maskTemp,
-+ Register output) { // BaseIndex overload; element type and sync are taken from `access`.
-+ AtomicFetchOp(*this, &access, access.type(), access.sync(), op, mem, value,
-+ valueTemp, offsetTemp, maskTemp, output);
-+}
-+
-+void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access,
-+ AtomicOp op, Register64 value,
-+ const Address& mem, Register64 temp,
-+ Register64 output) { // Address overload; sync is taken from `access`.
-+ AtomicFetchOp64(*this, &access, access.sync(), op, value, mem, temp, output);
-+}
-+
-+void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access,
-+ AtomicOp op, Register64 value,
-+ const BaseIndex& mem, Register64 temp,
-+ Register64 output) { // BaseIndex overload; sync is taken from `access`.
-+ AtomicFetchOp64(*this, &access, access.sync(), op, value, mem, temp, output);
-+}
-+
-+void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access,
-+ AtomicOp op, Register value,
-+ const Address& mem, Register valueTemp,
-+ Register offsetTemp,
-+ Register maskTemp) { // Address overload; element type and sync are taken from `access`.
-+ AtomicEffectOp(*this, &access, access.type(), access.sync(), op, mem, value,
-+ valueTemp, offsetTemp, maskTemp);
-+}
-+
-+void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access,
-+ AtomicOp op, Register value,
-+ const BaseIndex& mem,
-+ Register valueTemp, Register offsetTemp,
-+ Register maskTemp) { // BaseIndex overload; element type and sync are taken from `access`.
-+ AtomicEffectOp(*this, &access, access.type(), access.sync(), op, mem, value,
-+ valueTemp, offsetTemp, maskTemp);
-+}
-+
-+// ========================================================================
-+// JS atomic operations.
-+
-+template <typename T> // compareExchange for JS typed arrays, with the result placed in an AnyRegister.
-+static void CompareExchangeJS(MacroAssembler& masm, Scalar::Type arrayType,
-+ Synchronization sync, const T& mem,
-+ Register oldval, Register newval,
-+ Register valueTemp, Register offsetTemp,
-+ Register maskTemp, Register temp,
-+ AnyRegister output) {
-+ if (arrayType == Scalar::Uint32) { // Uint32 results are delivered as a double in output.fpu()
-+ masm.compareExchange(arrayType, sync, mem, oldval, newval, valueTemp,
-+ offsetTemp, maskTemp, temp); // integer result lands in temp first
-+ masm.convertUInt32ToDouble(temp, output.fpu());
-+ } else { // every other element type is returned in output.gpr()
-+ masm.compareExchange(arrayType, sync, mem, oldval, newval, valueTemp,
-+ offsetTemp, maskTemp, output.gpr());
-+ }
-+}
-+
-+template <typename T> // atomicExchange for JS typed arrays, with the result placed in an AnyRegister.
-+static void AtomicExchangeJS(MacroAssembler& masm, Scalar::Type arrayType,
-+ Synchronization sync, const T& mem, Register value,
-+ Register valueTemp, Register offsetTemp,
-+ Register maskTemp, Register temp,
-+ AnyRegister output) {
-+ if (arrayType == Scalar::Uint32) { // Uint32 results are delivered as a double in output.fpu()
-+ masm.atomicExchange(arrayType, sync, mem, value, valueTemp, offsetTemp,
-+ maskTemp, temp); // integer result lands in temp first
-+ masm.convertUInt32ToDouble(temp, output.fpu());
-+ } else { // every other element type is returned in output.gpr()
-+ masm.atomicExchange(arrayType, sync, mem, value, valueTemp, offsetTemp,
-+ maskTemp, output.gpr());
-+ }
-+}
-+
-+template <typename T> // atomicFetchOp for JS typed arrays, with the result placed in an AnyRegister.
-+static void AtomicFetchOpJS(MacroAssembler& masm, Scalar::Type arrayType,
-+ Synchronization sync, AtomicOp op, Register value,
-+ const T& mem, Register valueTemp,
-+ Register offsetTemp, Register maskTemp,
-+ Register temp, AnyRegister output) {
-+ if (arrayType == Scalar::Uint32) { // Uint32 results are delivered as a double in output.fpu()
-+ masm.atomicFetchOp(arrayType, sync, op, value, mem, valueTemp, offsetTemp,
-+ maskTemp, temp); // integer result lands in temp first
-+ masm.convertUInt32ToDouble(temp, output.fpu());
-+ } else { // every other element type is returned in output.gpr()
-+ masm.atomicFetchOp(arrayType, sync, op, value, mem, valueTemp, offsetTemp,
-+ maskTemp, output.gpr());
-+ }
-+}
-+
-+void MacroAssembler::compareExchangeJS(Scalar::Type arrayType,
-+ Synchronization sync, const Address& mem,
-+ Register oldval, Register newval,
-+ Register valueTemp, Register offsetTemp,
-+ Register maskTemp, Register temp,
-+ AnyRegister output) { // Address overload of the JS typed-array compare-exchange.
-+ CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, valueTemp,
-+ offsetTemp, maskTemp, temp, output);
-+}
-+
-+void MacroAssembler::compareExchangeJS(Scalar::Type arrayType,
-+ Synchronization sync,
-+ const BaseIndex& mem, Register oldval,
-+ Register newval, Register valueTemp,
-+ Register offsetTemp, Register maskTemp,
-+ Register temp, AnyRegister output) { // BaseIndex overload of the JS typed-array compare-exchange.
-+ CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, valueTemp,
-+ offsetTemp, maskTemp, temp, output);
-+}
-+
-+void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType,
-+ Synchronization sync, const Address& mem,
-+ Register value, Register valueTemp,
-+ Register offsetTemp, Register maskTemp,
-+ Register temp, AnyRegister output) { // Address overload of the JS typed-array exchange.
-+ AtomicExchangeJS(*this, arrayType, sync, mem, value, valueTemp, offsetTemp,
-+ maskTemp, temp, output);
-+}
-+
-+void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType,
-+ Synchronization sync,
-+ const BaseIndex& mem, Register value,
-+ Register valueTemp, Register offsetTemp,
-+ Register maskTemp, Register temp,
-+ AnyRegister output) { // BaseIndex overload of the JS typed-array exchange.
-+ AtomicExchangeJS(*this, arrayType, sync, mem, value, valueTemp, offsetTemp,
-+ maskTemp, temp, output);
-+}
-+
-+void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType,
-+ Synchronization sync, AtomicOp op,
-+ Register value, const Address& mem,
-+ Register valueTemp, Register offsetTemp,
-+ Register maskTemp, Register temp,
-+ AnyRegister output) { // Address overload of the JS typed-array fetch-and-op.
-+ AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, valueTemp, offsetTemp,
-+ maskTemp, temp, output);
-+}
-+
-+void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType,
-+ Synchronization sync, AtomicOp op,
-+ Register value, const BaseIndex& mem,
-+ Register valueTemp, Register offsetTemp,
-+ Register maskTemp, Register temp,
-+ AnyRegister output) { // BaseIndex overload of the JS typed-array fetch-and-op.
-+ AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, valueTemp, offsetTemp,
-+ maskTemp, temp, output);
-+}
-+
-+void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType,
-+ Synchronization sync, AtomicOp op,
-+ Register value, const BaseIndex& mem,
-+ Register valueTemp, Register offsetTemp,
-+ Register maskTemp) { // BaseIndex overload; no result register, no wasm access descriptor.
-+ AtomicEffectOp(*this, nullptr, arrayType, sync, op, mem, value, valueTemp,
-+ offsetTemp, maskTemp);
-+}
-+
-+void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType,
-+ Synchronization sync, AtomicOp op,
-+ Register value, const Address& mem,
-+ Register valueTemp, Register offsetTemp,
-+ Register maskTemp) { // Address overload; no result register, no wasm access descriptor.
-+ AtomicEffectOp(*this, nullptr, arrayType, sync, op, mem, value, valueTemp,
-+ offsetTemp, maskTemp);
-+}
-+
-+// ========================================================================
-+// Wasm address offset carry tests.
-+
-+void MacroAssemblerPPC64Compat::ma_add32TestCarry(Condition cond, Register rd,
-+ Register rs, Imm32 imm,
-+ Label* overflow) { // rd = rs + imm (32-bit), then branch to `overflow` according to cond
-+ MOZ_ASSERT(cond == Assembler::CarrySet || cond == Assembler::CarryClear);
-+ if (rd != rs) {
-+ asMasm().move32(rs, rd);
-+ asMasm().add32(imm, rd);
-+ as_cmplw(rd, rs); // compare the sum against the untouched original operand
-+ } else {
-+ // visitWasmAddOffset uses useRegisterAtStart, so the LIR allocator may
-+ // collapse rd onto rs. move32 + add32 would clobber rs before the
-+ // compare; save rs to a scratch first.
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ asMasm().move32(rs, scratch);
-+ asMasm().add32(imm, rd);
-+ as_cmplw(rd, scratch); // compare the sum against the saved original operand
-+ }
-+ ma_b(cond == Assembler::CarrySet ? LessThan : GreaterThanOrEqual, overflow); // CarrySet: taken when sum < original
-+}
-+
-+void MacroAssemblerPPC64Compat::ma_addPtrTestCarry(Condition cond, Register rd,
-+ Register rs, ImmWord imm,
-+ Label* overflow) { // rd = rs + imm (pointer-sized), then branch to `overflow` according to cond
-+ MOZ_ASSERT(cond == Assembler::CarrySet || cond == Assembler::CarryClear);
-+ if (rd != rs) {
-+ asMasm().movePtr(rs, rd);
-+ asMasm().addPtr(ImmWord(imm.value), rd);
-+ as_cmpld(rd, rs); // compare the sum against the untouched original operand
-+ } else { // rd aliases rs: keep a copy of the original for the compare
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ asMasm().movePtr(rs, scratch);
-+ asMasm().addPtr(ImmWord(imm.value), rd);
-+ as_cmpld(rd, scratch); // compare the sum against the saved original operand
-+ }
-+ ma_b(cond == Assembler::CarrySet ? LessThan : GreaterThanOrEqual, overflow); // CarrySet: taken when sum < original
-+}
-+
-+// ========================================================================
-+// Wasm load/store helpers.
-+
-+void MacroAssemblerPPC64Compat::wasmProbeLastByte( // emits a 1-byte load of the access's last byte ahead of the real access
-+ const wasm::MemoryAccessDesc& access, Register memoryBase, Register ptr) {
-+ if (HasPOWER9()) { // no probe is emitted when POWER9 is available
-+ return;
-+ }
-+ const unsigned size = Scalar::byteSize(access.type());
-+ if (size <= 1) { // a 1-byte access has no separate last byte to probe
-+ return;
-+ }
-+ UseScratchRegisterScope temps(asMasm());
-+ Register probeAddr = temps.Acquire();
-+ // size is at most 16 (Simd128), well within the int16_t range of as_addi.
-+ as_addi(probeAddr, ptr, int16_t(size - 1)); // probeAddr = ptr + (size - 1)
-+ // Record the probe as a wasm trap site so its SIGSEGV dispatches
-+ // through the wasm signal handler the same way the real access would.
-+ m_buffer.flushPool(); // flush before recording, so the recorded offset is the probe load itself
-+ append(access, wasm::TrapMachineInsn::Load8,
-+ FaultingCodeOffset(currentOffset()));
-+ // Probing 1-byte load; result discarded.
-+ as_lbzx(probeAddr, memoryBase, probeAddr); // the scratch register is reused as the destination
-+}
-+
-+void MacroAssemblerPPC64Compat::wasmLoadImpl( // emits a wasm load of access.type() from memoryBase + ptr into output
-+ const wasm::MemoryAccessDesc& access, Register memoryBase, Register ptr,
-+ Register ptrScratch, AnyRegister output) {
-+ access.assertOffsetInGuardPages();
-+ uint32_t offset = access.offset32();
-+ MOZ_ASSERT_IF(offset, ptrScratch != InvalidReg); // a non-zero offset requires a register to fold it into
-+
-+ if (offset) {
-+ asMasm().addPtr(ImmWord(offset), ptrScratch); // presumably ptrScratch already holds ptr - confirm with callers
-+ ptr = ptrScratch; // from here on the offset-adjusted register is the index
-+ }
-+
-+ wasmProbeLastByte(access, memoryBase, ptr);
-+
-+ asMasm().memoryBarrierBefore(access.sync());
-+ // Flush any pending constant pool entries before recording the trap site,
-+ // otherwise a pool body inserted between the recorded offset and the
-+ // emitted load shifts the load and leaves the pool guard branch at the
-+ // recorded offset (SummarizeTrapInstruction then rejects the trap site).
-+ m_buffer.flushPool();
-+ append(access, wasm::TrapMachineInsnForLoad(Scalar::byteSize(access.type())),
-+ FaultingCodeOffset(currentOffset())); // offset of the first instruction emitted by the switch below
-+
-+ switch (access.type()) {
-+ case Scalar::Int8:
-+ as_lbzx(output.gpr(), memoryBase, ptr);
-+ as_extsb(output.gpr(), output.gpr()); // sign-extend the loaded byte
-+ break;
-+ case Scalar::Uint8:
-+ as_lbzx(output.gpr(), memoryBase, ptr);
-+ break;
-+ case Scalar::Int16:
-+ as_lhax(output.gpr(), memoryBase, ptr);
-+ break;
-+ case Scalar::Uint16:
-+ as_lhzx(output.gpr(), memoryBase, ptr);
-+ break;
-+ case Scalar::Int32:
-+ case Scalar::Uint32: // both 32-bit types share the load + extsw sequence
-+ as_lwzx(output.gpr(), memoryBase, ptr);
-+ as_extsw(output.gpr(), output.gpr());
-+ break;
-+ case Scalar::Float64:
-+ if (access.isZeroExtendSimd128Load() || access.isSplatSimd128Load() ||
-+ access.isWidenSimd128Load()) { // 64-bit loads that produce a Simd128 result
-+ // lfdx is X-form scalar FP — encodes only 5-bit FRT, so a
-+ // Simd128 dest (encoding 32+) corrupts the opcode. Bridge
-+ // through ScratchDoubleReg (FPR f0, encoding 0).
-+ ScratchDoubleScope dscratch(asMasm());
-+ as_lfdx(dscratch, memoryBase, ptr);
-+ if (access.isZeroExtendSimd128Load()) {
-+ // Loaded value goes to BE dw1 (= LE dw0 = lane 0); BE dw0 = 0.
-+ as_xxlxor(ScratchSimd128Reg, ScratchSimd128Reg, ScratchSimd128Reg);
-+ as_xxpermdi(output.fpu(), ScratchSimd128Reg, dscratch, 0);
-+ } else if (access.isSplatSimd128Load()) {
-+ as_xxpermdi(output.fpu(), dscratch, dscratch, 0);
-+ } else {
-+ // widen: place loaded 64 bits in LE dw0 (= BE dw1) for widenLow.
-+ as_xxpermdi(output.fpu(), dscratch, dscratch, 2);
-+ switch (access.widenSimdOp()) { // pick the widening by lane width and signedness
-+ case wasm::SimdOp::V128Load8x8S:
-+ asMasm().widenLowInt8x16(output.fpu(), output.fpu());
-+ break;
-+ case wasm::SimdOp::V128Load8x8U:
-+ asMasm().unsignedWidenLowInt8x16(output.fpu(), output.fpu());
-+ break;
-+ case wasm::SimdOp::V128Load16x4S:
-+ asMasm().widenLowInt16x8(output.fpu(), output.fpu());
-+ break;
-+ case wasm::SimdOp::V128Load16x4U:
-+ asMasm().unsignedWidenLowInt16x8(output.fpu(), output.fpu());
-+ break;
-+ case wasm::SimdOp::V128Load32x2S:
-+ asMasm().widenLowInt32x4(output.fpu(), output.fpu());
-+ break;
-+ case wasm::SimdOp::V128Load32x2U:
-+ asMasm().unsignedWidenLowInt32x4(output.fpu(), output.fpu());
-+ break;
-+ default:
-+ MOZ_CRASH("Unexpected widen op");
-+ }
-+ }
-+ } else { // plain scalar double load
-+ as_lfdx(output.fpu(), memoryBase, ptr);
-+ }
-+ break;
-+ case Scalar::Float32:
-+ if (access.isZeroExtendSimd128Load()) {
-+ // v128.load32_zero: load 32 raw bits into lane 0, zero the rest.
-+ UseScratchRegisterScope temps(asMasm());
-+ Register tmp = temps.Acquire();
-+ as_lwzx(tmp, memoryBase, ptr);
-+ as_xxlxor(output.fpu(), output.fpu(), output.fpu()); // clear the destination vector
-+ if (HasPOWER9()) {
-+ as_mtvsrws(ScratchSimd128Reg, tmp);
-+ as_xxinsertw(output.fpu(), ScratchSimd128Reg, 12);
-+ } else {
-+ // POWER8: mtvsrd puts value in BE dw0 low 32 bits.
-+ // xxpermdi(dest, zero, scratch, 0) = {zero[dw0], scratch[dw0]}
-+ // in BE, placing the value in LE word 0 with the rest zero.
-+ as_mtvsrd(ScratchSimd128Reg, tmp);
-+ as_xxpermdi(output.fpu(), output.fpu(), ScratchSimd128Reg, 0);
-+ }
-+ } else { // plain scalar float load
-+ as_lfsx(output.fpu(), memoryBase, ptr);
-+ }
-+ break;
-+ case Scalar::Simd128:
-+ if (HasPOWER9()) {
-+ as_lxvx(output.fpu(), memoryBase, ptr);
-+ } else { // without POWER9: lxvd2x followed by an xxpermdi fix-up
-+ as_lxvd2x(output.fpu(), memoryBase, ptr);
-+ as_xxpermdi(output.fpu(), output.fpu(), output.fpu(), 2); // presumably swaps the two doublewords - confirm
-+ }
-+ break;
-+ default: // Int64 is not handled by this function
-+ MOZ_CRASH("unexpected array type");
-+ }
-+
-+ asMasm().memoryBarrierAfter(access.sync());
-+}
-+
-+void MacroAssemblerPPC64Compat::wasmStoreImpl( // emits a wasm store of `value` as access.type() to memoryBase + ptr
-+ const wasm::MemoryAccessDesc& access, AnyRegister value,
-+ Register memoryBase, Register ptr, Register ptrScratch) {
-+ access.assertOffsetInGuardPages();
-+ uint32_t offset = access.offset32();
-+ MOZ_ASSERT_IF(offset, ptrScratch != InvalidReg); // a non-zero offset requires a register to fold it into
-+
-+ if (offset) {
-+ asMasm().addPtr(ImmWord(offset), ptrScratch); // presumably ptrScratch already holds ptr - confirm with callers
-+ ptr = ptrScratch; // from here on the offset-adjusted register is the index
-+ }
-+
-+ wasmProbeLastByte(access, memoryBase, ptr);
-+
-+ asMasm().memoryBarrierBefore(access.sync());
-+ // Record trap site at the faulting memory instruction. For P8 Simd128
-+ // store, the faulting instruction (stxvd2x) is after a byte-swap
-+ // (xxpermdi), so we defer the trap site recording.
-+ // Flush pool first; see comment in wasmLoadImpl.
-+ if (access.type() != Scalar::Simd128 || HasPOWER9()) { // every case except the non-POWER9 Simd128 store
-+ m_buffer.flushPool();
-+ append(access,
-+ wasm::TrapMachineInsnForStore(Scalar::byteSize(access.type())),
-+ FaultingCodeOffset(currentOffset())); // offset of the store emitted by the switch below
-+ }
-+
-+ switch (access.type()) {
-+ case Scalar::Int8:
-+ case Scalar::Uint8:
-+ as_stbx(value.gpr(), memoryBase, ptr);
-+ break;
-+ case Scalar::Int16:
-+ case Scalar::Uint16:
-+ as_sthx(value.gpr(), memoryBase, ptr);
-+ break;
-+ case Scalar::Int32:
-+ case Scalar::Uint32:
-+ as_stwx(value.gpr(), memoryBase, ptr);
-+ break;
-+ case Scalar::Int64:
-+ as_stdx(value.gpr(), memoryBase, ptr);
-+ break;
-+ case Scalar::Float64:
-+ as_stfdx(value.fpu(), memoryBase, ptr);
-+ break;
-+ case Scalar::Float32:
-+ as_stfsx(value.fpu(), memoryBase, ptr);
-+ break;
-+ case Scalar::Simd128:
-+ if (HasPOWER9()) {
-+ as_stxvx(value.fpu(), memoryBase, ptr);
-+ } else {
-+ as_xxpermdi(ScratchSimd128Reg, value.fpu(), value.fpu(), 2); // permute into the scratch vector; `value` is left untouched
-+ m_buffer.flushPool(); // see comment in wasmLoadImpl
-+ append(access,
-+ wasm::TrapMachineInsnForStore(Scalar::byteSize(access.type())),
-+ FaultingCodeOffset(currentOffset())); // deferred trap site: offset of the stxvd2x emitted next
-+ as_stxvd2x(ScratchSimd128Reg, memoryBase, ptr);
-+ }
-+ break;
-+ default:
-+ MOZ_CRASH("unexpected array type");
-+ }
-+
-+ asMasm().memoryBarrierAfter(access.sync());
-+}
-+
-+void MacroAssemblerPPC64Compat::wasmLoadI64Impl(
-+ const wasm::MemoryAccessDesc& access, Register memoryBase, Register ptr,
-+ Register ptrScratch, Register64 output) {
-+ uint32_t offset = access.offset32();
-+ MOZ_ASSERT_IF(offset, ptrScratch != InvalidReg);
-+
-+ if (offset) {
-+ asMasm().addPtr(ImmWord(offset), ptrScratch);
-+ ptr = ptrScratch;
-+ }
-+
-+ wasmProbeLastByte(access, memoryBase, ptr);
-+
-+ asMasm().memoryBarrierBefore(access.sync());
-+ m_buffer.flushPool(); // see comment in wasmLoadImpl
-+ append(access, wasm::TrapMachineInsnForLoad(Scalar::byteSize(access.type())),
-+ FaultingCodeOffset(currentOffset()));
-+
-+ switch (access.type()) {
-+ case Scalar::Int8:
-+ as_lbzx(output.reg, memoryBase, ptr);
-+ as_extsb(output.reg, output.reg);
-+ break;
-+ case Scalar::Uint8:
-+ as_lbzx(output.reg, memoryBase, ptr);
-+ break;
-+ case Scalar::Int16:
-+ as_lhax(output.reg, memoryBase, ptr);
-+ break;
-+ case Scalar::Uint16:
-+ as_lhzx(output.reg, memoryBase, ptr);
-+ break;
-+ case Scalar::Int32:
-+ as_lwzx(output.reg, memoryBase, ptr);
-+ as_extsw(output.reg, output.reg);
-+ break;
-+ case Scalar::Uint32:
-+ as_lwzx(output.reg, memoryBase, ptr);
-+ // Zero-extended by lwzx already
-+ break;
-+ case Scalar::Int64:
-+ as_ldx(output.reg, memoryBase, ptr);
-+ break;
-+ default:
-+ MOZ_CRASH("unexpected array type");
-+ }
-+
-+ asMasm().memoryBarrierAfter(access.sync());
-+}
-+
-+void MacroAssemblerPPC64Compat::wasmStoreI64Impl(
-+ const wasm::MemoryAccessDesc& access, Register64 value, Register memoryBase,
-+ Register ptr, Register ptrScratch) {
-+ uint32_t offset = access.offset32();
-+ MOZ_ASSERT_IF(offset, ptrScratch != InvalidReg);
-+
-+ if (offset) {
-+ asMasm().addPtr(ImmWord(offset), ptrScratch);
-+ ptr = ptrScratch;
-+ }
-+
-+ wasmProbeLastByte(access, memoryBase, ptr);
-+
-+ asMasm().memoryBarrierBefore(access.sync());
-+ m_buffer.flushPool(); // see comment in wasmLoadImpl
-+ append(access, wasm::TrapMachineInsnForStore(Scalar::byteSize(access.type())),
-+ FaultingCodeOffset(currentOffset()));
-+
-+ switch (access.type()) {
-+ case Scalar::Int8:
-+ case Scalar::Uint8:
-+ as_stbx(value.reg, memoryBase, ptr);
-+ break;
-+ case Scalar::Int16:
-+ case Scalar::Uint16:
-+ as_sthx(value.reg, memoryBase, ptr);
-+ break;
-+ case Scalar::Int32:
-+ case Scalar::Uint32:
-+ as_stwx(value.reg, memoryBase, ptr);
-+ break;
-+ case Scalar::Int64:
-+ as_stdx(value.reg, memoryBase, ptr);
-+ break;
-+ default:
-+ MOZ_CRASH("unexpected array type");
-+ }
-+
-+ asMasm().memoryBarrierAfter(access.sync());
-+}
-+
-+void MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access,
-+ Register memoryBase, Register ptr,
-+ Register ptrScratch, AnyRegister output) {
-+ wasmLoadImpl(access, memoryBase, ptr, ptrScratch, output);
-+}
-+
-+void MacroAssembler::wasmLoadI64(const wasm::MemoryAccessDesc& access,
-+ Register memoryBase, Register ptr,
-+ Register ptrScratch, Register64 output) {
-+ wasmLoadI64Impl(access, memoryBase, ptr, ptrScratch, output);
-+}
-+
-+void MacroAssembler::wasmStore(const wasm::MemoryAccessDesc& access,
-+ AnyRegister value, Register memoryBase,
-+ Register ptr, Register ptrScratch) {
-+ wasmStoreImpl(access, value, memoryBase, ptr, ptrScratch);
-+}
-+
-+void MacroAssembler::wasmStoreI64(const wasm::MemoryAccessDesc& access,
-+ Register64 value, Register memoryBase,
-+ Register ptr, Register ptrScratch) {
-+ wasmStoreI64Impl(access, value, memoryBase, ptr, ptrScratch);
-+}
-+
-+//}}} check_macroassembler_style
-+
-+} // namespace jit
-+} // namespace js
-+
-+#ifdef ENABLE_WASM_SIMD
-+// static
-+bool MacroAssembler::MustMaskShiftCountSimd128(wasm::SimdOp op, int32_t* mask) {
-+ return false;
-+}
-+#endif
-diff --git a/js/src/jit/ppc64/MacroAssembler-ppc64.h b/js/src/jit/ppc64/MacroAssembler-ppc64.h
-new file mode 100644
-index 000000000000..bc2143b67465
---- /dev/null
-+++ b/js/src/jit/ppc64/MacroAssembler-ppc64.h
-@@ -0,0 +1,2031 @@
-+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
-+ * vim: set ts=8 sts=2 et sw=2 tw=80:
-+ * This Source Code Form is subject to the terms of the Mozilla Public
-+ * License, v. 2.0. If a copy of the MPL was not distributed with this
-+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-+
-+#ifndef jit_ppc64_MacroAssembler_ppc64_h
-+#define jit_ppc64_MacroAssembler_ppc64_h
-+
-+#include "jit/MoveResolver.h"
-+#include "jit/ppc64/Assembler-ppc64.h"
-+#include "wasm/WasmBuiltins.h"
-+
-+namespace js {
-+namespace jit {
-+
-+inline bool is_intN(int64_t x, unsigned n) {
-+ MOZ_ASSERT((0 < n) && (n < 64));
-+ int64_t limit = static_cast<int64_t>(1) << (n - 1);
-+ return (-limit <= x) && (x < limit);
-+}
-+
-+inline bool is_uintN(uint64_t x, unsigned n) {
-+ MOZ_ASSERT((0 < n) && (n < 64));
-+ return !(x >> n);
-+}
-+
-+// enterNoPool() guard sizes. Inhibiting the constant pool keeps these
-+// stanzas at a fixed instruction count, which patchers and long-branch
-+// resolvers rely on. Each constant names a distinct stanza shape; see
-+// the emitting call site for the exact layout.
-+//
-+// kNoPoolLoad64StanzaInsns (8): emitLoad64Stanza body — 8 NOPs that
-+// WriteLoad64Instructions later overwrites in place. Two shapes share
-+// the same 8-slot footprint with the .quad fixed at slots [6..7]:
-+// - POWER9+ (HasPOWER9()): addpcis + ld + b + 3 NOPs (2 dynamic insns,
-+// no LR clobber). Preferred path.
-+// - POWER8 fallback: mflr/bcl/mflr/mtlr/ld/b LR-bouncing sequence
-+// (6 dynamic insns, RAS-thrashing — kept only because P8 has no
-+// addpcis).
-+//
-+// kNoPoolPatchableBranchInsns (10): patchable far call / jump /
-+// unconditional branch. Three alternative shapes, all fitting the
-+// same budget:
-+// - load64 stanza (8) + mtctr + bctr[l] = 10 (bound call/jump)
-+// - 9 NOPs + bl = 10 (short bound call)
-+// - xs_trap_tagged(TAG) + chain + 8 NOPs = 10 (fwd-ref stanza)
-+//
-+// kNoPoolCondLongBranchInsnsP8Max (14): conditional long branch, POWER8
-+// Overflow worst case. POWER8 has no mcrxrx so overflow/carry test is
-+// mfxer+rlwinm+mtcrf (3 insns) on top of the base shape. Budget =
-+// 3 (XER inspection) + 1 (bc) + 8 (load64 stanza) + 2 (mtctr+bctr) = 14.
-+static constexpr size_t kNoPoolLoad64StanzaInsns = 8;
-+static constexpr size_t kNoPoolPatchableBranchInsns = 10;
-+static constexpr size_t kNoPoolCondLongBranchInsnsP8Max = 14;
-+
-+enum LoadStoreSize {
-+ SizeByte = 8,
-+ SizeHalfWord = 16,
-+ SizeWord = 32,
-+ SizeDouble = 64
-+};
-+
-+enum LoadStoreExtension { ZeroExtend = 0, SignExtend = 1 };
-+
-+static Register CallReg = r12;
-+
-+struct ImmShiftedTag : public ImmWord {
-+ explicit ImmShiftedTag(JSValueShiftedTag shtag) : ImmWord((uintptr_t)shtag) {}
-+ explicit ImmShiftedTag(JSValueType type)
-+ : ImmWord(((uintptr_t)JSVAL_TYPE_TO_SHIFTED_TAG(type))) {}
-+};
-+
-+struct ImmTag : public Imm32 {
-+ explicit ImmTag(JSValueTag tag) : Imm32(tag) {}
-+};
-+
-+class ScratchTagScope {
-+ UseScratchRegisterScope temps_;
-+ Register scratch_;
-+ bool owned_;
-+ mozilla::DebugOnly<bool> released_;
-+
-+ public:
-+ ScratchTagScope(Assembler& masm, const ValueOperand&)
-+ : temps_(masm), owned_(true), released_(false) {
-+ scratch_ = temps_.Acquire();
-+ }
-+
-+ operator Register() {
-+ MOZ_ASSERT(!released_);
-+ return scratch_;
-+ }
-+
-+ void release() {
-+ MOZ_ASSERT(!released_);
-+ released_ = true;
-+ if (owned_) {
-+ temps_.Release(scratch_);
-+ owned_ = false;
-+ }
-+ }
-+
-+ void reacquire() {
-+ MOZ_ASSERT(released_);
-+ released_ = false;
-+ if (!owned_) {
-+ scratch_ = temps_.Acquire();
-+ owned_ = true;
-+ }
-+ }
-+};
-+
-+class ScratchTagScopeRelease {
-+ ScratchTagScope* ts_;
-+
-+ public:
-+ explicit ScratchTagScopeRelease(ScratchTagScope* ts) : ts_(ts) {
-+ ts_->release();
-+ }
-+ ~ScratchTagScopeRelease() { ts_->reacquire(); }
-+};
-+
-+class MacroAssemblerPPC64 : public Assembler {
-+ protected:
-+ MacroAssembler& asMasm();
-+ const MacroAssembler& asMasm() const;
-+};
-+
-+class MacroAssemblerPPC64Compat : public MacroAssemblerPPC64 {
-+ public:
-+ using MacroAssemblerPPC64::MacroAssemblerPPC64;
-+
-+ MacroAssemblerPPC64Compat() {}
-+
-+ bool buildOOLFakeExitFrame(void* fakeReturnAddr);
-+
-+ // ===============================================================
-+ // Conversion functions
-+
-+ void convertBoolToInt32(Register src, Register dest) {
-+ as_rlwinm(dest, src, 0, 31, 31);
-+ }
-+ void convertInt32ToDouble(Register src, FloatRegister dest) {
-+ // mtvsrwa: VSR[dest].dw0 = sign_ext_64(src[32:63]); P8+ (ISA 2.07).
-+ // Replaces extsw + mtvsrd (2 insns + scratch GPR) with 1 insn.
-+ as_mtvsrwa(dest, src);
-+ as_fcfid(dest, dest);
-+ }
-+ void convertInt32ToDouble(const Address& src, FloatRegister dest) {
-+ // lfiwax (P7+): FPR.dw[0] = sign_ext_64(MEM[addr, 4]). X-form indexed
-+ // — no immediate offset, so when offset != 0 we add it into a scratch
-+ // first. Replaces lwz + extsw + mtvsrd with lfiwax (one insn) plus
-+ // optional address add.
-+ if (src.offset == 0) {
-+ as_lfiwax(dest, r0, src.base);
-+ } else {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ if (is_intN(src.offset, 16)) {
-+ as_addi(scratch, src.base, src.offset);
-+ as_lfiwax(dest, r0, scratch);
-+ } else {
-+ // X-form indexed: lfiwax computes base + scratch directly, no add.
-+ movePtr(ImmWord(src.offset), scratch);
-+ as_lfiwax(dest, src.base, scratch);
-+ }
-+ }
-+ as_fcfid(dest, dest);
-+ }
-+ void convertInt32ToDouble(const BaseIndex& src, FloatRegister dest) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ computeScaledAddress(src, scratch);
-+ convertInt32ToDouble(Address(scratch, src.offset), dest);
-+ }
-+ void convertUInt32ToDouble(Register src, FloatRegister dest);
-+ void convertUInt32ToFloat32(Register src, FloatRegister dest);
-+ void convertDoubleToFloat32(FloatRegister src, FloatRegister dest) {
-+ as_frsp(dest, src);
-+ }
-+ // POWER9 FP16 conversions (1 insn each). Caller must have verified
-+ // HasPOWER9() — SupportsFloat{64,32}To16 gates that. PPC64 FPRs hold
-+ // doubles internally; an "FP32-in-FPR" is just the FP32 value stored
-+ // as exact FP64, so xscvdphp/xscvhpdp work for both FP32↔FP16 and
-+ // FP64↔FP16 (FP16 fits exactly in FP32 which fits exactly in FP64).
-+ void convertDoubleToFloat16(FloatRegister src, FloatRegister dest) {
-+ MOZ_ASSERT(HasPOWER9());
-+ as_xscvdphp(dest, src);
-+ }
-+ void convertFloat16ToDouble(FloatRegister src, FloatRegister dest) {
-+ MOZ_ASSERT(HasPOWER9());
-+ as_xscvhpdp(dest, src);
-+ }
-+ void convertFloat32ToFloat16(FloatRegister src, FloatRegister dest) {
-+ MOZ_ASSERT(HasPOWER9());
-+ as_xscvdphp(dest, src);
-+ }
-+ void convertFloat16ToFloat32(FloatRegister src, FloatRegister dest) {
-+ MOZ_ASSERT(HasPOWER9());
-+ as_xscvhpdp(dest, src);
-+ }
-+ void convertInt32ToFloat16(Register src, FloatRegister dest) {
-+ MOZ_ASSERT(HasPOWER9());
-+ convertInt32ToFloat32(src, dest);
-+ convertFloat32ToFloat16(dest, dest);
-+ }
-+ void convertDoubleToInt32(FloatRegister src, Register dest, Label* fail,
-+ bool negativeZeroCheck = true);
-+ void convertDoubleToPtr(FloatRegister src, Register dest, Label* fail,
-+ bool negativeZeroCheck = true);
-+ void convertFloat32ToInt32(FloatRegister src, Register dest, Label* fail,
-+ bool negativeZeroCheck = true);
-+ void convertFloat32ToDouble(FloatRegister src, FloatRegister dest) {
-+ // PPC64 FPRs hold every FP32 value in its FP64-equivalent representation,
-+ // so f64.promote_f32 is conceptually a no-op except that wasm requires
-+ // sNaN inputs to be quieted. frsp (Round to Single-Precision) is the
-+ // identity for SP-representable inputs but applies IEEE NaN-quieting as
-+ // a side effect, replacing the prior fmr + fcmpu + branch + canonical-
-+ // NaN-load (5+ insns + scratch GPR) with a single instruction. Result
-+ // matches what x86 vcvtss2sd / ARM fcvt produce.
-+ as_frsp(dest, src);
-+ }
-+ void convertInt32ToFloat32(Register src, FloatRegister dest) {
-+ // mtvsrwa + fcfids; same recipe as convertInt32ToDouble(Register).
-+ as_mtvsrwa(dest, src);
-+ as_fcfids(dest, dest);
-+ }
-+ void convertInt32ToFloat32(const Address& src, FloatRegister dest) {
-+ // lfiwax + fcfids; same recipe as convertInt32ToDouble(Address).
-+ if (src.offset == 0) {
-+ as_lfiwax(dest, r0, src.base);
-+ } else {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ if (is_intN(src.offset, 16)) {
-+ as_addi(scratch, src.base, src.offset);
-+ as_lfiwax(dest, r0, scratch);
-+ } else {
-+ movePtr(ImmWord(src.offset), scratch);
-+ as_lfiwax(dest, src.base, scratch);
-+ }
-+ }
-+ as_fcfids(dest, dest);
-+ }
-+
-+ // POWER9 FP16 load: lxsihzx writes the 2 memory bytes directly into
-+ // dw[0] low 16 bits with the rest zeroed — matching the layout that
-+ // xscvhpdp expects, in a single instruction.
-+ FaultingCodeOffset loadFloat16(const Address& addr, FloatRegister dest,
-+ Register temp) {
-+ MOZ_ASSERT(HasPOWER9());
-+ if (addr.offset == 0) {
-+ return FaultingCodeOffset(as_lxsihzx(dest, r0, addr.base).getOffset());
-+ }
-+ if (is_intN(addr.offset, 16)) {
-+ as_addi(temp, addr.base, addr.offset);
-+ return FaultingCodeOffset(as_lxsihzx(dest, r0, temp).getOffset());
-+ }
-+ movePtr(ImmWord(addr.offset), temp);
-+ return FaultingCodeOffset(as_lxsihzx(dest, addr.base, temp).getOffset());
-+ }
-+ FaultingCodeOffset loadFloat16(const BaseIndex& src, FloatRegister dest,
-+ Register temp) {
-+ MOZ_ASSERT(HasPOWER9());
-+ computeEffectiveAddress(src, temp);
-+ return FaultingCodeOffset(as_lxsihzx(dest, r0, temp).getOffset());
-+ }
-+
-+ // ===============================================================
-+ // Effective address computation
-+
-+ void computeScaledAddress(const BaseIndex& address, Register dest) {
-+ if (address.scale == TimesOne) {
-+ as_add(dest, address.base, address.index);
-+ } else if (dest != address.base && dest != address.index) {
-+ x_sldi(dest, address.index, address.scale);
-+ as_add(dest, address.base, dest);
-+ } else {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ x_sldi(scratch, address.index, address.scale);
-+ as_add(dest, address.base, scratch);
-+ }
-+ }
-+
-+ void computeEffectiveAddress(const Address& address, Register dest) {
-+ if (address.offset == 0) {
-+ if (dest != address.base) {
-+ xs_mr(dest, address.base);
-+ }
-+ } else if (is_intN(address.offset, 16)) {
-+ as_addi(dest, address.base, address.offset);
-+ } else if (HasPOWER10() && is_intN(address.offset, 34)) {
-+ // Single-insn 34-bit-signed reg+imm add. Avoids the scratch GPR.
-+ as_paddi(dest, address.base, address.offset, /*R=*/false);
-+ } else {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ MOZ_ASSERT(scratch != dest);
-+ movePtr(ImmWord(address.offset), scratch);
-+ as_add(dest, address.base, scratch);
-+ }
-+ }
-+ void computeEffectiveAddress(const BaseIndex& address, Register dest) {
-+ computeScaledAddress(address, dest);
-+ if (address.offset) {
-+ if (is_intN(address.offset, 16)) {
-+ as_addi(dest, dest, address.offset);
-+ } else if (HasPOWER10() && is_intN(address.offset, 34)) {
-+ as_paddi(dest, dest, address.offset, /*R=*/false);
-+ } else {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ MOZ_ASSERT(scratch != dest);
-+ movePtr(ImmWord(address.offset), scratch);
-+ as_add(dest, dest, scratch);
-+ }
-+ }
-+ }
-+
-+ // ===============================================================
-+ // Move instructions
-+
-+ void mov(Register src, Register dest) { xs_mr(dest, src); }
-+ void mov(ImmWord imm, Register dest) { movePtr(imm, dest); }
-+ void mov(ImmPtr imm, Register dest) {
-+ mov(ImmWord(uintptr_t(imm.value)), dest);
-+ }
-+ // Emit an 8-instruction NOP stanza for a patchable 64-bit load.
-+ // Pool flushes are inhibited during emission to prevent pool data
-+ // from being inserted mid-stanza.
-+ BufferOffset emitLoad64Stanza(Register dest, uint64_t value) {
-+ m_buffer.enterNoPool(kNoPoolLoad64StanzaInsns);
-+ BufferOffset bo = writeInst(NopInst);
-+ writeInst(NopInst);
-+ writeInst(NopInst);
-+ writeInst(NopInst);
-+ writeInst(NopInst);
-+ writeInst(NopInst);
-+ writeInst(NopInst);
-+ writeInst(NopInst);
-+ m_buffer.leaveNoPool();
-+ // If any of the 8 writeInst calls hit OOM, only some of the stanza
-+ // was reserved in the buffer. WriteLoad64Instructions writes 32 bytes
-+ // unconditionally, so calling it here would overflow the Vector's
-+ // backing store and corrupt the next heap chunk's metadata, surfacing
-+ // later as a malloc-detected free-time crash.
-+ if (m_buffer.oom()) {
-+ return bo;
-+ }
-+ WriteLoad64Instructions((Instruction*)editSrc(bo), dest, value);
-+ return bo;
-+ }
-+
-+ void mov(CodeLabel* label, Register dest) {
-+ BufferOffset bo = emitLoad64Stanza(dest, LabelBase::INVALID_OFFSET);
-+ label->patchAt()->bind(bo.getOffset());
-+ label->setLinkMode(CodeLabel::MoveImmediate);
-+ }
-+ void mov(Register src, Address dest) { storePtr(src, dest); }
-+ void mov(Address src, Register dest) { loadPtr(src, dest); }
-+
-+ void move32(Imm32 imm, Register dest) {
-+ if (is_intN(imm.value, 16)) {
-+ xs_li(dest, (int16_t)imm.value);
-+ } else if (is_uintN((uint32_t)imm.value, 16)) {
-+ xs_li(dest, 0);
-+ as_ori(dest, dest, (uint16_t)imm.value);
-+ } else {
-+ xs_lis(dest, (int16_t)((uint32_t)imm.value >> 16));
-+ if (imm.value & 0xffff) {
-+ as_ori(dest, dest, (uint16_t)imm.value);
-+ }
-+ }
-+ }
-+ void move32(Register src, Register dest) { as_extsw(dest, src); }
-+
-+ void movePtr(Register src, Register dest) {
-+ if (src != dest) {
-+ xs_mr(dest, src);
-+ }
-+ }
-+ void movePtr(ImmWord imm, Register dest) {
-+ if (imm.value == 0) {
-+ xs_li(dest, 0);
-+ } else if (is_intN((intptr_t)imm.value, 16)) {
-+ xs_li(dest, (int16_t)imm.value);
-+ } else if (is_uintN(imm.value, 16)) {
-+ xs_li(dest, 0);
-+ as_ori(dest, dest, (uint16_t)imm.value);
-+ } else if (is_intN((intptr_t)imm.value, 32)) {
-+ // 32-bit signed: lis + ori (2 instructions).
-+ xs_lis(dest, (int16_t)((uint32_t)imm.value >> 16));
-+ if (imm.value & 0xFFFF) {
-+ as_ori(dest, dest, (uint16_t)imm.value);
-+ }
-+ } else if (HasPOWER10() && is_intN((intptr_t)imm.value, 34)) {
-+ // POWER10: a single prefixed paddi materializes a 34-bit signed
-+ // immediate, replacing the 5-insn fallback for values needing 33 or 34
-+ // signed bits. 8 bytes vs 20 bytes, and no temp register is needed.
-+ as_paddi(dest, r0, (int64_t)imm.value, /*R=*/false);
-+ } else {
-+ // Full 64-bit: GCC-style lis+ori+lis+ori+rldimi (5 instructions).
-+ // No LR clobber, no embedded data — pure instruction sequence.
-+ uint32_t lo32 = (uint32_t)(imm.value);
-+ uint32_t hi32 = (uint32_t)(imm.value >> 32);
-+ Register temp = (dest != SecondScratchReg) ? SecondScratchReg
-+ : SavedScratchRegister;
-+ m_buffer.ensureSpace(5 * sizeof(uint32_t));
-+ xs_lis(dest, (int16_t)(lo32 >> 16));
-+ as_ori(dest, dest, lo32 & 0xFFFF);
-+ xs_lis(temp, (int16_t)(hi32 >> 16));
-+ as_ori(temp, temp, hi32 & 0xFFFF);
-+ as_rldimi(dest, temp, 32, 0);
-+ }
-+ }
-+ void movePtr(ImmPtr imm, Register dest) {
-+ movePtr(ImmWord(uintptr_t(imm.value)), dest);
-+ }
-+
-+ // Load a 64-bit FPR constant from the inline constant pool.
-+ // POWER9: 2 instructions (addpcis + lfd) -- no alignment constraint.
-+ // POWER10: 1 prefixed instruction (plfd, 2 slots), or 3 slots in the
-+ // (loadAddr & 63) == 60 alignment-leading-nop case. Reserve 3 to
-+ // cover both cases conservatively.
-+ // POWER8: not used -- loadConstantDouble inlines the constant.
-+ BufferOffset loadFromPoolFloat64(FloatRegister dest, double value) {
-+ size_t slots = HasPOWER10() ? 3 : 2;
-+ uint32_t hint = (uint32_t(dest.encoding()) << 16) |
-+ (uint32_t(PoolLoadFPR64) << 21) | 0xF0000000;
-+ uint32_t inst[3] = {hint, NopInst, NopInst};
-+ return m_buffer.allocEntry(slots, 2, (uint8_t*)inst, (uint8_t*)&value);
-+ }
-+ // Load a 32-bit FPR constant from the inline constant pool.
-+ // Same shape as loadFromPoolFloat64 (above). lfs/plfs auto-expand the
-+ // 32-bit single-precision value to double in the FPR, so no follow-up
-+ // xscvspdpn is needed.
-+ BufferOffset loadFromPoolFloat32(FloatRegister dest, float value) {
-+ size_t slots = HasPOWER10() ? 3 : 2;
-+ uint32_t hint = (uint32_t(dest.encoding()) << 16) |
-+ (uint32_t(PoolLoadFPR32) << 21) | 0xF0000000;
-+ uint32_t inst[3] = {hint, NopInst, NopInst};
-+ return m_buffer.allocEntry(slots, 1, (uint8_t*)inst, (uint8_t*)&value);
-+ }
-+ // Load a 128-bit SIMD constant from the inline constant pool.
-+ // Per-arch slot reservation -- the patcher writes only the slots
-+ // each micro-arch actually needs:
-+ // P8: 5 (bcl + mflr + addi + lxvd2x + xxpermdi)
-+ // P9: 3 (addpcis + addi + lxvx) -- no LR touch, no RAS hazard
-+ // P10: 3 (alignment-safe: prefix + suffix + 1 reserve for the
-+ // (loadAddr & 63) == 60 leading-nop case)
-+ // Pool entry is 4 × 4-byte words = 16 bytes. P9 uses
-+ // SavedScratchRegister (r16) as the PC base; P10 emits a single
-+ // PC-relative plxv with no scratch and no LR touch. Only P8 still
-+ // clobbers LR (correctness-only fallback; live by design).
-+ BufferOffset loadFromPoolSimd128(FloatRegister dest,
-+ const SimdConstant& v) {
-+ size_t slots;
-+ if (HasPOWER10()) {
-+ slots = 3;
-+ } else if (HasPOWER9()) {
-+ slots = 3;
-+ } else {
-+ slots = 5;
-+ }
-+ // Simd128 encoding is 32-63; mask to 5 bits for hint.
-+ // PatchConstantPoolLoad sets TX bit unconditionally for Simd128.
-+ uint32_t hint = ((uint32_t(dest.encoding()) & 0x1F) << 16) |
-+ (uint32_t(PoolLoadSimd128) << 21) | 0xF0000000;
-+ uint32_t inst[5] = {hint, NopInst, NopInst, NopInst, NopInst};
-+ return m_buffer.allocEntry(slots, 4, (uint8_t*)inst, (uint8_t*)v.bytes());
-+ }
-+ void movePtr(wasm::SymbolicAddress imm, Register dest) {
-+ BufferOffset bo = emitLoad64Stanza(dest, (uint64_t)-1);
-+ append(wasm::SymbolicAccess(CodeOffset(bo.getOffset()), imm));
-+ }
-+ void movePtr(ImmGCPtr imm, Register dest) {
-+ BufferOffset bo = emitLoad64Stanza(dest,
-+ (uint64_t)uintptr_t(imm.value));
-+ Assembler::writeDataRelocation(bo, imm);
-+ }
-+
-+ void moveFloat32(FloatRegister src, FloatRegister dest) {
-+ if (src != dest) {
-+ as_fmr(dest, src);
-+ }
-+ }
-+ void moveDouble(FloatRegister src, FloatRegister dest) {
-+ if (src != dest) {
-+ as_fmr(dest, src);
-+ }
-+ }
-+
-+ // ===============================================================
-+ // Branch functions
-+
-+ void branch(JitCode* c) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ BufferOffset bo = emitLoad64Stanza(scratch, (uint64_t)uintptr_t(c->raw()));
-+ addPendingJump(bo, ImmPtr(c->raw()), RelocationKind::JITCODE);
-+ xs_mtctr(scratch);
-+ as_bctr();
-+ }
-+ void branch(const Register reg) {
-+ xs_mtctr(reg);
-+ as_bctr();
-+ }
-+
-+ void jump(Label* label) {
-+ if (label->bound()) {
-+ // Open the no-pool window BEFORE computing the displacement. The
-+ // enterNoPool() call itself can trigger a pool flush, which advances
-+ // currentOffset(). Computing the displacement against the pre-flush
-+ // offset and then emitting the b at the post-flush offset would land
-+ // the branch (poolSize) bytes past the intended target.
-+ m_buffer.enterNoPool(2);
-+ int32_t offset = label->offset() - currentOffset();
-+ if (JOffImm26::IsInRange(offset)) {
-+ as_b(offset);
-+ writeInst(NopInst);
-+ m_buffer.leaveNoPool();
-+ return;
-+ }
-+ m_buffer.leaveNoPool();
-+ // Long jump to bound label.
-+ BufferOffset bo =
-+ emitLoad64Stanza(SecondScratchReg, LabelBase::INVALID_OFFSET);
-+ xs_mtctr(SecondScratchReg);
-+ as_bctr();
-+ addLongJump(bo, BufferOffset(label->offset()));
-+ return;
-+ }
-+ // Unbound label: emit trap-tagged stanza (10 slots).
-+ m_buffer.enterNoPool(kNoPoolPatchableBranchInsns);
-+ BufferOffset bo = xs_trap_tagged(BTag);
-+ writeInst(label->used() ? label->offset() : LabelBase::INVALID_OFFSET);
-+ writeInst(NopInst);
-+ writeInst(NopInst);
-+ writeInst(NopInst);
-+ writeInst(NopInst);
-+ writeInst(NopInst);
-+ writeInst(NopInst);
-+ writeInst(NopInst);
-+ writeInst(NopInst);
-+ m_buffer.leaveNoPool();
-+ if (!oom()) {
-+ label->use(bo.getOffset());
-+ }
-+ }
-+ void jump(Register reg) {
-+ xs_mtctr(reg);
-+ as_bctr();
-+ }
-+ void jump(const Address& address) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ loadPtr(address, scratch);
-+ xs_mtctr(scratch);
-+ as_bctr();
-+ }
-+ void jump(JitCode* code) { branch(code); }
-+ void jump(ImmPtr ptr) {
-+ BufferOffset bo =
-+ emitLoad64Stanza(SecondScratchReg, (uint64_t)uintptr_t(ptr.value));
-+ addPendingJump(bo, ptr, RelocationKind::HARDCODED);
-+ xs_mtctr(SecondScratchReg);
-+ as_bctr();
-+ }
-+ void jump(TrampolinePtr code) { jump(ImmPtr(code.value)); }
-+
-+ // Conditional branch to label. Assumes a compare instruction has already
-+ // been emitted that sets CR0.
-+ template <typename CondT>
-+ void ma_b(CondT cond, Label* label) {
-+ if constexpr (std::is_same_v<CondT, Condition>) {
-+ if (cond == Always) {
-+ jump(label);
-+ return;
-+ }
-+ }
-+ if (label->bound()) {
-+ // Open the no-pool window BEFORE computing the displacement. Same
-+ // hazard as jump(): enterNoPool may itself flush a pending pool,
-+ // advancing currentOffset(); the bc must emit with a displacement
-+ // computed against the post-flush offset. Budget covers max 6
-+ // instructions: POWER8 Overflow XER ops (3) + cror (1) + bc (1) +
-+ // nop (1) for the worst-case DoubleCondition+Overflow short path.
-+ m_buffer.enterNoPool(6);
-+ // For DoubleCondition, as_bc emits cror/crandc before the bc
-+ // instruction, advancing currentOffset() by 4. Account for this
-+ // in the offset calculation.
-+ int32_t crAdjust = 0;
-+ if constexpr (std::is_same_v<CondT, DoubleCondition>) {
-+ crAdjust = -(int32_t)sizeof(uint32_t);
-+ }
-+ int32_t offset = label->offset() - currentOffset() + crAdjust;
-+ if (BOffImm16::IsInRange(offset)) {
-+ as_bc((int16_t)offset, cond);
-+ writeInst(NopInst);
-+ m_buffer.leaveNoPool();
-+ return;
-+ }
-+ m_buffer.leaveNoPool();
-+ // Long conditional branch for bound label.
-+ // XER ops(0-3) + cror(0-1) + bc(1) + stanza(8) + mtctr(1) + bctr(1).
-+ // P8 Overflow: mfxer+rlwinm+mtcrf+bc+stanza+mtctr+bctr = 14 max.
-+ m_buffer.enterNoPool(kNoPoolCondLongBranchInsnsP8Max);
-+ as_bc((int16_t)44, InvertCondition(cond));
-+ BufferOffset boLoad =
-+ emitLoad64Stanza(SecondScratchReg, LabelBase::INVALID_OFFSET);
-+ xs_mtctr(SecondScratchReg);
-+ as_bctr();
-+ m_buffer.leaveNoPool();
-+ addLongJump(boLoad, BufferOffset(label->offset()));
-+ return;
-+ }
-+ // Forward reference: emit BCTag stanza.
-+ // XER ops(0-3) + cror(0-1) + bc(1) + trap_tagged(1) + chain(1) + 8 NOPs.
-+ // P8 Overflow: mfxer+rlwinm+mtcrf+bc+trap+chain+8NOPs = 14 max.
-+ m_buffer.enterNoPool(kNoPoolCondLongBranchInsnsP8Max);
-+ as_bc((int16_t)44, InvertCondition(cond));
-+ BufferOffset bo = xs_trap_tagged(BCTag);
-+ writeInst(label->used() ? label->offset() : LabelBase::INVALID_OFFSET);
-+ writeInst(NopInst);
-+ writeInst(NopInst);
-+ writeInst(NopInst);
-+ writeInst(NopInst);
-+ writeInst(NopInst);
-+ writeInst(NopInst);
-+ writeInst(NopInst);
-+ writeInst(NopInst);
-+ m_buffer.leaveNoPool();
-+ if (!oom()) {
-+ label->use(bo.getOffset());
-+ }
-+ }
-+
-+ // Set dest = 1 if CR0 satisfies cond, else dest = 0.
-+ // POWER10: setbc/setbcr (1 insn). P8/P9: isel-based path with the
-+ // r0-as-zero trick on the BranchOnClear half.
-+ void ma_cmp_set(Register dest, Condition cond) {
-+ uint32_t base = uint32_t(cond) & 0xff;
-+ uint32_t setbase = (base & ~BranchOptionMask) | BranchOnSet;
-+ if (HasPOWER10()) {
-+ if ((base & BranchOptionMask) == BranchOnSet) {
-+ as_setbc(dest, setbase, cr0);
-+ } else {
-+ as_setbcr(dest, setbase, cr0);
-+ }
-+ return;
-+ }
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ xs_li(scratch, 1);
-+ if ((base & BranchOptionMask) == BranchOnSet) {
-+ xs_li(dest, 0);
-+ as_isel(dest, scratch, dest, setbase, cr0);
-+ } else {
-+ as_isel0(dest, r0, scratch, setbase, cr0);
-+ }
-+ }
-+
-+ void ma_cmp_set_dbl(Register dest, DoubleCondition cond) {
-+ uint32_t base = uint32_t(cond) & 0xff;
-+ bool hasUnorderedFlag = uint32_t(cond) & DoubleConditionUnordered;
-+ uint32_t setbase = (base & ~BranchOptionMask) | BranchOnSet;
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ if (HasPOWER10()) {
-+ if ((base & BranchOptionMask) == BranchOnSet) {
-+ as_setbc(dest, setbase, cr0);
-+ } else {
-+ as_setbcr(dest, setbase, cr0);
-+ }
-+ // The unordered fixup below needs scratch=1 for its SO-isel (isel0 uses r0).
-+ if (hasUnorderedFlag || ((base & BranchOptionMask) != BranchOnSet &&
-+ cond != DoubleOrdered)) {
-+ xs_li(scratch, 1);
-+ }
-+ } else {
-+ xs_li(scratch, 1);
-+ if ((base & BranchOptionMask) == BranchOnSet) {
-+ xs_li(dest, 0);
-+ as_isel(dest, scratch, dest, setbase, cr0);
-+ } else {
-+ as_isel0(dest, r0, scratch, setbase, cr0);
-+ }
-+ }
-+ if (hasUnorderedFlag) {
-+ // Condition includes unordered (NaN): force dest=1 when SO is set.
-+ // isel dest, scratch(=1), dest, SO
-+ as_isel(dest, scratch, dest, uint16_t(SOBit), cr0);
-+ } else if ((base & BranchOptionMask) != BranchOnSet &&
-+ cond != DoubleOrdered) {
-+ // Ordered comparison that negates a CR bit (BranchOnClear): NaN
-+ // produces all-zero LT/GT/EQ bits which makes the negation return
-+ // true. Fix by forcing dest=0 when SO is set.
-+ as_isel0(dest, r0, dest, uint16_t(SOBit), cr0);
-+ }
-+ }
-+
-+ // Conditional move: if CR0 satisfies cond, dest = src.
-+ void ma_cmp_move(Register dest, Register src, Condition cond) {
-+ uint32_t base = uint32_t(cond) & 0xff;
-+ uint32_t setbase = (base & ~BranchOptionMask) | BranchOnSet;
-+ if ((base & BranchOptionMask) == BranchOnSet) {
-+ as_isel(dest, src, dest, setbase, cr0);
-+ } else {
-+ as_isel(dest, dest, src, setbase, cr0);
-+ }
-+ }
-+
-+ // If cond == 0, move src to dst; otherwise dst is unchanged. The only
-+ // caller is wasm select, whose condition is a 32-bit value: compare just
-+ // its low 32 bits with zero (cmpwi) so garbage in the upper 32 bits (e.g.
-+ // under register pressure) does not make a zero condition read as non-zero.
-+ void moveIfZero(Register dst, Register src, Register cond) {
-+ as_cmpwi(cond, 0);
-+ as_isel(dst, src, dst, Equal, cr0);
-+ }
-+
-+ void ma_add32TestCarry(Condition cond, Register rd, Register rs, Imm32 imm,
-+ Label* overflow);
-+ void ma_addPtrTestCarry(Condition cond, Register rd, Register rs, ImmWord imm,
-+ Label* overflow);
-+
-+ // Issue the correct compare instruction for the given condition and
-+ // operand sizes. Returns the condition to use with ma_b or ma_cmp_set
-+ // (usually the same, but unsigned conditions use cmpl* variants).
-+ Condition ma_cmp(Register lhs, Register rhs, Condition cond,
-+ bool is32bit = false) {
-+ Condition base =
-+ static_cast<Condition>(cond & ~(ConditionUnsigned | ConditionZero));
-+ bool isUnsigned = (cond & ConditionUnsigned) != 0;
-+ // ConditionZero-flagged conditions (Signed, NotSigned, Zero, NonZero)
-+ // test a single register against zero, not two registers against each
-+ // other. Compare against immediate 0.
-+ if ((cond & ConditionZero) != 0) {
-+ if (is32bit) {
-+ as_cmpwi(lhs, 0);
-+ } else {
-+ as_cmpdi(lhs, 0);
-+ }
-+ return base;
-+ }
-+ if (is32bit) {
-+ if (isUnsigned) {
-+ as_cmplw(lhs, rhs);
-+ } else {
-+ as_cmpw(lhs, rhs);
-+ }
-+ } else {
-+ if (isUnsigned) {
-+ as_cmpld(lhs, rhs);
-+ } else {
-+ as_cmpd(lhs, rhs);
-+ }
-+ }
-+ return base;
-+ }
-+
-+ Condition ma_cmp(Register lhs, Imm32 rhs, Condition cond,
-+ bool is32bit = false) {
-+ Condition base =
-+ static_cast<Condition>(cond & ~(ConditionUnsigned | ConditionZero));
-+ bool isUnsigned = (cond & ConditionUnsigned) != 0;
-+ if (isUnsigned) {
-+ if (is_uintN(rhs.value, 16)) {
-+ if (is32bit) {
-+ as_cmplwi(lhs, rhs.value);
-+ } else {
-+ as_cmpldi(lhs, rhs.value);
-+ }
-+ return base;
-+ }
-+ } else {
-+ if (is_intN(rhs.value, 16)) {
-+ if (is32bit) {
-+ as_cmpwi(lhs, rhs.value);
-+ } else {
-+ as_cmpdi(lhs, rhs.value);
-+ }
-+ return base;
-+ }
-+ }
-+ // Immediate doesn't fit — materialize into scratch and compare.
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ move32(rhs, scratch);
-+ return ma_cmp(lhs, scratch, cond, is32bit);
-+ }
-+
-+ Condition ma_cmp(Register lhs, ImmWord rhs, Condition cond) {
-+ Condition base =
-+ static_cast<Condition>(cond & ~(ConditionUnsigned | ConditionZero));
-+ bool isUnsigned = (cond & ConditionUnsigned) != 0;
-+ if (isUnsigned) {
-+ if (is_uintN(rhs.value, 16)) {
-+ as_cmpldi(lhs, rhs.value);
-+ return base;
-+ }
-+ } else {
-+ if (is_intN(rhs.value, 16)) {
-+ as_cmpdi(lhs, rhs.value);
-+ return base;
-+ }
-+ }
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ movePtr(rhs, scratch);
-+ return ma_cmp(lhs, scratch, cond);
-+ }
-+
-+ Condition ma_cmp(Register lhs, ImmPtr rhs, Condition cond) {
-+ return ma_cmp(lhs, ImmWord(uintptr_t(rhs.value)), cond);
-+ }
-+
-+ Condition ma_cmp(Register lhs, ImmGCPtr rhs, Condition cond) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ movePtr(rhs, scratch);
-+ return ma_cmp(lhs, scratch, cond);
-+ }
-+
-+ Condition ma_cmp(Register lhs, ImmTag rhs, Condition cond) {
-+ // Tag values on PUNBOX64 are 17-bit (0x1FFF0+), too large for 16-bit
-+ // signed or unsigned immediates.
-+ Condition base =
-+ static_cast<Condition>(cond & ~(ConditionUnsigned | ConditionZero));
-+ bool isUnsigned = (cond & ConditionUnsigned) != 0;
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ move32(Imm32(rhs.value), scratch);
-+ if (isUnsigned) {
-+ as_cmpld(lhs, scratch);
-+ } else {
-+ as_cmpd(lhs, scratch);
-+ }
-+ return base;
-+ }
-+
-+ // Compare a tag register against an ImmTag constant and branch, WITHOUT
-+ // acquiring a scratch register. Uses xoris+cmplwi which MODIFIES tagReg.
-+ // Only safe when tagReg is a scratch register owned by the caller.
-+ void branchTestTag(Condition cond, Register tagReg, ImmTag tag, Label* label) {
-+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+ uint32_t t = tag.value;
-+ as_xoris(tagReg, tagReg, t >> 16);
-+ as_cmplwi(tagReg, t & 0xFFFF);
-+ Condition c = (cond == Equal) ? Equal : NotEqual;
-+ ma_b(c, label);
-+ }
-+
-+ void ma_mod_mask(Register src, Register dest, Register hold, Register remain,
-+ int32_t shift, Label* negZero = nullptr);
-+
-+ void nop() { writeInst(NopInst); }
-+ void breakpoint(uint32_t value = 0) { xs_trap(); }
-+
-+ inline void retn(Imm32 n);
-+
-+ // ===============================================================
-+ // Stack operations
-+
-+ void push(Imm32 imm) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ move32(imm, scratch);
-+ push(scratch);
-+ }
-+ void push(ImmWord imm) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ movePtr(imm, scratch);
-+ push(scratch);
-+ }
-+ void push(ImmGCPtr imm) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ movePtr(imm, scratch);
-+ push(scratch);
-+ }
-+ void push(const Address& address) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ loadPtr(address, scratch);
-+ push(scratch);
-+ }
-+ void push(Register reg) { as_stdu(reg, StackPointer, -8); }
-+ void push(FloatRegister reg) {
-+ // stfdu/stfsu fuses the SP decrement and the FP store: EA=SP-8,
-+ // MEM[EA]=reg, SP=EA. 1 insn instead of addi+stfd/stfs.
-+ if (reg.isSingle()) {
-+ as_stfsu(reg, StackPointer, -8);
-+ } else {
-+ as_stfdu(reg, StackPointer, -8);
-+ }
-+ }
-+ void pop(Register reg) {
-+ as_ld(reg, StackPointer, 0);
-+ as_addi(StackPointer, StackPointer, 8);
-+ }
-+ void pop(FloatRegister reg) {
-+ if (reg.isSingle()) {
-+ as_lfs(reg, StackPointer, 0);
-+ } else {
-+ as_lfd(reg, StackPointer, 0);
-+ }
-+ as_addi(StackPointer, StackPointer, 8);
-+ }
-+
-+ CodeOffset pushWithPatch(ImmWord imm) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ CodeOffset offset = movWithPatch(imm, scratch);
-+ push(scratch);
-+ return offset;
-+ }
-+ CodeOffset movWithPatch(ImmWord imm, Register dest) {
-+ BufferOffset bo = emitLoad64Stanza(dest, (uint64_t)imm.value);
-+ return CodeOffset(bo.getOffset());
-+ }
-+ CodeOffset movWithPatch(ImmPtr imm, Register dest) {
-+ return movWithPatch(ImmWord(uintptr_t(imm.value)), dest);
-+ }
-+
-+ // ===============================================================
-+ // Tag/unbox operations
-+
-+ void splitTag(Register src, Register dest) {
-+ x_srdi(dest, src, JSVAL_TAG_SHIFT);
-+ }
-+ void splitTag(const ValueOperand& operand, Register dest) {
-+ splitTag(operand.valueReg(), dest);
-+ }
-+ void splitTagForTest(const ValueOperand& value, ScratchTagScope& tag) {
-+ splitTag(value, tag);
-+ }
-+
-+ void unboxNonDouble(const ValueOperand& operand, Register dest,
-+ JSValueType type) {
-+ unboxNonDouble(operand.valueReg(), dest, type);
-+ }
-+ template <typename T>
-+ void unboxNonDouble(T src, Register dest, JSValueType type) {
-+ MOZ_ASSERT(type != JSVAL_TYPE_DOUBLE);
-+ if (type == JSVAL_TYPE_INT32 || type == JSVAL_TYPE_BOOLEAN) {
-+ load32(src, dest);
-+ return;
-+ }
-+ loadPtr(src, dest);
-+ unboxNonDouble(dest, dest, type);
-+ }
-+ void unboxNonDouble(Register src, Register dest, JSValueType type) {
-+ MOZ_ASSERT(type != JSVAL_TYPE_DOUBLE);
-+ if (type == JSVAL_TYPE_INT32 || type == JSVAL_TYPE_BOOLEAN) {
-+ as_extsw(dest, src);
-+ return;
-+ }
-+ // Extract the payload (lower 47 bits) by clearing the tag.
-+ // This avoids acquiring a scratch register, preventing pool exhaustion
-+ // when called from nested scratch scopes (e.g., ScratchTagScope →
-+ // branchTestStringTruthy → unboxString → here).
-+ // rldicl dest, src, 0, 17 — clear upper 17 bits (tag), keep lower 47.
-+ as_rldicl(dest, src, 0, 17);
-+ }
-+ void unboxGCThingForGCBarrier(const Address& src, Register dest) {
-+ loadPtr(src, dest);
-+ // Clear tag bits (top 17 bits on 64-bit).
-+ as_rldicl(dest, dest, 0, 64 - JSVAL_TAG_SHIFT);
-+ }
-+ void unboxGCThingForGCBarrier(const ValueOperand& src, Register dest) {
-+ as_rldicl(dest, src.valueReg(), 0, 64 - JSVAL_TAG_SHIFT);
-+ }
-+ void unboxWasmAnyRefGCThingForGCBarrier(const Address& src, Register dest) {
-+ static_assert(wasm::AnyRef::TagShift == 2);
-+ loadPtr(src, dest);
-+ as_rldicr(dest, dest, 0, 61);
-+ }
-+ void getGCThingValueChunk(const Address& src, Register dest) {
-+ loadPtr(src, dest);
-+ as_rldicl(dest, dest, 0, 17);
-+ as_rldicr(dest, dest, 0, 43);
-+ }
-+ void getGCThingValueChunk(const ValueOperand& src, Register dest) {
-+ as_rldicl(dest, src.valueReg(), 0, 17);
-+ as_rldicr(dest, dest, 0, 43);
-+ }
-+
-+ void boxDouble(FloatRegister src, const ValueOperand& dest, FloatRegister) {
-+ as_mfvsrd(dest.valueReg(), src);
-+ }
-+ void boxNonDouble(JSValueType type, Register src, const ValueOperand& dest) {
-+ boxValue(type, src, dest.valueReg());
-+ }
-+ void boxNonDouble(Register type, Register src, const ValueOperand& dest) {
-+ boxValue(type, src, dest.valueReg());
-+ }
-+ void unboxInt32(const ValueOperand& operand, Register dest) {
-+ as_extsw(dest, operand.valueReg());
-+ }
-+ void unboxInt32(const Address& src, Register dest) { load32(src, dest); }
-+ void unboxInt32(const BaseIndex& src, Register dest) { load32(src, dest); }
-+ void unboxBoolean(const ValueOperand& operand, Register dest) {
-+ as_extsw(dest, operand.valueReg());
-+ }
-+ void unboxBoolean(const Address& src, Register dest) { load32(src, dest); }
-+ void unboxBoolean(const BaseIndex& src, Register dest) { load32(src, dest); }
-+ void unboxDouble(const ValueOperand& operand, FloatRegister dest) {
-+ as_mtvsrd(dest, operand.valueReg());
-+ }
-+ void unboxDouble(const Address& src, FloatRegister dest) {
-+ loadDouble(src, dest);
-+ }
-+ void unboxDouble(const BaseIndex& src, FloatRegister dest) {
-+ loadDouble(src, dest);
-+ }
-+ void unboxString(const ValueOperand& operand, Register dest) {
-+ unboxNonDouble(operand, dest, JSVAL_TYPE_STRING);
-+ }
-+ void unboxString(const Address& src, Register dest) {
-+ unboxNonDouble(src, dest, JSVAL_TYPE_STRING);
-+ }
-+ void unboxSymbol(const ValueOperand& operand, Register dest) {
-+ unboxNonDouble(operand, dest, JSVAL_TYPE_SYMBOL);
-+ }
-+ void unboxSymbol(const Address& src, Register dest) {
-+ unboxNonDouble(src, dest, JSVAL_TYPE_SYMBOL);
-+ }
-+ void unboxBigInt(const ValueOperand& operand, Register dest) {
-+ unboxNonDouble(operand, dest, JSVAL_TYPE_BIGINT);
-+ }
-+ void unboxBigInt(const Address& src, Register dest) {
-+ unboxNonDouble(src, dest, JSVAL_TYPE_BIGINT);
-+ }
-+ void unboxObject(const ValueOperand& src, Register dest) {
-+ unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT);
-+ }
-+ void unboxObject(const Address& src, Register dest) {
-+ unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT);
-+ }
-+ void unboxObject(const BaseIndex& src, Register dest) {
-+ unboxNonDouble(src, dest, JSVAL_TYPE_OBJECT);
-+ }
-+ void unboxValue(const ValueOperand& src, AnyRegister dest, JSValueType type) {
-+ if (dest.isFloat()) {
-+ unboxDouble(src, dest.fpu());
-+ } else {
-+ unboxNonDouble(src, dest.gpr(), type);
-+ }
-+ }
-+ void unboxObjectOrNull(const Address& src, Register dest) {
-+ loadPtr(src, dest);
-+ // Object pointers have the object tag in high bits; null has a different
-+ // tag. Clear the top bits to get either a valid pointer or zero.
-+ as_rldicl(dest, dest, 0, 64 - JSVAL_TAG_SHIFT);
-+ }
-+
-+ void tagValue(JSValueType type, Register payload, ValueOperand dest) {
-+ MOZ_ASSERT(type != JSVAL_TYPE_UNDEFINED && type != JSVAL_TYPE_NULL);
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ MOZ_ASSERT(scratch != payload && scratch != dest.valueReg());
-+ tagValueWithScratch(type, payload, dest, scratch);
-+ }
-+ void tagValueWithScratch(JSValueType type, Register payload,
-+ ValueOperand dest, Register scratch) {
-+ movePtr(ImmShiftedTag(type), scratch);
-+ if (type == JSVAL_TYPE_INT32 || type == JSVAL_TYPE_BOOLEAN ||
-+ type == JSVAL_TYPE_MAGIC) {
-+ if (payload != dest.valueReg()) {
-+ movePtr(payload, dest.valueReg());
-+ }
-+ as_rldicl(dest.valueReg(), dest.valueReg(), 0, 32);
-+ as_or_(dest.valueReg(), dest.valueReg(), scratch);
-+ } else {
-+ if (payload != dest.valueReg()) {
-+ movePtr(payload, dest.valueReg());
-+ }
-+ as_or_(dest.valueReg(), dest.valueReg(), scratch);
-+ }
-+ }
-+ void boxValue(JSValueType type, Register src, Register dest) {
-+ MOZ_ASSERT(src != dest);
-+ MOZ_ASSERT(type != JSVAL_TYPE_UNDEFINED && type != JSVAL_TYPE_NULL);
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ boxValueWithScratch(type, src, dest, scratch);
-+ }
-+ void boxValueWithScratch(JSValueType type, Register src, Register dest,
-+ Register scratch) {
-+ if (type == JSVAL_TYPE_INT32 || type == JSVAL_TYPE_BOOLEAN ||
-+ type == JSVAL_TYPE_MAGIC) {
-+ as_rldicl(dest, src, 0, 32);
-+ movePtr(ImmShiftedTag(type), scratch);
-+ as_or_(dest, dest, scratch);
-+ } else {
-+ movePtr(ImmShiftedTag(type), scratch);
-+ xs_mr(dest, src);
-+ as_or_(dest, dest, scratch);
-+ }
-+ }
-+ void boxValue(Register type, Register src, Register dest) {
-+ MOZ_ASSERT(src != dest);
-+
-+#ifdef DEBUG
-+ Label done, isNullOrUndefined, isBoolean, isInt32OrMagic;
-+
-+ // Use ma_cmp + ma_b instead of asMasm().branch32() because
-+ // MacroAssembler is not yet fully defined at this point.
-+ Condition cond;
-+ cond = ma_cmp(type, Imm32(JSVAL_TYPE_NULL), Equal, true);
-+ ma_b(cond, &isNullOrUndefined);
-+ cond = ma_cmp(type, Imm32(JSVAL_TYPE_UNDEFINED), Equal, true);
-+ ma_b(cond, &isNullOrUndefined);
-+ cond = ma_cmp(type, Imm32(JSVAL_TYPE_BOOLEAN), Equal, true);
-+ ma_b(cond, &isBoolean);
-+ cond = ma_cmp(type, Imm32(JSVAL_TYPE_INT32), Equal, true);
-+ ma_b(cond, &isInt32OrMagic);
-+ cond = ma_cmp(type, Imm32(JSVAL_TYPE_MAGIC), Equal, true);
-+ ma_b(cond, &isInt32OrMagic);
-+ // GCThing types aren't supported, because as_rldicl truncates
-+ // payloads above UINT32_MAX.
-+ breakpoint();
-+ {
-+ bind(&isNullOrUndefined);
-+
-+ // Ensure no payload for null and undefined.
-+ cond = ma_cmp(src, ImmWord(0), Equal);
-+ ma_b(cond, &done);
-+ breakpoint();
-+ }
-+ {
-+ bind(&isBoolean);
-+
-+ // Ensure boolean values are either 0 or 1.
-+ cond = ma_cmp(src, Imm32(1), BelowOrEqual, true);
-+ ma_b(cond, &done);
-+ breakpoint();
-+ }
-+ {
-+ bind(&isInt32OrMagic);
-+
-+ // Ensure |src| is sign-extended.
-+ UseScratchRegisterScope debugTemps(*this);
-+ Register debugScratch = debugTemps.Acquire();
-+ as_extsw(debugScratch, src);
-+ cond = ma_cmp(src, debugScratch, Equal);
-+ ma_b(cond, &done);
-+ breakpoint();
-+ }
-+ bind(&done);
-+#endif
-+
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ MOZ_ASSERT(scratch != dest && scratch != src && scratch != type);
-+ // Build tag: (type | JSVAL_TAG_MAX_DOUBLE) << JSVAL_TAG_SHIFT
-+ move32(Imm32(JSVAL_TAG_MAX_DOUBLE), scratch);
-+ as_or_(scratch, scratch, type);
-+ x_sldi(scratch, scratch, JSVAL_TAG_SHIFT);
-+ // Insert 32-bit payload.
-+ as_rldicl(dest, src, 0, 32);
-+ as_or_(dest, dest, scratch);
-+ }
-+
-+ // ===============================================================
-+ // Value store/load/push/pop
-+
-+ void storeValue(ValueOperand val, const Address& dest) {
-+ storePtr(val.valueReg(), dest);
-+ }
-+ void storeValue(ValueOperand val, const BaseIndex& dest) {
-+ storePtr(val.valueReg(), dest);
-+ }
-+ void storeValue(JSValueType type, Register reg, Address dest) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ MOZ_ASSERT(dest.base != scratch);
-+ boxValue(type, reg, scratch);
-+ storePtr(scratch, dest);
-+ }
-+ void storeValue(JSValueType type, Register reg, BaseIndex dest) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ MOZ_ASSERT(dest.base != scratch);
-+ boxValue(type, reg, scratch);
-+ storePtr(scratch, dest);
-+ }
-+ void storeValue(const Value& val, Address dest) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ MOZ_ASSERT(dest.base != scratch);
-+ if (val.isGCThing()) {
-+ CodeOffset off = movWithPatch(ImmWord(val.asRawBits()), scratch);
-+ writeDataRelocation(off, val);
-+ } else {
-+ movePtr(ImmWord(val.asRawBits()), scratch);
-+ }
-+ storePtr(scratch, dest);
-+ }
-+ void storeValue(const Value& val, BaseIndex dest) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ MOZ_ASSERT(dest.base != scratch);
-+ if (val.isGCThing()) {
-+ CodeOffset off = movWithPatch(ImmWord(val.asRawBits()), scratch);
-+ writeDataRelocation(off, val);
-+ } else {
-+ movePtr(ImmWord(val.asRawBits()), scratch);
-+ }
-+ storePtr(scratch, dest);
-+ }
-+ void storeValue(const Address& src, const Address& dest, Register temp) {
-+ loadPtr(src, temp);
-+ storePtr(temp, dest);
-+ }
-+
-+ void storePrivateValue(Register src, const Address& dest) {
-+ storePtr(src, dest);
-+ }
-+ void storePrivateValue(ImmGCPtr imm, const Address& dest) {
-+ storePtr(imm, dest);
-+ }
-+
-+ void loadValue(Address src, ValueOperand val) {
-+ loadPtr(src, val.valueReg());
-+ }
-+ void loadValue(const BaseIndex& src, ValueOperand val) {
-+ loadPtr(src, val.valueReg());
-+ }
-+ void loadUnalignedValue(const Address& src, ValueOperand dest) {
-+ loadPtr(src, dest.valueReg());
-+ }
-+
-+ void pushValue(ValueOperand val) { push(val.valueReg()); }
-+ void popValue(ValueOperand val) { pop(val.valueReg()); }
-+ void pushValue(const Value& val) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ if (val.isGCThing()) {
-+ CodeOffset off = movWithPatch(ImmWord(val.asRawBits()), scratch);
-+ writeDataRelocation(off, val);
-+ } else {
-+ movePtr(ImmWord(val.asRawBits()), scratch);
-+ }
-+ push(scratch);
-+ }
-+ void pushValue(JSValueType type, Register reg) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ boxValue(type, reg, scratch);
-+ push(scratch);
-+ }
-+ void pushValue(const Address& addr) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ loadPtr(addr, scratch);
-+ push(scratch);
-+ }
-+ void pushValue(const BaseIndex& addr, Register scratch) {
-+ loadPtr(addr, scratch);
-+ push(scratch);
-+ }
-+
-+ // ===============================================================
-+ // Load instructions
-+
-+ FaultingCodeOffset load8SignExtend(const Address& address, Register dest) {
-+ FaultingCodeOffset fco;
-+ if (is_intN(address.offset, 16)) {
-+ fco = FaultingCodeOffset(
-+ as_lbz(dest, address.base, address.offset).getOffset());
-+ } else {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ MOZ_ASSERT(scratch != dest);
-+ movePtr(ImmWord(address.offset), scratch);
-+ fco =
-+ FaultingCodeOffset(as_lbzx(dest, address.base, scratch).getOffset());
-+ }
-+ as_extsb(dest, dest);
-+ return fco;
-+ }
-+ FaultingCodeOffset load8SignExtend(const BaseIndex& src, Register dest) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ computeScaledAddress(src, scratch);
-+ FaultingCodeOffset fco;
-+ if (is_intN(src.offset, 16)) {
-+ fco = FaultingCodeOffset(as_lbz(dest, scratch, src.offset).getOffset());
-+ } else {
-+ MOZ_ASSERT(scratch != dest);
-+ movePtr(ImmWord(src.offset), dest);
-+ fco = FaultingCodeOffset(as_lbzx(dest, scratch, dest).getOffset());
-+ }
-+ as_extsb(dest, dest);
-+ return fco;
-+ }
-+ FaultingCodeOffset load8ZeroExtend(const Address& address, Register dest) {
-+ if (is_intN(address.offset, 16)) {
-+ return FaultingCodeOffset(
-+ as_lbz(dest, address.base, address.offset).getOffset());
-+ }
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ MOZ_ASSERT(scratch != dest);
-+ movePtr(ImmWord(address.offset), scratch);
-+ return FaultingCodeOffset(as_lbzx(dest, address.base, scratch).getOffset());
-+ }
-+ FaultingCodeOffset load8ZeroExtend(const BaseIndex& src, Register dest) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ computeScaledAddress(src, scratch);
-+ if (is_intN(src.offset, 16)) {
-+ return FaultingCodeOffset(as_lbz(dest, scratch, src.offset).getOffset());
-+ }
-+ MOZ_ASSERT(scratch != dest);
-+ movePtr(ImmWord(src.offset), dest);
-+ return FaultingCodeOffset(as_lbzx(dest, scratch, dest).getOffset());
-+ }
-+ FaultingCodeOffset load16SignExtend(const Address& address, Register dest) {
-+ if (is_intN(address.offset, 16)) {
-+ return FaultingCodeOffset(
-+ as_lha(dest, address.base, address.offset).getOffset());
-+ }
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ MOZ_ASSERT(scratch != dest);
-+ movePtr(ImmWord(address.offset), scratch);
-+ return FaultingCodeOffset(as_lhax(dest, address.base, scratch).getOffset());
-+ }
-+ FaultingCodeOffset load16SignExtend(const BaseIndex& src, Register dest) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ computeScaledAddress(src, scratch);
-+ if (is_intN(src.offset, 16)) {
-+ return FaultingCodeOffset(as_lha(dest, scratch, src.offset).getOffset());
-+ }
-+ MOZ_ASSERT(scratch != dest);
-+ movePtr(ImmWord(src.offset), dest);
-+ return FaultingCodeOffset(as_lhax(dest, scratch, dest).getOffset());
-+ }
-+ template <typename S>
-+ void load16UnalignedSignExtend(const S& src, Register dest) {
-+ load16SignExtend(src, dest);
-+ }
-+ FaultingCodeOffset load16ZeroExtend(const Address& address, Register dest) {
-+ if (is_intN(address.offset, 16)) {
-+ return FaultingCodeOffset(
-+ as_lhz(dest, address.base, address.offset).getOffset());
-+ }
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ MOZ_ASSERT(scratch != dest);
-+ movePtr(ImmWord(address.offset), scratch);
-+ return FaultingCodeOffset(as_lhzx(dest, address.base, scratch).getOffset());
-+ }
-+ FaultingCodeOffset load16ZeroExtend(const BaseIndex& src, Register dest) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ computeScaledAddress(src, scratch);
-+ if (is_intN(src.offset, 16)) {
-+ return FaultingCodeOffset(as_lhz(dest, scratch, src.offset).getOffset());
-+ }
-+ MOZ_ASSERT(scratch != dest);
-+ movePtr(ImmWord(src.offset), dest);
-+ return FaultingCodeOffset(as_lhzx(dest, scratch, dest).getOffset());
-+ }
-+ template <typename S>
-+ void load16UnalignedZeroExtend(const S& src, Register dest) {
-+ load16ZeroExtend(src, dest);
-+ }
-+
-+ FaultingCodeOffset load32(const Address& address, Register dest) {
-+ // lwa is DS-form (14-bit displacement × 4 = 16-bit-signed effective
-+ // range, 4-byte alignment required). lwax is X-form indexed, no
-+ // alignment constraint. Both sign-extend in one instruction; only
-+ // the misaligned 16-bit-fitting case still needs lwz + extsw.
-+ if (is_intN(address.offset, 16) && (address.offset & 3) == 0) {
-+ return FaultingCodeOffset(
-+ as_lwa(dest, address.base, address.offset).getOffset());
-+ }
-+ if (is_intN(address.offset, 16)) {
-+ FaultingCodeOffset fco(
-+ as_lwz(dest, address.base, address.offset).getOffset());
-+ as_extsw(dest, dest);
-+ return fco;
-+ }
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ MOZ_ASSERT(scratch != dest);
-+ movePtr(ImmWord(address.offset), scratch);
-+ return FaultingCodeOffset(as_lwax(dest, address.base, scratch).getOffset());
-+ }
-+ FaultingCodeOffset load32(const BaseIndex& address, Register dest) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ computeScaledAddress(address, scratch);
-+ if (is_intN(address.offset, 16) && (address.offset & 3) == 0) {
-+ return FaultingCodeOffset(
-+ as_lwa(dest, scratch, address.offset).getOffset());
-+ }
-+ if (is_intN(address.offset, 16)) {
-+ FaultingCodeOffset fco(as_lwz(dest, scratch, address.offset).getOffset());
-+ as_extsw(dest, dest);
-+ return fco;
-+ }
-+ MOZ_ASSERT(scratch != dest);
-+ movePtr(ImmWord(address.offset), dest);
-+ return FaultingCodeOffset(as_lwax(dest, scratch, dest).getOffset());
-+ }
-+ void load32(AbsoluteAddress address, Register dest) {
-+ movePtr(ImmWord((uintptr_t)address.addr), dest);
-+ as_lwa(dest, dest, 0);
-+ }
-+ void load32(wasm::SymbolicAddress address, Register dest) {
-+ movePtr(address, dest);
-+ as_lwa(dest, dest, 0);
-+ }
-+ template <typename S>
-+ void load32Unaligned(const S& src, Register dest) {
-+ load32(src, dest);
-+ }
-+
-+ FaultingCodeOffset load64(const Address& address, Register64 dest) {
-+ return loadPtr(address, dest.reg);
-+ }
-+ FaultingCodeOffset load64(const BaseIndex& address, Register64 dest) {
-+ return loadPtr(address, dest.reg);
-+ }
-+ template <typename S>
-+ void load64Unaligned(const S& src, Register64 dest) {
-+ load64(src, dest);
-+ }
-+
-+ FaultingCodeOffset loadPtr(const Address& address, Register dest) {
-+ // as_ld (DS-form) requires 4-byte aligned offset.
-+ if (is_intN(address.offset, 16) && !(address.offset & 0x3)) {
-+ return FaultingCodeOffset(
-+ as_ld(dest, address.base, address.offset).getOffset());
-+ }
-+ if (HasPOWER10() && is_intN((intptr_t)address.offset, 34)) {
-+ return FaultingCodeOffset(
-+ as_pld(dest, address.base, (int64_t)address.offset, /*R=*/false)
-+ .getOffset());
-+ }
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ MOZ_ASSERT(scratch != dest);
-+ movePtr(ImmWord(address.offset), scratch);
-+ return FaultingCodeOffset(as_ldx(dest, address.base, scratch).getOffset());
-+ }
-+ FaultingCodeOffset loadPtr(const BaseIndex& src, Register dest) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ computeScaledAddress(src, scratch);
-+ if (is_intN(src.offset, 16) && !(src.offset & 0x3)) {
-+ return FaultingCodeOffset(as_ld(dest, scratch, src.offset).getOffset());
-+ }
-+ MOZ_ASSERT(scratch != dest);
-+ movePtr(ImmWord(src.offset), dest);
-+ return FaultingCodeOffset(as_ldx(dest, scratch, dest).getOffset());
-+ }
-+ void loadPtr(AbsoluteAddress address, Register dest) {
-+ movePtr(ImmWord((uintptr_t)address.addr), dest);
-+ as_ld(dest, dest, 0);
-+ }
-+ void loadPtr(wasm::SymbolicAddress address, Register dest) {
-+ movePtr(address, dest);
-+ as_ld(dest, dest, 0);
-+ }
-+
-+ void loadPrivate(const Address& address, Register dest) {
-+ loadPtr(address, dest);
-+ }
-+
-+ FaultingCodeOffset loadDouble(const Address& addr, FloatRegister dest) {
-+ if (is_intN(addr.offset, 16)) {
-+ return FaultingCodeOffset(
-+ as_lfd(dest, addr.base, addr.offset).getOffset());
-+ }
-+ if (HasPOWER10() && is_intN((intptr_t)addr.offset, 34)) {
-+ return FaultingCodeOffset(
-+ as_plfd(dest, addr.base, (int64_t)addr.offset, /*R=*/false)
-+ .getOffset());
-+ }
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ movePtr(ImmWord(addr.offset), scratch);
-+ return FaultingCodeOffset(as_lfdx(dest, addr.base, scratch).getOffset());
-+ }
-+ FaultingCodeOffset loadDouble(const BaseIndex& src, FloatRegister dest) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ computeScaledAddress(src, scratch);
-+ if (is_intN(src.offset, 16)) {
-+ return FaultingCodeOffset(as_lfd(dest, scratch, src.offset).getOffset());
-+ }
-+ Register scratch2 = temps.Acquire();
-+ movePtr(ImmWord(src.offset), scratch2);
-+ return FaultingCodeOffset(as_lfdx(dest, scratch, scratch2).getOffset());
-+ }
-+ FaultingCodeOffset loadFloat32(const Address& addr, FloatRegister dest) {
-+ if (is_intN(addr.offset, 16)) {
-+ return FaultingCodeOffset(
-+ as_lfs(dest, addr.base, addr.offset).getOffset());
-+ }
-+ if (HasPOWER10() && is_intN((intptr_t)addr.offset, 34)) {
-+ return FaultingCodeOffset(
-+ as_plfs(dest, addr.base, (int64_t)addr.offset, /*R=*/false)
-+ .getOffset());
-+ }
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ movePtr(ImmWord(addr.offset), scratch);
-+ return FaultingCodeOffset(as_lfsx(dest, addr.base, scratch).getOffset());
-+ }
-+ FaultingCodeOffset loadFloat32(const BaseIndex& src, FloatRegister dest) {
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ computeScaledAddress(src, scratch);
-+ if (is_intN(src.offset, 16)) {
-+ return FaultingCodeOffset(as_lfs(dest, scratch, src.offset).getOffset());
-+ }
-+ Register scratch2 = temps.Acquire();
-+ movePtr(ImmWord(src.offset), scratch2);
-+ return FaultingCodeOffset(as_lfsx(dest, scratch, scratch2).getOffset());
-+ }
-+ // Load a FP constant into `dest`.
-+ //
-+ // +0.0 / +0.0f: `xxlxor dest, dest, dest` (1 insn). No register clobbers.
-+ //
-+ // POWER9 non-zero: constant pool load via `addpcis r16, hi; lfd/lfs fD,
-+ // lo(r16); nop`. 2 real insns + nop, no LR clobber, no Return Address
-+ // Stack corruption. lfs auto-expands single-precision to double, so no
-+ // separate xscvspdpn step. Clobbers r16 (SavedScratchRegister). Pool
-+ // entries are shared across duplicate constants.
-+ //
-+ // POWER8 non-zero: inline `movePtr + mtvsrd(+xscvspdpn)` path. We do NOT
-+ // use the bcl-based pool path on POWER8: bcl clobbers LR and corrupts
-+ // the Return Address Stack, which causes catastrophic mispredicts in
-+ // hot FP-constant loops (~200x slowdown observed on cmp-bitselect.js).
-+ //
-+ // Precondition: must not be called inside an `enterNoPool` region when
-+ // HasPOWER9() is true (the pool path calls `allocEntry` which asserts
-+ // `inhibitPools_ == 0`). Audit-verified that no such call site exists
-+ // today; the POWER8 inline path is unaffected.
-+ void loadConstantDouble(double dp, FloatRegister dest) {
-+ if (mozilla::IsPositiveZero(dp)) {
-+ as_xxlxor(dest, dest, dest);
-+ return;
-+ }
-+ if (HasPOWER9()) {
-+ loadFromPoolFloat64(dest, dp);
-+ return;
-+ }
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ union {
-+ double d;
-+ uint64_t u;
-+ } u;
-+ u.d = dp;
-+ movePtr(ImmWord(u.u), scratch);
-+ as_mtvsrd(dest, scratch);
-+ }
-+ void loadConstantFloat32(float f, FloatRegister dest) {
-+ if (mozilla::IsPositiveZero(f)) {
-+ as_xxlxor(dest, dest, dest);
-+ return;
-+ }
-+ if (HasPOWER9()) {
-+ loadFromPoolFloat32(dest, f);
-+ return;
-+ }
-+ UseScratchRegisterScope temps(*this);
-+ Register scratch = temps.Acquire();
-+ union {
-+ float f;
-+ uint32_t u;
-+ } u;
-+ u.f = f;
-+ movePtr(ImmWord(u.u), scratch);
-+ x_sldi(scratch, scratch, 32);
-+ as_mtvsrd(dest, scratch);
-+ as_xscvspdpn(dest, dest);
-+ }
-+
-+ void notBoolean(const ValueOperand& val) {
-+ as_xori(val.valueReg(), val.valueReg(), 1);
-+ }
-+
-+ [[nodiscard]] Register extractTag(const Address& address, Register scratch) {
-+ loadPtr(address, scratch);
-+ x_srdi(scratch, scratch, JSVAL_TAG_SHIFT);
-+ return scratch;
-+ }
-+ [[nodiscard]] Register extractTag(const BaseIndex& address,
-+ Register scratch) {
-+ if (scratch == r0) {
-+ // r0 cannot be used as a base register in D-form/X-form loads,
-+ // so we need a separate temp for the intermediate address.
-+ UseScratchRegisterScope temps(*this);
-+ Register base = temps.Acquire();
-+ computeScaledAddress(address, base);
-+ loadPtr(Address(base, address.offset), scratch);
-+ } else {
-+ // scratch is a pool register (r11/r12) or another GPR that can
-+ // serve as a base register, so reuse it for the address computation.
-+ computeScaledAddress(address, scratch);
-+ loadPtr(Address(scratch, address.offset), scratch);
-+ }
-+ x_srdi(scratch, scratch, JSVAL_TAG_SHIFT);
-+ return scratch;
-+ }
-+ [[nodiscard]] Register extractTag(const ValueOperand& value,
-+ Register scratch) {
-+ splitTag(value, scratch);
-+ return scratch;
-+ }
-+
-+ [[nodiscard]] Register extractObject(const Address& address,
-+ Register scratch) {
-+ loadPtr(address, scratch);
-+ as_rldicl(scratch, scratch, 0, 64 - JSVAL_TAG_SHIFT);
-+ return scratch;
-+ }
-+ [[nodiscard]] Register extractObject(const ValueOperand& value,
-+ Register scratch) {
-+ unboxObject(value, scratch);
-+ return scratch;
-+ }
-+ [[nodiscard]] Register extractInt32(const ValueOperand& value,
-+ Register scratch) {
-+ unboxInt32(value, scratch);
-+ return scratch;
-+ }
-+ [[nodiscard]] Register extractString(const ValueOperand& value,
-+ Register scratch) {
-+ unboxString(value, scratch);
-+ return scratch;
-+ }
-+ [[nodiscard]] Register extractSymbol(const ValueOperand& value,
-+ Register scratch) {
-+ unboxSymbol(value, scratch);
-+ return scratch;
-+ }
-+ [[nodiscard]] Register extractBoolean(const ValueOperand& value,
-+ Register scratch) {
-+ unboxBoolean(value, scratch);
-+ return scratch;
-+ }
-+
-+ void testObjectSet(Condition cond, const ValueOperand& value, Register dest) {
-+ MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+ {
-+ UseScratchRegisterScope temps(*this);
-+ Register tag = temps.Acquire();
-+ splitTag(value, tag);
-+ uint32_t t = JSVAL_TAG_OBJECT;
-+ as_xoris(tag, tag, t >> 16);
-+ as_cmplwi(tag, t & 0xFFFF);
-+ }
-+ ma_cmp_set(dest, cond);
-+ }
-+ // Compares the tag of |value| against JSVAL_TAG_UNDEFINED and materializes
-+ // the outcome of |cond| (Equal or NotEqual only) in |dest| via
-+ // ma_cmp_set().
-+ void testUndefinedSet(Condition cond, const ValueOperand& value,
-+                       Register dest) {
-+   MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+   const uint32_t wantedTag = JSVAL_TAG_UNDEFINED;
-+   {
-+     // The scratch register is only needed until the compare is emitted.
-+     UseScratchRegisterScope scope(*this);
-+     Register tagReg = scope.Acquire();
-+     splitTag(value, tagReg);
-+     // The xoris + cmplwi pair performs the comparison without needing a
-+     // second scratch register for the constant.
-+     as_xoris(tagReg, tagReg, wantedTag >> 16);
-+     as_cmplwi(tagReg, wantedTag & 0xFFFF);
-+   }
-+   ma_cmp_set(dest, cond);
-+ }
-+ // Compares the tag of |value| against JSVAL_TAG_NULL and materializes the
-+ // outcome of |cond| (Equal or NotEqual only) in |dest| via ma_cmp_set().
-+ void testNullSet(Condition cond, const ValueOperand& value, Register dest) {
-+   MOZ_ASSERT(cond == Equal || cond == NotEqual);
-+   const uint32_t wantedTag = JSVAL_TAG_NULL;
-+   {
-+     // The scratch register is only needed until the compare is emitted.
-+     UseScratchRegisterScope scope(*this);
-+     Register tagReg = scope.Acquire();
-+     splitTag(value, tagReg);
-+     // XOR in the upper halfword of the constant, then compare what is left
-+     // against its lower halfword.
-+     as_xoris(tagReg, tagReg, wantedTag >> 16);
-+     as_cmplwi(tagReg, wantedTag & 0xFFFF);
-+   }
-+   ma_cmp_set(dest, cond);
-+ }
-+
-+ // Pops one 8-byte word from the stack into LR and branches to it.
-+ // Returns the buffer offset produced by the final blr.
-+ BufferOffset ret() {
-+   UseScratchRegisterScope scope(*this);
-+   Register returnAddr = scope.Acquire();
-+   // Load the word at the top of the stack, then release its slot.
-+   as_ld(returnAddr, StackPointer, 0);
-+   as_addi(StackPointer, StackPointer, 8);
-+   xs_mtlr(returnAddr);
-+   BufferOffset blrOffset = as_blr();
-+   return blrOffset;
-+ }
-+
-+ // Short alias: forwards straight to jump(dest).
-+ void j(Label* dest) { jump(dest); }
-+
-+ // Copies |anyref| into |dest| with its low ChunkShift bits cleared.
-+ void getWasmAnyRefGCThingChunk(Register anyref, Register dest) {
-+   // The mask-end operand below is hard-wired for a 20-bit chunk shift:
-+   // keeping bits 0..43 (IBM numbering) drops the low 64 - 44 = 20 bits.
-+   static_assert(js::gc::ChunkShift == 20);
-+   constexpr int kMaskEnd = 43;
-+   as_rldicr(dest, anyref, 0, kMaskEnd);
-+ }
-+
-+ // Loads the value stored at |address| into |dest|. General-purpose
-+ // destinations are unboxed according to |type|; floating-point destinations
-+ // go through loadInt32OrDouble().
-+ template <typename T>
-+ void loadUnboxedValue(const T& address, MIRType type, AnyRegister dest) {
-+   if (!dest.isFloat()) {
-+     unboxNonDouble(address, dest.gpr(), ValueTypeFromMIRType(type));
-+     return;
-+   }
-+   loadInt32OrDouble(address, dest.fpu());
-+ }
-+
-+ void loadInt32OrDouble(const Address& src, FloatRegister dest);
-+ void loadInt32OrDouble(const BaseIndex& addr, FloatRegister dest);
-+
-+ // ===============================================================
-+ // Store instructions
-+
-+ // Byte store of |src| to base + offset. An offset that fits a signed
-+ // 16-bit displacement is encoded directly in stb; otherwise it is
-+ // materialized in a scratch register and the indexed form (stbx) is used.
-+ // The returned offset is the one reported by the store instruction.
-+ FaultingCodeOffset store8(Register src, const Address& address) {
-+   if (!is_intN(address.offset, 16)) {
-+     UseScratchRegisterScope scope(*this);
-+     Register offsetReg = scope.Acquire();
-+     movePtr(ImmWord(address.offset), offsetReg);
-+     auto storeInsn = as_stbx(src, address.base, offsetReg);
-+     return FaultingCodeOffset(storeInsn.getOffset());
-+   }
-+   auto storeInsn = as_stb(src, address.base, address.offset);
-+   return FaultingCodeOffset(storeInsn.getOffset());
-+ }
-+ // Byte store of |src| to a base + scaled-index + offset address. The
-+ // scaled part is always computed into a scratch register first; a second
-+ // scratch is only needed when the offset does not fit 16 bits.
-+ FaultingCodeOffset store8(Register src, const BaseIndex& address) {
-+   UseScratchRegisterScope scope(*this);
-+   Register scaledBase = scope.Acquire();
-+   computeScaledAddress(address, scaledBase);
-+   if (!is_intN(address.offset, 16)) {
-+     Register offsetReg = scope.Acquire();
-+     movePtr(ImmWord(address.offset), offsetReg);
-+     auto storeInsn = as_stbx(src, scaledBase, offsetReg);
-+     return FaultingCodeOffset(storeInsn.getOffset());
-+   }
-+   auto storeInsn = as_stb(src, scaledBase, address.offset);
-+   return FaultingCodeOffset(storeInsn.getOffset());
-+ }
-+ // Immediate flavours: move the constant into a scratch register, then
-+ // reuse the register overload for the matching address kind.
-+ void store8(Imm32 imm, const Address& address) {
-+   UseScratchRegisterScope scope(*this);
-+   Register immReg = scope.Acquire();
-+   move32(imm, immReg);
-+   store8(immReg, address);
-+ }
-+ void store8(Imm32 imm, const BaseIndex& address) {
-+   UseScratchRegisterScope scope(*this);
-+   Register immReg = scope.Acquire();
-+   move32(imm, immReg);
-+   store8(immReg, address);
-+ }
-+
-+ // Halfword store of |src| to base + offset. An offset that fits a signed
-+ // 16-bit displacement is encoded directly in sth; otherwise it is
-+ // materialized in a scratch register and the indexed form (sthx) is used.
-+ // The returned offset is the one reported by the store instruction.
-+ FaultingCodeOffset store16(Register src, const Address& address) {
-+   if (!is_intN(address.offset, 16)) {
-+     UseScratchRegisterScope scope(*this);
-+     Register offsetReg = scope.Acquire();
-+     movePtr(ImmWord(address.offset), offsetReg);
-+     auto storeInsn = as_sthx(src, address.base, offsetReg);
-+     return FaultingCodeOffset(storeInsn.getOffset());
-+   }
-+   auto storeInsn = as_sth(src, address.base, address.offset);
-+   return FaultingCodeOffset(storeInsn.getOffset());
-+ }
-+ // Halfword store of |src| to a base + scaled-index + offset address. The
-+ // scaled part is always computed into a scratch register first; a second
-+ // scratch is only needed when the offset does not fit 16 bits.
-+ FaultingCodeOffset store16(Register src, const BaseIndex& address) {
-+   UseScratchRegisterScope scope(*this);
-+   Register scaledBase = scope.Acquire();
-+   computeScaledAddress(address, scaledBase);
-+   if (!is_intN(address.offset, 16)) {
-+     Register offsetReg = scope.Acquire();
-+     movePtr(ImmWord(address.offset), offsetReg);
-+     auto storeInsn = as_sthx(src, scaledBase, offsetReg);
-+     return FaultingCodeOffset(storeInsn.getOffset());
-+   }
-+   auto storeInsn = as_sth(src, scaledBase, address.offset);
-+   return FaultingCodeOffset(storeInsn.getOffset());
-+ }
-+ // Immediate flavours: move the constant into a scratch register, then
-+ // reuse the register overload for the matching address kind.
-+ void store16(Imm32 imm, const Address& address) {
-+   UseScratchRegisterScope scope(*this);
-+   Register immReg = scope.Acquire();
-+   move32(imm, immReg);
-+   store16(immReg, address);
-+ }
-+ void store16(Imm32 imm, const BaseIndex& address) {
-+   UseScratchRegisterScope scope(*this);
-+   Register immReg = scope.Acquire();
-+   move32(imm, immReg);
-+   store16(immReg, address);
-+ }
-+ // Unaligned halfword store: simply forwards to store16() with the same
-+ // operands.
-+ template <typename T>
-+ void store16Unaligned(Register src, const T& dest) {
-+ store16(src, dest);
-+ }
-+
-+ // Word store of |src| to base + offset. An offset that fits a signed
-+ // 16-bit displacement is encoded directly in stw; otherwise it is
-+ // materialized in a scratch register and the indexed form (stwx) is used.
-+ // The returned offset is the one reported by the store instruction.
-+ FaultingCodeOffset store32(Register src, const Address& address) {
-+   if (!is_intN(address.offset, 16)) {
-+     UseScratchRegisterScope scope(*this);
-+     Register offsetReg = scope.Acquire();
-+     movePtr(ImmWord(address.offset), offsetReg);
-+     auto storeInsn = as_stwx(src, address.base, offsetReg);
-+     return FaultingCodeOffset(storeInsn.getOffset());
-+   }
-+   auto storeInsn = as_stw(src, address.base, address.offset);
-+   return FaultingCodeOffset(storeInsn.getOffset());
-+ }
-+ // Word store of |src| to a base + scaled-index + offset address. The
-+ // scaled part is always computed into a scratch register first; a second
-+ // scratch is only needed when the offset does not fit 16 bits.
-+ FaultingCodeOffset store32(Register src, const BaseIndex& address) {
-+   UseScratchRegisterScope scope(*this);
-+   Register scaledBase = scope.Acquire();
-+   computeScaledAddress(address, scaledBase);
-+   if (!is_intN(address.offset, 16)) {
-+     Register offsetReg = scope.Acquire();
-+     movePtr(ImmWord(address.offset), offsetReg);
-+     auto storeInsn = as_stwx(src, scaledBase, offsetReg);
-+     return FaultingCodeOffset(storeInsn.getOffset());
-+   }
-+   auto storeInsn = as_stw(src, scaledBase, address.offset);
-+   return FaultingCodeOffset(storeInsn.getOffset());
-+ }
-+ // Word store to a fixed absolute address: the address is loaded into a
-+ // scratch register and used as the base with a zero displacement.
-+ void store32(Register src, AbsoluteAddress address) {
-+   UseScratchRegisterScope scope(*this);
-+   Register addrReg = scope.Acquire();
-+   movePtr(ImmWord((uintptr_t)address.addr), addrReg);
-+   as_stw(src, addrReg, 0);
-+ }
-+ // Immediate flavours: move the constant into a scratch register, then
-+ // reuse the register overload for the matching address kind.
-+ void store32(Imm32 src, const Address& address) {
-+   UseScratchRegisterScope scope(*this);
-+   Register immReg = scope.Acquire();
-+   move32(src, immReg);
-+   store32(immReg, address);
-+ }
-+ void store32(Imm32 src, const BaseIndex& address) {
-+   UseScratchRegisterScope scope(*this);
-+   Register immReg = scope.Acquire();
-+   move32(src, immReg);
-+   store32(immReg, address);
-+ }
-+ // Unaligned word store: simply forwards to store32() with the same
-+ // operands.
-+ template <typename T>
-+ void store32Unaligned(Register src, const T& dest) {
-+ store32(src, dest);
-+ }
-+
-+ // 64-bit immediate stores: the constant is loaded into a scratch register
-+ // and written with storePtr().
-+ void store64(Imm64 imm, Address address) {
-+   UseScratchRegisterScope scope(*this);
-+   Register immReg = scope.Acquire();
-+   movePtr(ImmWord(imm.value), immReg);
-+   storePtr(immReg, address);
-+ }
-+ void store64(Imm64 imm, const BaseIndex& address) {
-+   UseScratchRegisterScope scope(*this);
-+   Register immReg = scope.Acquire();
-+   movePtr(ImmWord(imm.value), immReg);
-+   storePtr(immReg, address);
-+ }
-+ // 64-bit register stores: a Register64 wraps a single GPR here, so these
-+ // hand its |reg| member to storePtr() and pass the resulting offset on.
-+ FaultingCodeOffset store64(Register64 src, Address address) {
-+   const Register payload = src.reg;
-+   return storePtr(payload, address);
-+ }
-+ FaultingCodeOffset store64(Register64 src, const BaseIndex& address) {
-+   const Register payload = src.reg;
-+   return storePtr(payload, address);
-+ }
-+ // Unaligned 64-bit store: simply forwards to store64() with the same
-+ // operands.
-+ template <typename T>
-+ void store64Unaligned(Register64 src, const T& dest) {
-+ store64(src, dest);
-+ }
-+
-+ // Pointer-sized immediate stores. Each flavour materializes the constant
-+ // in a scratch register and delegates to the register overload selected by
-+ // the address type T.
-+ template <typename T>
-+ void storePtr(ImmWord imm, T address) {
-+   UseScratchRegisterScope scope(*this);
-+   Register immReg = scope.Acquire();
-+   movePtr(imm, immReg);
-+   storePtr(immReg, address);
-+ }
-+ template <typename T>
-+ void storePtr(ImmPtr imm, T address) {
-+   // Reinterpret the raw pointer as a word and reuse the ImmWord overload.
-+   const ImmWord asWord(uintptr_t(imm.value));
-+   storePtr(asWord, address);
-+ }
-+ template <typename T>
-+ void storePtr(ImmGCPtr imm, T address) {
-+   UseScratchRegisterScope scope(*this);
-+   Register immReg = scope.Acquire();
-+   movePtr(imm, immReg);
-+   storePtr(immReg, address);
-+ }
-+ // Doubleword store to a fixed absolute address: the address is loaded into
-+ // a scratch register and used as the base with a zero displacement.
-+ void storePtr(Register src, AbsoluteAddress dest) {
-+   UseScratchRegisterScope scope(*this);
-+   Register addrReg = scope.Acquire();
-+   movePtr(ImmWord((uintptr_t)dest.addr), addrReg);
-+   as_std(src, addrReg, 0);
-+ }
-+ // Doubleword store of |src| to base + offset, picking the cheapest
-+ // encoding that can express the offset:
-+ //   1. std   - signed 16-bit offset that is a multiple of 4 (DS-form);
-+ //   2. pstd  - signed 34-bit offset, only when HasPOWER10() is true;
-+ //   3. stdx  - offset materialized in a scratch register.
-+ FaultingCodeOffset storePtr(Register src, const Address& address) {
-+   const bool fitsDSForm =
-+       is_intN(address.offset, 16) && !(address.offset & 0x3);
-+   if (fitsDSForm) {
-+     auto storeInsn = as_std(src, address.base, address.offset);
-+     return FaultingCodeOffset(storeInsn.getOffset());
-+   }
-+   if (HasPOWER10() && is_intN((intptr_t)address.offset, 34)) {
-+     auto storeInsn =
-+         as_pstd(src, address.base, (int64_t)address.offset, /*R=*/false);
-+     return FaultingCodeOffset(storeInsn.getOffset());
-+   }
-+   UseScratchRegisterScope scope(*this);
-+   Register offsetReg = scope.Acquire();
-+   movePtr(ImmWord(address.offset), offsetReg);
-+   auto storeInsn = as_stdx(src, address.base, offsetReg);
-+   return FaultingCodeOffset(storeInsn.getOffset());
-+ }
-+ // Doubleword store of |src| to a base + scaled-index + offset address. The
-+ // scaled part is computed into a scratch register; std is used when the
-+ // offset satisfies the DS-form constraints, stdx with a second scratch
-+ // otherwise.
-+ FaultingCodeOffset storePtr(Register src, const BaseIndex& address) {
-+   UseScratchRegisterScope scope(*this);
-+   Register scaledBase = scope.Acquire();
-+   computeScaledAddress(address, scaledBase);
-+   const bool fitsDSForm =
-+       is_intN(address.offset, 16) && !(address.offset & 0x3);
-+   if (fitsDSForm) {
-+     auto storeInsn = as_std(src, scaledBase, address.offset);
-+     return FaultingCodeOffset(storeInsn.getOffset());
-+   }
-+   Register offsetReg = scope.Acquire();
-+   movePtr(ImmWord(address.offset), offsetReg);
-+   auto storeInsn = as_stdx(src, scaledBase, offsetReg);
-+   return FaultingCodeOffset(storeInsn.getOffset());
-+ }
-+
-+ // ===============================================================
-+ // Misc
-+
-+ void handleFailureWithHandlerTail(Label* profilerExitTail, Label* bailoutTail,
-+ uint32_t* returnValueCheckOffset);
-+
-+ inline void incrementInt32Value(const Address& addr);
-+
-+ // Zeroes |reg| by XOR-ing it with itself (xxlxor).
-+ void zeroDouble(FloatRegister reg) { as_xxlxor(reg, reg, reg); }
-+
-+ // Reserves a pointer-sized placeholder in the instruction stream for
-+ // |label|. The label's patch location is bound to the current offset and
-+ // marked as a raw pointer before the placeholder words are written.
-+ void writeCodePointer(CodeLabel* label) {
-+   label->patchAt()->bind(currentOffset());
-+   label->setLinkMode(CodeLabel::RawPointer);
-+
-+   // Make room for the whole pointer up front, then fill it with two
-+   // all-ones instruction words.
-+   m_buffer.ensureSpace(sizeof(void*));
-+   writeInst(-1);
-+   writeInst(-1);
-+ }
-+ // Records a data relocation at the current offset when |val| is a GC
-+ // thing; values that are not GC things are ignored. Also notes whether a
-+ // nursery pointer has been embedded.
-+ void writeDataRelocation(const Value& val) {
-+   if (!val.isGCThing()) {
-+     return;
-+   }
-+   gc::Cell* thing = val.toGCThing();
-+   if (thing && gc::IsInsideNursery(thing)) {
-+     embedsNurseryPointers_ = true;
-+   }
-+   dataRelocations_.writeUnsigned(currentOffset());
-+ }
-+ // Same as above, but the relocation is recorded at the explicit offset
-+ // |off| instead of the current one.
-+ void writeDataRelocation(CodeOffset off, const Value& val) {
-+   if (!val.isGCThing()) {
-+     return;
-+   }
-+   gc::Cell* thing = val.toGCThing();
-+   if (thing && gc::IsInsideNursery(thing)) {
-+     embedsNurseryPointers_ = true;
-+   }
-+   dataRelocations_.writeUnsigned(off.offset());
-+ }
-+
-+ // Emits a jump to |label| and returns the offset at which that jump
-+ // starts (captured from nextOffset() before emitting it).
-+ CodeOffset toggledJump(Label* label) {
-+   const CodeOffset jumpStart(nextOffset().getOffset());
-+   jump(label);
-+   return jumpStart;
-+ }
-+ CodeOffset toggledCall(JitCode* target, bool enabled);
-+ // Fixed byte size of a toggled call sequence; |code| is not inspected.
-+ // 8 instructions for load64 + mtctr + bctrl = 10 instructions total.
-+ static size_t ToggledCallSize(uint8_t* code) {
-+   constexpr int kInstructionCount = 10;
-+   return kInstructionCount * sizeof(uint32_t);
-+ }
-+
-+ // Empty body: no check is emitted here.
-+ void checkStackAlignment() {}
-+
-+ // Rounds |*stackPointer| down to a multiple of ABIStackAlignment, in place.
-+ //
-+ // The mask is built at pointer width on purpose. If ABIStackAlignment is
-+ // declared as a 32-bit unsigned constant, ~(ABIStackAlignment - 1) is
-+ // evaluated in 32 bits and zero-extended when combined with the 64-bit
-+ // address, which would wipe the upper half of the pointer. Widening first
-+ // yields the same result when the constant is already pointer-sized.
-+ static void calculateAlignedStackPointer(void** stackPointer) {
-+   const uintptr_t lowBits = uintptr_t(ABIStackAlignment) - 1;
-+   const uintptr_t rawAddress = uintptr_t(*stackPointer);
-+   *stackPointer = reinterpret_cast<void*>(rawAddress & ~lowBits);
-+ }
-+
-+ // Unsupported entry point: any call crashes with the message below, which
-+ // directs callers to computeEffectiveAddress instead. Neither argument is
-+ // used.
-+ void lea(Operand addr, Register dest) {
-+ // x86-ism; on PPC, compute effective address manually.
-+ MOZ_CRASH("PPC64: lea not supported; use computeEffectiveAddress");
-+ }
-+
-+ // Emits a bare blr and nothing else.
-+ void abiret() { as_blr(); }
-+
-+ void profilerEnterFrame(Register framePtr, Register scratch);
-+ void profilerExitFrame();
-+
-+ void outOfLineWasmTruncateToInt32Check(
-+ FloatRegister input, Register output, MIRType fromType, TruncFlags flags,
-+ Label* rejoin, const wasm::TrapSiteDesc& trapSiteDesc);
-+ void outOfLineWasmTruncateToInt64Check(
-+ FloatRegister input, Register64 output, MIRType fromType,
-+ TruncFlags flags, Label* rejoin, const wasm::TrapSiteDesc& trapSiteDesc);
-+
-+ void wasmLoadImpl(const wasm::MemoryAccessDesc& access, Register memoryBase,
-+ Register ptr, Register ptrScratch, AnyRegister output);
-+ void wasmStoreImpl(const wasm::MemoryAccessDesc& access, AnyRegister value,
-+ Register memoryBase, Register ptr, Register ptrScratch);
-+ void wasmLoadI64Impl(const wasm::MemoryAccessDesc& access,
-+ Register memoryBase, Register ptr, Register ptrScratch,
-+ Register64 output);
-+ void wasmStoreI64Impl(const wasm::MemoryAccessDesc& access, Register64 value,
-+ Register memoryBase, Register ptr, Register ptrScratch);
-+
-+ // Last-byte probing load to enforce wasm-spec atomicity for multi-byte
-+ // wasm accesses on POWER ISA. POWER permits unaligned page-spanning
-+ // accesses to commit one half before the other half takes a DSI; wasm
-+ // requires atomicity. Touching the last byte of the upcoming access
-+ // with a 1-byte lbzx triggers SIGSEGV (→ wasm trap via the signal
-+ // handler) before the actual access executes — POWER's precise-
-+ // interrupt model guarantees the subsequent access is never
-+ // architecturally executed if the probe faults.
-+ //
-+ // Wasm linear memory is one contiguous mapped region followed by an
-+ // mprotect'd guard, so last-byte-mapped ⇒ all-bytes-mapped, and a
-+ // single-byte probe is sufficient regardless of access size.
-+ //
-+ // No-op when HasPOWER9() (real POWER9/POWER10 silicon handles page-
-+ // spanning unaligned stores atomically at the µarch level), and when
-+ // access size is 1. Never called on the atomic path: atomic ops are
-+ // naturally aligned per wasm spec + ISA-enforced lwarx alignment, so
-+ // they cannot span pages; misaligned atomics take a precise SIGBUS
-+ // before any commit.
-+ //
-+ // 2 instructions when emitted (addi + lbzx).
-+ void wasmProbeLastByte(const wasm::MemoryAccessDesc& access,
-+ Register memoryBase, Register ptr);
-+};
-+
-+// Name under which the PPC64 macro assembler is exposed.
-+using MacroAssemblerSpecific = MacroAssemblerPPC64Compat;
-+
-+} // namespace jit
-+} // namespace js
-+
-+#endif /* jit_ppc64_MacroAssembler_ppc64_h */
-diff --git a/js/src/jit/ppc64/MoveEmitter-ppc64.cpp b/js/src/jit/ppc64/MoveEmitter-ppc64.cpp
-new file mode 100644
-index 000000000000..989d3f61f121
---- /dev/null
-+++ b/js/src/jit/ppc64/MoveEmitter-ppc64.cpp
-@@ -0,0 +1,357 @@
-+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
-+ * vim: set ts=8 sts=2 et sw=2 tw=80:
-+ * This Source Code Form is subject to the terms of the Mozilla Public
-+ * License, v. 2.0. If a copy of the MPL was not distributed with this
-+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-+
-+#include "jit/ppc64/MoveEmitter-ppc64.h"
-+
-+#include "jit/MacroAssembler-inl.h"
-+
-+using namespace js;
-+using namespace js::jit;
-+
-+// Saves the current contents of |to| into a cycle slot so that it can be
-+// restored later by completeCycle(). Register destinations are stored
-+// directly; memory destinations are bounced through a scratch register of
-+// the matching kind. FLOAT32, DOUBLE and SIMD128 use slot |slotId|, whereas
-+// INT32 and GENERAL always use slot 0. |from| is not consulted.
-+void MoveEmitterPPC64::breakCycle(const MoveOperand& from,
-+                                  const MoveOperand& to, MoveOp::Type type,
-+                                  uint32_t slotId) {
-+  switch (type) {
-+    case MoveOp::FLOAT32: {
-+      if (!to.isMemory()) {
-+        masm.storeFloat32(to.floatReg(), cycleSlot(slotId));
-+        break;
-+      }
-+      ScratchFloat32Scope bounce(masm);
-+      masm.loadFloat32(getAdjustedAddress(to), bounce);
-+      masm.storeFloat32(bounce, cycleSlot(slotId));
-+      break;
-+    }
-+    case MoveOp::DOUBLE: {
-+      if (!to.isMemory()) {
-+        masm.storeDouble(to.floatReg(), cycleSlot(slotId));
-+        break;
-+      }
-+      ScratchDoubleScope bounce(masm);
-+      masm.loadDouble(getAdjustedAddress(to), bounce);
-+      masm.storeDouble(bounce, cycleSlot(slotId));
-+      break;
-+    }
-+    case MoveOp::INT32: {
-+      if (!to.isMemory()) {
-+        masm.store32(to.reg(), cycleSlot(0));
-+        break;
-+      }
-+      UseScratchRegisterScope scope(masm);
-+      Register bounce = scope.Acquire();
-+      masm.load32(getAdjustedAddress(to), bounce);
-+      masm.store32(bounce, cycleSlot(0));
-+      break;
-+    }
-+    case MoveOp::GENERAL: {
-+      if (!to.isMemory()) {
-+        masm.storePtr(to.reg(), cycleSlot(0));
-+        break;
-+      }
-+      UseScratchRegisterScope scope(masm);
-+      Register bounce = scope.Acquire();
-+      masm.loadPtr(getAdjustedAddress(to), bounce);
-+      masm.storePtr(bounce, cycleSlot(0));
-+      break;
-+    }
-+    case MoveOp::SIMD128: {
-+      if (!to.isMemory()) {
-+        masm.storeUnalignedSimd128(to.floatReg(), cycleSlot(slotId));
-+        break;
-+      }
-+      ScratchSimd128Scope bounce(masm);
-+      masm.loadUnalignedSimd128(getAdjustedAddress(to), bounce);
-+      masm.storeUnalignedSimd128(bounce, cycleSlot(slotId));
-+      break;
-+    }
-+    default:
-+      MOZ_CRASH("Unexpected move type");
-+  }
-+}
-+
-+// Counterpart of breakCycle(): moves the value parked in a cycle slot into
-+// |to|. Register destinations are loaded directly; memory destinations are
-+// bounced through a scratch register of the matching kind. INT32 and
-+// GENERAL assert that |slotId| is 0 and always read slot 0. |from| is not
-+// consulted.
-+void MoveEmitterPPC64::completeCycle(const MoveOperand& from,
-+                                     const MoveOperand& to, MoveOp::Type type,
-+                                     uint32_t slotId) {
-+  switch (type) {
-+    case MoveOp::FLOAT32: {
-+      if (!to.isMemory()) {
-+        masm.loadFloat32(cycleSlot(slotId), to.floatReg());
-+        break;
-+      }
-+      ScratchFloat32Scope bounce(masm);
-+      masm.loadFloat32(cycleSlot(slotId), bounce);
-+      masm.storeFloat32(bounce, getAdjustedAddress(to));
-+      break;
-+    }
-+    case MoveOp::DOUBLE: {
-+      if (!to.isMemory()) {
-+        masm.loadDouble(cycleSlot(slotId), to.floatReg());
-+        break;
-+      }
-+      ScratchDoubleScope bounce(masm);
-+      masm.loadDouble(cycleSlot(slotId), bounce);
-+      masm.storeDouble(bounce, getAdjustedAddress(to));
-+      break;
-+    }
-+    case MoveOp::INT32: {
-+      MOZ_ASSERT(slotId == 0);
-+      if (!to.isMemory()) {
-+        masm.load32(cycleSlot(0), to.reg());
-+        break;
-+      }
-+      UseScratchRegisterScope scope(masm);
-+      Register bounce = scope.Acquire();
-+      masm.load32(cycleSlot(0), bounce);
-+      masm.store32(bounce, getAdjustedAddress(to));
-+      break;
-+    }
-+    case MoveOp::GENERAL: {
-+      MOZ_ASSERT(slotId == 0);
-+      if (!to.isMemory()) {
-+        masm.loadPtr(cycleSlot(0), to.reg());
-+        break;
-+      }
-+      UseScratchRegisterScope scope(masm);
-+      Register bounce = scope.Acquire();
-+      masm.loadPtr(cycleSlot(0), bounce);
-+      masm.storePtr(bounce, getAdjustedAddress(to));
-+      break;
-+    }
-+    case MoveOp::SIMD128: {
-+      if (!to.isMemory()) {
-+        masm.loadUnalignedSimd128(cycleSlot(slotId), to.floatReg());
-+        break;
-+      }
-+      ScratchSimd128Scope bounce(masm);
-+      masm.loadUnalignedSimd128(cycleSlot(slotId), bounce);
-+      masm.storeUnalignedSimd128(bounce, getAdjustedAddress(to));
-+      break;
-+    }
-+    default:
-+      MOZ_CRASH("Unexpected move type");
-+  }
-+}
-+
-+// Emits every move in |moves|, in order. When the resolver reports cycles,
-+// one SpillSlotSize-byte stack slot per cycle is reserved first and the
-+// resulting frame depth is remembered for cycleSlot().
-+void MoveEmitterPPC64::emit(const MoveResolver& moves) {
-+  if (moves.numCycles()) {
-+    // SpillSlotSize must be wide enough for the widest cycled value
-+    // (SIMD128 = 16 bytes). The stride used by cycleSlot() assumes the
-+    // same. See Architecture-ppc64.h for the rationale.
-+    static_assert(SpillSlotSize == 16);
-+    masm.reserveStack(moves.numCycles() * SpillSlotSize);
-+    pushedAtCycle_ = masm.framePushed();
-+  }
-+
-+  const size_t moveCount = moves.numMoves();
-+  for (size_t index = 0; index != moveCount; ++index) {
-+    emit(moves.getMove(index));
-+  }
-+}
-+
-+// Stack address of cycle slot |slot| (plus the byte offset |subslot|),
-+// expressed relative to the current StackPointer.
-+Address MoveEmitterPPC64::cycleSlot(uint32_t slot, uint32_t subslot) const {
-+  // Bytes pushed since the cycle slots were reserved in emit().
-+  int32_t pushedSinceCycle = masm.framePushed() - pushedAtCycle_;
-+  // Stride must match the per-cycle reservation in emit(); using a
-+  // narrower stride causes adjacent SIMD128 slots to overlap.
-+  return Address(StackPointer,
-+                 pushedSinceCycle + slot * SpillSlotSize + subslot);
-+}
-+
-+// Displacement to use for |operand| at the current point of emission.
-+// StackPointer-relative operands are corrected by however much the frame
-+// has grown since this emitter was constructed; all other bases keep their
-+// displacement unchanged.
-+int32_t MoveEmitterPPC64::getAdjustedOffset(const MoveOperand& operand) {
-+  MOZ_ASSERT(operand.isMemoryOrEffectiveAddress());
-+  if (operand.base() == StackPointer) {
-+    return operand.disp() + masm.framePushed() - pushedAtStart_;
-+  }
-+
-+  return operand.disp();
-+}
-+
-+// Builds the Address for |operand| from its base register and the
-+// displacement computed by getAdjustedOffset().
-+Address MoveEmitterPPC64::getAdjustedAddress(const MoveOperand& operand) {
-+  const int32_t adjusted = getAdjustedOffset(operand);
-+  return Address(operand.base(), adjusted);
-+}
-+
-+// Pointer-sized move. The source may be a general register, a memory
-+// operand or an effective address; the destination a general register or a
-+// memory operand. Memory destinations fed from memory or from an effective
-+// address go through a scratch register. Anything else crashes.
-+void MoveEmitterPPC64::emitMove(const MoveOperand& from,
-+                                const MoveOperand& to) {
-+  if (from.isGeneralReg()) {
-+    if (to.isGeneralReg()) {
-+      masm.movePtr(from.reg(), to.reg());
-+      return;
-+    }
-+    if (to.isMemory()) {
-+      masm.storePtr(from.reg(), getAdjustedAddress(to));
-+      return;
-+    }
-+    MOZ_CRASH("Invalid emitMove arguments.");
-+  }
-+
-+  if (from.isMemory()) {
-+    if (to.isGeneralReg()) {
-+      masm.loadPtr(getAdjustedAddress(from), to.reg());
-+      return;
-+    }
-+    if (to.isMemory()) {
-+      UseScratchRegisterScope scope(masm);
-+      Register bounce = scope.Acquire();
-+      masm.loadPtr(getAdjustedAddress(from), bounce);
-+      masm.storePtr(bounce, getAdjustedAddress(to));
-+      return;
-+    }
-+    MOZ_CRASH("Invalid emitMove arguments.");
-+  }
-+
-+  if (from.isEffectiveAddress()) {
-+    if (to.isGeneralReg()) {
-+      masm.computeEffectiveAddress(getAdjustedAddress(from), to.reg());
-+      return;
-+    }
-+    if (to.isMemory()) {
-+      UseScratchRegisterScope scope(masm);
-+      Register bounce = scope.Acquire();
-+      masm.computeEffectiveAddress(getAdjustedAddress(from), bounce);
-+      masm.storePtr(bounce, getAdjustedAddress(to));
-+      return;
-+    }
-+    MOZ_CRASH("Invalid emitMove arguments.");
-+  }
-+
-+  MOZ_CRASH("Invalid emitMove arguments.");
-+}
-+
-+// 32-bit move. Accepts the same operand combinations as emitMove() but uses
-+// the 32-bit move/load/store primitives; an effective-address source is
-+// still computed at full width before being stored with store32. Anything
-+// else crashes.
-+void MoveEmitterPPC64::emitInt32Move(const MoveOperand& from,
-+                                     const MoveOperand& to) {
-+  if (from.isGeneralReg()) {
-+    if (to.isGeneralReg()) {
-+      masm.move32(from.reg(), to.reg());
-+      return;
-+    }
-+    if (to.isMemory()) {
-+      masm.store32(from.reg(), getAdjustedAddress(to));
-+      return;
-+    }
-+    MOZ_CRASH("Invalid emitInt32Move arguments.");
-+  }
-+
-+  if (from.isMemory()) {
-+    if (to.isGeneralReg()) {
-+      masm.load32(getAdjustedAddress(from), to.reg());
-+      return;
-+    }
-+    if (to.isMemory()) {
-+      UseScratchRegisterScope scope(masm);
-+      Register bounce = scope.Acquire();
-+      masm.load32(getAdjustedAddress(from), bounce);
-+      masm.store32(bounce, getAdjustedAddress(to));
-+      return;
-+    }
-+    MOZ_CRASH("Invalid emitInt32Move arguments.");
-+  }
-+
-+  if (from.isEffectiveAddress()) {
-+    if (to.isGeneralReg()) {
-+      masm.computeEffectiveAddress(getAdjustedAddress(from), to.reg());
-+      return;
-+    }
-+    if (to.isMemory()) {
-+      UseScratchRegisterScope scope(masm);
-+      Register bounce = scope.Acquire();
-+      masm.computeEffectiveAddress(getAdjustedAddress(from), bounce);
-+      masm.store32(bounce, getAdjustedAddress(to));
-+      return;
-+    }
-+    MOZ_CRASH("Invalid emitInt32Move arguments.");
-+  }
-+
-+  MOZ_CRASH("Invalid emitInt32Move arguments.");
-+}
-+
-+// Single-precision move between float registers and/or memory. A
-+// memory-to-memory move is bounced through the float32 scratch register.
-+void MoveEmitterPPC64::emitFloat32Move(const MoveOperand& from,
-+                                       const MoveOperand& to) {
-+  if (from.isFloatReg()) {
-+    if (to.isFloatReg()) {
-+      masm.moveFloat32(from.floatReg(), to.floatReg());
-+      return;
-+    }
-+    MOZ_ASSERT(to.isMemory());
-+    masm.storeFloat32(from.floatReg(), getAdjustedAddress(to));
-+    return;
-+  }
-+
-+  // From here on the source is expected to live in memory.
-+  if (to.isFloatReg()) {
-+    MOZ_ASSERT(from.isMemory());
-+    masm.loadFloat32(getAdjustedAddress(from), to.floatReg());
-+    return;
-+  }
-+
-+  MOZ_ASSERT(from.isMemory());
-+  MOZ_ASSERT(to.isMemory());
-+  ScratchFloat32Scope bounce(masm);
-+  masm.loadFloat32(getAdjustedAddress(from), bounce);
-+  masm.storeFloat32(bounce, getAdjustedAddress(to));
-+}
-+
-+// Double-precision move. Besides float-register and memory operands this
-+// also handles moves between a float register and a general register,
-+// which are done with mfvsrd / mtvsrd. A memory-to-memory move is bounced
-+// through the double scratch register.
-+void MoveEmitterPPC64::emitDoubleMove(const MoveOperand& from,
-+                                      const MoveOperand& to) {
-+  if (from.isFloatReg()) {
-+    if (to.isFloatReg()) {
-+      masm.moveDouble(from.floatReg(), to.floatReg());
-+      return;
-+    }
-+    if (to.isGeneralReg()) {
-+      // FPR -> GPR: use mfvsrd directly.
-+      masm.as_mfvsrd(to.reg(), from.floatReg());
-+      return;
-+    }
-+    MOZ_ASSERT(to.isMemory());
-+    masm.storeDouble(from.floatReg(), getAdjustedAddress(to));
-+    return;
-+  }
-+
-+  if (to.isFloatReg()) {
-+    if (from.isMemory()) {
-+      masm.loadDouble(getAdjustedAddress(from), to.floatReg());
-+      return;
-+    }
-+    // GPR -> FPR: use mtvsrd directly.
-+    masm.as_mtvsrd(to.floatReg(), from.reg());
-+    return;
-+  }
-+
-+  MOZ_ASSERT(from.isMemory());
-+  MOZ_ASSERT(to.isMemory());
-+  ScratchDoubleScope bounce(masm);
-+  masm.loadDouble(getAdjustedAddress(from), bounce);
-+  masm.storeDouble(bounce, getAdjustedAddress(to));
-+}
-+
-+// 128-bit vector move between float registers and/or memory, using the
-+// unaligned load/store primitives for every memory access. A
-+// memory-to-memory move is bounced through the SIMD scratch register.
-+void MoveEmitterPPC64::emitSimd128Move(const MoveOperand& from,
-+                                       const MoveOperand& to) {
-+  if (from.isFloatReg()) {
-+    if (to.isFloatReg()) {
-+      masm.moveSimd128(from.floatReg(), to.floatReg());
-+      return;
-+    }
-+    MOZ_ASSERT(to.isMemory());
-+    masm.storeUnalignedSimd128(from.floatReg(), getAdjustedAddress(to));
-+    return;
-+  }
-+
-+  // From here on the source is expected to live in memory.
-+  if (to.isFloatReg()) {
-+    MOZ_ASSERT(from.isMemory());
-+    masm.loadUnalignedSimd128(getAdjustedAddress(from), to.floatReg());
-+    return;
-+  }
-+
-+  MOZ_ASSERT(from.isMemory());
-+  MOZ_ASSERT(to.isMemory());
-+  ScratchSimd128Scope bounce(masm);
-+  masm.loadUnalignedSimd128(getAdjustedAddress(from), bounce);
-+  masm.storeUnalignedSimd128(bounce, getAdjustedAddress(to));
-+}
-+
-+// Emits a single move, handling cycle bookkeeping first:
-+//   - a move that both ends and begins a cycle saves and immediately
-+//     restores through the cycle slot and emits nothing else;
-+//   - a move that ends a cycle restores from the slot and closes the cycle;
-+//   - a move that begins a cycle saves its destination, opens the cycle and
-+//     then falls through to the regular typed move below.
-+void MoveEmitterPPC64::emit(const MoveOp& move) {
-+  const MoveOperand& from = move.from();
-+  const MoveOperand& to = move.to();
-+  const bool endsCycle = move.isCycleEnd();
-+  const bool beginsCycle = move.isCycleBegin();
-+
-+  if (endsCycle && beginsCycle) {
-+    breakCycle(from, to, move.endCycleType(), move.cycleBeginSlot());
-+    completeCycle(from, to, move.type(), move.cycleEndSlot());
-+    return;
-+  }
-+
-+  if (endsCycle) {
-+    MOZ_ASSERT(inCycle_);
-+    completeCycle(from, to, move.type(), move.cycleEndSlot());
-+    MOZ_ASSERT(inCycle_ > 0);
-+    inCycle_--;
-+    return;
-+  }
-+
-+  if (beginsCycle) {
-+    breakCycle(from, to, move.endCycleType(), move.cycleBeginSlot());
-+    inCycle_++;
-+  }
-+
-+  // Dispatch on the move's own type.
-+  switch (move.type()) {
-+    case MoveOp::GENERAL:
-+      emitMove(from, to);
-+      break;
-+    case MoveOp::INT32:
-+      emitInt32Move(from, to);
-+      break;
-+    case MoveOp::FLOAT32:
-+      emitFloat32Move(from, to);
-+      break;
-+    case MoveOp::DOUBLE:
-+      emitDoubleMove(from, to);
-+      break;
-+    case MoveOp::SIMD128:
-+      emitSimd128Move(from, to);
-+      break;
-+    default:
-+      MOZ_CRASH("Unexpected move type");
-+  }
-+}
-+
-+// Asserts that no cycle is still open (inCycle_ is back to zero).
-+void MoveEmitterPPC64::assertDone() { MOZ_ASSERT(inCycle_ == 0); }
-+
-+// Ends emission: checks that every cycle was closed, then releases all the
-+// stack pushed since this emitter was constructed.
-+void MoveEmitterPPC64::finish() {
-+  assertDone();
-+
-+  const uint32_t pushedByEmitter = masm.framePushed() - pushedAtStart_;
-+  masm.freeStack(pushedByEmitter);
-+}
-diff --git a/js/src/jit/ppc64/MoveEmitter-ppc64.h b/js/src/jit/ppc64/MoveEmitter-ppc64.h
-new file mode 100644
-index 000000000000..a9faa34de6bb
---- /dev/null
-+++ b/js/src/jit/ppc64/MoveEmitter-ppc64.h
-@@ -0,0 +1,64 @@
-+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
-+ * vim: set ts=8 sts=2 et sw=2 tw=80:
-+ * This Source Code Form is subject to the terms of the Mozilla Public
-+ * License, v. 2.0. If a copy of the MPL was not distributed with this
-+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-+
-+#ifndef jit_ppc64_MoveEmitter_ppc64_h
-+#define jit_ppc64_MoveEmitter_ppc64_h
-+
-+#include "jit/MacroAssembler.h"
-+#include "jit/MoveResolver.h"
-+
-+namespace js {
-+namespace jit {
-+
-+// Turns the moves produced by a MoveResolver into PPC64 code, using stack
-+// slots to break move cycles.
-+class MoveEmitterPPC64 {
-+ private:
-+  // Cycle handling: save a destination into a cycle slot / restore it.
-+  void breakCycle(const MoveOperand& from, const MoveOperand& to,
-+                  MoveOp::Type type, uint32_t slot);
-+  void completeCycle(const MoveOperand& from, const MoveOperand& to,
-+                     MoveOp::Type type, uint32_t slot);
-+
-+  // Typed move emitters that are private to this class.
-+  void emitDoubleMove(const MoveOperand& from, const MoveOperand& to);
-+  void emitSimd128Move(const MoveOperand& from, const MoveOperand& to);
-+
-+ protected:
-+  // Number of cycles currently open; checked to be zero by assertDone().
-+  uint32_t inCycle_;
-+  MacroAssembler& masm;
-+
-+  // masm.framePushed() at construction time.
-+  uint32_t pushedAtStart_;
-+
-+  // masm.framePushed() right after the cycle slots were reserved; starts
-+  // out as -1.
-+  int32_t pushedAtCycle_;
-+
-+  void assertDone();
-+
-+  // Address helpers.
-+  Address cycleSlot(uint32_t slot, uint32_t subslot = 0) const;
-+  int32_t getAdjustedOffset(const MoveOperand& operand);
-+  Address getAdjustedAddress(const MoveOperand& operand);
-+
-+  // Typed move emitters and the single-move dispatcher.
-+  void emitMove(const MoveOperand& from, const MoveOperand& to);
-+  void emitInt32Move(const MoveOperand& from, const MoveOperand& to);
-+  void emitFloat32Move(const MoveOperand& from, const MoveOperand& to);
-+  void emit(const MoveOp& move);
-+
-+ public:
-+  explicit MoveEmitterPPC64(MacroAssembler& masm)
-+      : inCycle_(0),
-+        masm(masm),
-+        pushedAtStart_(masm.framePushed()),
-+        pushedAtCycle_(-1) {}
-+
-+  ~MoveEmitterPPC64() { assertDone(); }
-+
-+  // Emits all moves of |moves|; finish() must follow to release the stack.
-+  void emit(const MoveResolver& moves);
-+  void finish();
-+
-+  // setScratchRegister is part of the cross-arch MoveEmitter interface
-+  // but we never spill, so there's no scratch to set. No-op kept for
-+  // shared-code compatibility.
-+  void setScratchRegister(Register reg) {}
-+};
-+
-+// Name under which the PPC64 move emitter is exposed.
-+using MoveEmitter = MoveEmitterPPC64;
-+
-+} // namespace jit
-+} // namespace js
-+
-+#endif /* jit_ppc64_MoveEmitter_ppc64_h */
-diff --git a/js/src/jit/ppc64/SharedICHelpers-ppc64-inl.h b/js/src/jit/ppc64/SharedICHelpers-ppc64-inl.h
-new file mode 100644
-index 000000000000..aa874dfd6732
---- /dev/null
-+++ b/js/src/jit/ppc64/SharedICHelpers-ppc64-inl.h
-@@ -0,0 +1,83 @@
-+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
-+ * vim: set ts=8 sts=2 et sw=2 tw=80:
-+ * This Source Code Form is subject to the terms of the Mozilla Public
-+ * License, v. 2.0. If a copy of the MPL was not distributed with this
-+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-+
-+#ifndef jit_ppc64_SharedICHelpers_ppc64_inl_h
-+#define jit_ppc64_SharedICHelpers_ppc64_inl_h
-+
-+#include "jit/BaselineFrame.h"
-+#include "jit/SharedICHelpers.h"
-+
-+#include "jit/MacroAssembler-inl.h"
-+
-+namespace js {
-+namespace jit {
-+
-+// Tail-calls the VM trampoline |target|: pushes a BaselineJS frame
-+// descriptor and jumps. In DEBUG builds the frame size, not counting the
-+// |argSize| bytes of VMFunction arguments, is first recorded in the
-+// baseline frame's debug-frame-size slot.
-+inline void EmitBaselineTailCallVM(TrampolinePtr target, MacroAssembler& masm,
-+                                   uint32_t argSize) {
-+#ifdef DEBUG
-+  Register frameSize = R2.scratchReg();
-+  Address debugFrameSizeSlot(FramePointer,
-+                             BaselineFrame::reverseOffsetOfDebugFrameSize());
-+
-+  // frameSize = FramePointer - StackPointer.
-+  masm.movePtr(FramePointer, frameSize);
-+  masm.subPtr(StackPointer, frameSize);
-+
-+  // Store the size without the VMFunction arguments for debug assertions,
-+  // then add them back.
-+  masm.subPtr(Imm32(argSize), frameSize);
-+  masm.store32(frameSize, debugFrameSizeSlot);
-+  masm.addPtr(Imm32(argSize), frameSize);
-+#endif
-+
-+  // Push the frame descriptor and perform the tail call.
-+  masm.push(FrameDescriptor(FrameType::BaselineJS));
-+
-+  // The return address is in LR (set by the original bl/bctrl call).
-+  // The VMWrapper code will push it via pushReturnAddress().
-+
-+  masm.jump(target);
-+}
-+
-+// Pushes a BaselineStub frame descriptor and then calls |target|.
-+inline void EmitBaselineCallVM(TrampolinePtr target, MacroAssembler& masm) {
-+ masm.push(FrameDescriptor(FrameType::BaselineStub));
-+ masm.call(target);
-+}
-+
-+// Builds a stub frame on the stack. Pushed in order: a BaselineJS frame
-+// descriptor, the return address read from LR, the old FramePointer (after
-+// which FramePointer is set to the new StackPointer) and ICStubReg.
-+// |scratch| is only used in DEBUG builds and must differ from
-+// ICTailCallReg.
-+inline void EmitBaselineEnterStubFrame(MacroAssembler& masm, Register scratch) {
-+  MOZ_ASSERT(scratch != ICTailCallReg);
-+
-+#ifdef DEBUG
-+  Address debugFrameSizeSlot(FramePointer,
-+                             BaselineFrame::reverseOffsetOfDebugFrameSize());
-+
-+  // Record the frame size: FramePointer - StackPointer.
-+  masm.movePtr(FramePointer, scratch);
-+  masm.subPtr(StackPointer, scratch);
-+  masm.store32(scratch, debugFrameSizeSlot);
-+#endif
-+
-+  // Note: when making changes here, don't forget to update
-+  // BaselineStubFrame if needed.
-+
-+  // Frame descriptor first, then the return address. LR holds the return
-+  // address, so it is copied into ICTailCallReg to be pushed.
-+  masm.Push(FrameDescriptor(FrameType::BaselineJS));
-+  masm.xs_mflr(ICTailCallReg);
-+  masm.Push(ICTailCallReg);
-+
-+  // Old frame pointer, new frame pointer, stub register.
-+  masm.Push(FramePointer);
-+  masm.movePtr(StackPointer, FramePointer);
-+  masm.Push(ICStubReg);
-+
-+  // Stack should remain aligned.
-+  masm.assertStackAlignment(sizeof(Value), 0);
-+}
-+
-+} // namespace jit
-+} // namespace js
-+
-+#endif /* jit_ppc64_SharedICHelpers_ppc64_inl_h */
-diff --git a/js/src/jit/ppc64/SharedICHelpers-ppc64.h b/js/src/jit/ppc64/SharedICHelpers-ppc64.h
-new file mode 100644
-index 000000000000..31ba830d2609
---- /dev/null
-+++ b/js/src/jit/ppc64/SharedICHelpers-ppc64.h
-@@ -0,0 +1,97 @@
-+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
-+ * vim: set ts=8 sts=2 et sw=2 tw=80:
-+ * This Source Code Form is subject to the terms of the Mozilla Public
-+ * License, v. 2.0. If a copy of the MPL was not distributed with this
-+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-+
-+#ifndef jit_ppc64_SharedICHelpers_ppc64_h
-+#define jit_ppc64_SharedICHelpers_ppc64_h
-+
-+#include "jit/BaselineIC.h"
-+#include "jit/JitFrames.h"
-+#include "jit/MacroAssembler.h"
-+#include "jit/SharedICRegisters.h"
-+
-+namespace js {
-+namespace jit {
-+
-+// Distance from sp to the top Value inside an IC stub (no return address on
-+// the stack on PPC64).
-+static const size_t ICStackValueOffset = 0;
-+
-+// Four pointer-sized words describing a baseline stub frame.
-+// NOTE(review): presumably meant to mirror what EmitBaselineEnterStubFrame
-+// pushes; confirm the field order against that function before relying on
-+// it.
-+struct BaselineStubFrame {
-+ uintptr_t savedFrame;
-+ uintptr_t savedStub;
-+ uintptr_t returnAddress;
-+ uintptr_t descriptor;
-+};
-+
-+// Emits nothing on this port; |masm| is unused.
-+inline void EmitRestoreTailCallReg(MacroAssembler& masm) {
-+ // On PPC64, LR always holds the return address after a bl/bctrl call.
-+ // No-op: LR is the hardware link register, not a GPR on the stack.
-+}
-+
-+// Emits nothing on this port; |masm| is unused.
-+inline void EmitRepushTailCallReg(MacroAssembler& masm) {
-+ // No-op: LR already holds the return address.
-+}
-+
-+// Calls the stub code of the IC stub held in ICStubReg and reports, through
-+// |callOffset|, the assembler offset immediately after the call.
-+inline void EmitCallIC(MacroAssembler& masm, CodeOffset* callOffset) {
-+  // The stub pointer must already be in ICStubReg.
-+  // R2 won't be active when we call ICs, so its register doubles as the
-+  // scratch that receives the stubcode pointer.
-+  const Register stubCode = R2.scratchReg();
-+  masm.loadPtr(Address(ICStubReg, ICStub::offsetOfStubCode()), stubCode);
-+
-+  // On PPC64 call(Register) emits mtctr + bctrl, which sets LR to the
-+  // address after bctrl.
-+  masm.call(stubCode);
-+  *callOffset = CodeOffset(masm.currentOffset());
-+}
-+
-+// Returns from IC stub code by emitting a single blr.
-+inline void EmitReturnFromIC(MacroAssembler& masm) {
-+ // Return via hardware LR (set by the original bl/bctrl call).
-+ masm.as_blr();
-+}
-+
-+// Tears a stub frame down again: reloads ICStubReg from its frame slot,
-+// resets StackPointer to FramePointer, then pops the saved frame pointer,
-+// the return address (moved back into LR) and finally the frame descriptor.
-+inline void EmitBaselineLeaveStubFrame(MacroAssembler& masm) {
-+  const Address savedStub(FramePointer,
-+                          BaselineStubFrameLayout::ICStubOffsetFromFP);
-+  masm.loadPtr(savedStub, ICStubReg);
-+
-+  masm.movePtr(FramePointer, StackPointer);
-+  masm.Pop(FramePointer);
-+
-+  // Return address goes back into LR by way of ICTailCallReg.
-+  masm.Pop(ICTailCallReg);
-+  masm.xs_mtlr(ICTailCallReg);
-+
-+  // The descriptor is popped into a scratch register and dropped.
-+  {
-+    UseScratchRegisterScope scope(masm);
-+    Register discarded = scope.Acquire();
-+    masm.Pop(discarded);
-+  }
-+}
-+
-+// Emits a guarded pre-barrier call for |addr| with LR preserved around it:
-+// LR is copied into r0, pushed, and restored from the stack afterwards.
-+// NOTE(review): the push moves the stack pointer before the barrier is
-+// emitted, so |addr| presumably must not be StackPointer-relative - confirm
-+// against callers.
-+template <typename AddrType>
-+inline void EmitPreBarrier(MacroAssembler& masm, const AddrType& addr,
-+                           MIRType type) {
-+  // On PPC64, LR is clobbered by guardedCallPreBarrier. Save it first.
-+  const Register linkCopy = r0;
-+  masm.xs_mflr(linkCopy);
-+  masm.push(linkCopy);
-+
-+  masm.guardedCallPreBarrier(addr, type);
-+
-+  masm.pop(linkCopy);
-+  masm.xs_mtlr(linkCopy);
-+}
-+
-+inline void EmitStubGuardFailure(MacroAssembler& masm) {
-+ // Load next stub into ICStubReg.
-+ masm.loadPtr(Address(ICStubReg, ICCacheIRStub::offsetOfNext()), ICStubReg);
-+
-+ // Return address is in LR. Jump to the next stubcode.
-+ masm.jump(Address(ICStubReg, ICStub::offsetOfStubCode()));
-+}
-+
-+} // namespace jit
-+} // namespace js
-+
-+#endif /* jit_ppc64_SharedICHelpers_ppc64_h */
-diff --git a/js/src/jit/ppc64/SharedICRegisters-ppc64.h b/js/src/jit/ppc64/SharedICRegisters-ppc64.h
-new file mode 100644
-index 000000000000..ddf67342f855
---- /dev/null
-+++ b/js/src/jit/ppc64/SharedICRegisters-ppc64.h
-@@ -0,0 +1,46 @@
-+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
-+ * vim: set ts=8 sts=2 et sw=2 tw=80:
-+ * This Source Code Form is subject to the terms of the Mozilla Public
-+ * License, v. 2.0. If a copy of the MPL was not distributed with this
-+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-+
-+#ifndef jit_ppc64_SharedICRegisters_ppc64_h
-+#define jit_ppc64_SharedICRegisters_ppc64_h
-+
-+#include "jit/ppc64/Assembler-ppc64.h"
-+#include "jit/Registers.h"
-+#include "jit/RegisterSets.h"
-+
-+namespace js {
-+namespace jit {
-+
-+// ValueOperands R0, R1, and R2.
-+// R0 == JSReturnReg, and R2 uses registers not preserved across calls. R1 value
-+// should be preserved across calls.
-+static constexpr ValueOperand R0(r5);
-+static constexpr ValueOperand R1(r15);
-+static constexpr ValueOperand R2(r4);
-+
-+// ICTailCallReg and ICStubReg.
-+// On PPC64, LR is not a GPR, so ICTailCallReg must be a normal GPR.
-+// PPC64 ELFv2 has no volatile non-arg GPRs (r3-r10 are all arg regs), so
-+// using an arg register risks clobbering by ABI calls with enough arguments.
-+// We use callee-saved registers instead, matching MIPS64/RISC-V strategy.
-+// These are excluded from BaselineICAvailableGeneralRegs.
-+static constexpr Register ICTailCallReg = r27;
-+static constexpr Register ICStubReg = r26;
-+
-+// FloatReg0 must be equal to ReturnFloatReg.
-+static constexpr FloatRegister FloatReg0 = {FloatRegisters::f1,
-+ FloatRegisters::Double};
-+static constexpr FloatRegister FloatReg1 = {FloatRegisters::f2,
-+ FloatRegisters::Double};
-+static constexpr FloatRegister FloatReg2 = {FloatRegisters::f3,
-+ FloatRegisters::Double};
-+static constexpr FloatRegister FloatReg3 = {FloatRegisters::f4,
-+ FloatRegisters::Double};
-+
-+} // namespace jit
-+} // namespace js
-+
-+#endif /* jit_ppc64_SharedICRegisters_ppc64_h */
-diff --git a/js/src/jit/ppc64/Simulator-ppc64.cpp b/js/src/jit/ppc64/Simulator-ppc64.cpp
-new file mode 100644
-index 000000000000..8b29eb3add04
---- /dev/null
-+++ b/js/src/jit/ppc64/Simulator-ppc64.cpp
-@@ -0,0 +1,7296 @@
-+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
-+ * vim: set ts=8 sts=2 et sw=2 tw=80:
-+ * This Source Code Form is subject to the terms of the Mozilla Public
-+ * License, v. 2.0. If a copy of the MPL was not distributed with this
-+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-+
-+#include "jit/ppc64/Simulator-ppc64.h"
-+
-+#include <cinttypes>
-+#include <cmath>
-+#include <cstring>
-+#include <float.h>
-+#include <limits>
-+
-+#include "jit/AtomicOperations.h"
-+#include "jit/ppc64/Assembler-ppc64.h"
-+#include "js/Conversions.h"
-+#include "threading/LockGuard.h"
-+#include "vm/Float16.h"
-+#include "vm/JSContext.h"
-+#include "vm/Runtime.h"
-+#include "wasm/WasmInstance.h"
-+#include "wasm/WasmSignalHandlers.h"
-+
-+#define I8(v) static_cast<int8_t>(v)
-+#define I16(v) static_cast<int16_t>(v)
-+#define U16(v) static_cast<uint16_t>(v)
-+#define I32(v) static_cast<int32_t>(v)
-+#define U32(v) static_cast<uint32_t>(v)
-+#define I64(v) static_cast<int64_t>(v)
-+#define U64(v) static_cast<uint64_t>(v)
-+#define I128(v) static_cast<__int128_t>(v)
-+#define U128(v) static_cast<__uint128_t>(v)
-+
-+namespace js {
-+namespace jit {
-+
-+static int64_t MultiplyHighSigned(int64_t u, int64_t v) {
-+ uint64_t u0, v0, w0;
-+ int64_t u1, v1, w1, w2, t;
-+
-+ u0 = u & 0xFFFFFFFFL;
-+ u1 = u >> 32;
-+ v0 = v & 0xFFFFFFFFL;
-+ v1 = v >> 32;
-+
-+ w0 = u0 * v0;
-+ t = u1 * v0 + (w0 >> 32);
-+ w1 = t & 0xFFFFFFFFL;
-+ w2 = t >> 32;
-+ w1 = u0 * v1 + w1;
-+
-+ return u1 * v1 + w2 + (w1 >> 32);
-+}
-+
-+static uint64_t MultiplyHighUnsigned(uint64_t u, uint64_t v) {
-+ uint64_t u0, v0, w0;
-+ uint64_t u1, v1, w1, w2, t;
-+
-+ u0 = u & 0xFFFFFFFFL;
-+ u1 = u >> 32;
-+ v0 = v & 0xFFFFFFFFL;
-+ v1 = v >> 32;
-+
-+ w0 = u0 * v0;
-+ t = u1 * v0 + (w0 >> 32);
-+ w1 = t & 0xFFFFFFFFL;
-+ w2 = t >> 32;
-+ w1 = u0 * v1 + w1;
-+
-+ return u1 * v1 + w2 + (w1 >> 32);
-+}
-+
-+inline constexpr uint32_t RotateLeft32(uint32_t value, uint32_t shift) {
-+ return (value << shift) | (value >> ((32 - shift) & 31));
-+}
-+
-+inline constexpr uint64_t RotateLeft64(uint64_t value, uint64_t shift) {
-+ return (value << shift) | (value >> ((64 - shift) & 63));
-+}
-+
-+// Generate a 64-bit mask with bits mb..me set (PPC numbering: 0 = MSB = bit
-+// 63 in C). When mb <= me, a contiguous range is set; when mb > me, the
-+// mask wraps around (bits 0..me and mb..63 are set).
-+static inline uint64_t MASK64(unsigned mb, unsigned me) {
-+ MOZ_ASSERT(mb < 64 && me < 64);
-+ uint64_t mask_begin = ~0ULL >> mb;
-+ uint64_t mask_end = ~0ULL << (63 - me);
-+ if (mb <= me) {
-+ return mask_begin & mask_end;
-+ }
-+ return mask_begin | mask_end;
-+}
-+
-+static inline uint32_t MASK32(unsigned mb, unsigned me) {
-+ MOZ_ASSERT(mb < 32 && me < 32);
-+ uint32_t mask_begin = ~0U >> mb;
-+ uint32_t mask_end = ~0U << (31 - me);
-+ if (mb <= me) {
-+ return mask_begin & mask_end;
-+ }
-+ return mask_begin | mask_end;
-+}
-+
-+// Count leading zeros.
-+static inline int CountLeadingZeros64(uint64_t value) {
-+ if (value == 0) return 64;
-+ return __builtin_clzll(value);
-+}
-+
-+static inline int CountLeadingZeros32(uint32_t value) {
-+ if (value == 0) return 32;
-+ return __builtin_clz(value);
-+}
-+
-+static inline int CountTrailingZeros64(uint64_t value) {
-+ if (value == 0) return 64;
-+ return __builtin_ctzll(value);
-+}
-+
-+static inline int CountTrailingZeros32(uint32_t value) {
-+ if (value == 0) return 32;
-+ return __builtin_ctz(value);
-+}
-+
-+static inline int PopCount64(uint64_t value) {
-+ return __builtin_popcountll(value);
-+}
-+
-+static inline int PopCount32(uint32_t value) {
-+ return __builtin_popcount(value);
-+}
-+
-+static inline uint64_t PopCountPerByte(uint64_t value) {
-+ uint64_t result = 0;
-+ for (int i = 0; i < 8; i++) {
-+ uint8_t byte = (value >> (i * 8)) & 0xFF;
-+ result |= (uint64_t)__builtin_popcount(byte) << (i * 8);
-+ }
-+ return result;
-+}
-+
-+// PPC64 C argument slots: PPC64 ELFv2 ABI does not require C argument
-+// slots on the stack for register-passed arguments, but we reserve the
-+// link area (32 bytes).
-+const int kCArgSlotCount = 0;
-+const int kCArgsSlotsSize = kCArgSlotCount * sizeof(uintptr_t);
-+
-+// -----------------------------------------------------------------------------
-+// PPC64 SimInstruction.
-+
-+class SimInstruction {
-+ public:
-+ enum {
-+ kInstrSize = 4,
-+ kPCReadOffset = 0
-+ };
-+
-+ inline Instr instructionBits() const {
-+ return *reinterpret_cast<const Instr*>(this);
-+ }
-+
-+ inline void setInstructionBits(Instr value) {
-+ *reinterpret_cast<Instr*>(this) = value;
-+ }
-+
-+ inline int bit(int nr) const { return (instructionBits() >> nr) & 1; }
-+
-+ inline uint32_t bits(int hi, int lo) const {
-+ return (instructionBits() >> lo) & ((2U << (hi - lo)) - 1);
-+ }
-+
-+ inline uint32_t opcode() const { return bits(31, 26); }
-+
-+ inline uint32_t rtValue() const { return bits(25, 21); }
-+ inline uint32_t rsValue() const { return bits(25, 21); }
-+ inline uint32_t raValue() const { return bits(20, 16); }
-+ inline uint32_t rbValue() const { return bits(15, 11); }
-+ inline uint32_t rcValue() const { return bits(10, 6); }
-+
-+ inline uint32_t boValue() const { return bits(25, 21); }
-+ inline uint32_t biValue() const { return bits(20, 16); }
-+
-+ // D-form 16-bit immediate (sign-extend to get signed value).
-+ inline int16_t imm16Value() const { return I16(bits(15, 0)); }
-+ inline uint16_t uimm16Value() const { return U16(bits(15, 0)); }
-+
-+ // DS-form 14-bit displacement (bits 2..15, 4-byte aligned).
-+ inline int16_t ds14Value() const {
-+ return I16(bits(15, 2) << 2);
-+ }
-+
-+ // B-form 14-bit branch displacement (bits 2..15, 4-byte aligned).
-+ inline int32_t bd16Value() const {
-+ int16_t raw = I16(bits(15, 2) << 2);
-+ return (int32_t)raw;
-+ }
-+
-+ // I-form 24-bit branch offset (bits 2..25, sign-extended, 4-byte aligned).
-+ inline int32_t li26Value() const {
-+ int32_t raw = I32(bits(25, 2) << 2);
-+ // Sign-extend from 26 bits.
-+ return (raw << 6) >> 6;
-+ }
-+
-+ // Extended opcode for X-form / XO-form (bits 1..10).
-+ inline uint32_t xoValue() const { return bits(10, 1); }
-+
-+ // Extended opcode for XL-form (bits 1..10).
-+ inline uint32_t xlValue() const { return bits(10, 1); }
-+
-+ // MD-form SH field: sh[0:4] in instruction bits 15:11, sh[5] in bit 1.
-+ // Assembler encodes: ((sh & 0x1f) << 11) | ((sh & 0x20) >> 4).
-+ inline uint32_t mdSHValue() const {
-+ return bits(15, 11) | (bit(1) << 5);
-+ }
-+ // mb/me for MD-form (rldicl/rldicr/rldic/rldimi): 6-bit field split as
-+ // mb[0:4] in instruction bits 10:6 and mb[5] in bit 5.
-+ inline uint32_t mdMBValue() const {
-+ return bits(10, 6) | (bit(5) << 5);
-+ }
-+ inline uint32_t mdMEValue() const { return mdMBValue(); }
-+
-+ // MD-form XO (bits 2..4).
-+ inline uint32_t mdXOValue() const { return bits(4, 2); }
-+
-+ // MDS-form (rldcl, rldcr): mb[0:4] in bits 10:6, mb[5] in bit 5.
-+ inline uint32_t mdsMBValue() const {
-+ return bits(10, 6) | (bit(5) << 5);
-+ }
-+
-+ // M-form fields (32-bit rotate/mask).
-+ inline uint32_t mSHValue() const { return bits(15, 11); }
-+ inline uint32_t mMBValue() const { return bits(10, 6); }
-+ inline uint32_t mMEValue() const { return bits(5, 1); }
-+
-+ // Rc bit.
-+ inline bool rcBit() const { return bit(0); }
-+
-+ // AA bit for branch instructions.
-+ inline bool aaBit() const { return bit(1); }
-+
-+ // LK bit for branch instructions.
-+ inline bool lkBit() const { return bit(0); }
-+
-+ // OE bit for XO-form arithmetic.
-+ inline bool oeBit() const { return bit(10); }
-+
-+ // L bit for compare instructions (bit 21).
-+ inline bool lBit() const { return bit(21); }
-+
-+ // BF field (bits 23..25) for compares.
-+ inline uint32_t bfValue() const { return bits(25, 23); }
-+
-+ bool isTrap() const {
-+ uint32_t instr = instructionBits();
-+ // PPC_trap = 0x7FE00008 (tw 31,0,0).
-+ // Don't treat the call-redirection instruction or wasm trap as a
-+ // debugger trap.
-+ if (instr == kCallRedirInstr) return false;
-+ if (instr == 0x7FE00008) return false;
-+ // Any other tw instruction with TO=31 is a trap.
-+ if (opcode() == 31 && (xoValue() == 4)) return true;
-+ return false;
-+ }
-+
-+ private:
-+ SimInstruction() = delete;
-+ SimInstruction(const SimInstruction& other) = delete;
-+ void operator=(const SimInstruction& other) = delete;
-+};
-+
-+// -----------------------------------------------------------------------------
-+// ICache.
-+
-+class CachePage {
-+ public:
-+ static const int LINE_VALID = 0;
-+ static const int LINE_INVALID = 1;
-+
-+ static const int kPageShift = 12;
-+ static const int kPageSize = 1 << kPageShift;
-+ static const int kPageMask = kPageSize - 1;
-+ static const int kLineShift = 2;
-+ static const int kLineLength = 1 << kLineShift;
-+ static const int kLineMask = kLineLength - 1;
-+
-+ CachePage() { memset(&validity_map_, LINE_INVALID, sizeof(validity_map_)); }
-+
-+ char* validityByte(int offset) {
-+ return &validity_map_[offset >> kLineShift];
-+ }
-+
-+ char* cachedData(int offset) { return &data_[offset]; }
-+
-+ private:
-+ char data_[kPageSize];
-+ static const int kValidityMapSize = kPageSize >> kLineShift;
-+ char validity_map_[kValidityMapSize];
-+};
-+
-+class AutoLockSimulatorCache : public LockGuard<Mutex> {
-+ using Base = LockGuard<Mutex>;
-+
-+ public:
-+ explicit AutoLockSimulatorCache()
-+ : Base(SimulatorProcess::singleton_->cacheLock_) {}
-+};
-+
-+mozilla::Atomic<size_t, mozilla::ReleaseAcquire>
-+ SimulatorProcess::ICacheCheckingDisableCount(1);
-+SimulatorProcess* SimulatorProcess::singleton_ = nullptr;
-+
-+int64_t Simulator::StopSimAt = -1;
-+
-+// -----------------------------------------------------------------------------
-+// Simulator Create / Destroy.
-+
-+Simulator* Simulator::Create() {
-+ auto sim = MakeUnique<Simulator>();
-+ if (!sim) {
-+ return nullptr;
-+ }
-+
-+ if (!sim->init()) {
-+ return nullptr;
-+ }
-+
-+ int64_t stopAt;
-+ char* stopAtStr = getenv("PPC64_SIM_STOP_AT");
-+ if (stopAtStr && sscanf(stopAtStr, "%" PRIi64, &stopAt) == 1) {
-+ fprintf(stderr, "\nStopping simulation at icount %" PRIi64 "\n", stopAt);
-+ Simulator::StopSimAt = stopAt;
-+ }
-+
-+ return sim.release();
-+}
-+
-+void Simulator::Destroy(Simulator* sim) { js_delete(sim); }
-+
-+// -----------------------------------------------------------------------------
-+// Debugger.
-+
-+class ppc64Debugger {
-+ public:
-+ explicit ppc64Debugger(Simulator* sim) : sim_(sim) {}
-+
-+ void stop(SimInstruction* instr);
-+ void debug();
-+ void printAllRegs();
-+ void printAllRegsIncludingFPU();
-+
-+ private:
-+ static const Instr kBreakpointInstr = 0x7FE00008; // PPC_trap
-+ static const Instr kNopInstr = 0x60000000; // PPC_nop
-+
-+ Simulator* sim_;
-+
-+ int64_t getRegisterValue(int regnum);
-+ int64_t getFPURegisterValueLong(int regnum);
-+ float getFPURegisterValueFloat(int regnum);
-+ double getFPURegisterValueDouble(int regnum);
-+ bool getValue(const char* desc, int64_t* value);
-+
-+ bool setBreakpoint(SimInstruction* breakpc);
-+ bool deleteBreakpoint(SimInstruction* breakpc);
-+
-+ void undoBreakpoints();
-+ void redoBreakpoints();
-+};
-+
-+[[maybe_unused]] static void UNIMPLEMENTED() {
-+ printf("UNIMPLEMENTED instruction.\n");
-+ MOZ_CRASH();
-+}
-+[[maybe_unused]] static void UNREACHABLE() {
-+ printf("UNREACHABLE instruction.\n");
-+ MOZ_CRASH();
-+}
-+[[maybe_unused]] static void UNSUPPORTED() {
-+ printf("Unsupported instruction.\n");
-+ MOZ_CRASH();
-+}
-+
-+void ppc64Debugger::stop(SimInstruction* instr) {
-+ uint32_t code = 0;
-+ char* msg = *reinterpret_cast<char**>(sim_->get_pc() +
-+ SimInstruction::kInstrSize);
-+ if (!sim_->watchedStops_[code].desc_) {
-+ sim_->watchedStops_[code].desc_ = msg;
-+ }
-+ if (code != kMaxStopCode) {
-+ printf("Simulator hit stop %u: %s\n", code, msg);
-+ } else {
-+ printf("Simulator hit %s\n", msg);
-+ }
-+ sim_->set_pc(sim_->get_pc() + 2 * SimInstruction::kInstrSize);
-+ debug();
-+}
-+
-+int64_t ppc64Debugger::getRegisterValue(int regnum) {
-+ if (regnum == kPCRegister) {
-+ return sim_->get_pc();
-+ }
-+ return sim_->getRegister(regnum);
-+}
-+
-+int64_t ppc64Debugger::getFPURegisterValueLong(int regnum) {
-+ return sim_->getFpuRegister(regnum);
-+}
-+
-+float ppc64Debugger::getFPURegisterValueFloat(int regnum) {
-+ return sim_->getFpuRegisterFloat(regnum);
-+}
-+
-+double ppc64Debugger::getFPURegisterValueDouble(int regnum) {
-+ return sim_->getFpuRegisterDouble(regnum);
-+}
-+
-+bool ppc64Debugger::getValue(const char* desc, int64_t* value) {
-+ Register reg = Register::FromName(desc);
-+ if (reg != InvalidReg) {
-+ *value = getRegisterValue(reg.code());
-+ return true;
-+ }
-+
-+ if (strncmp(desc, "0x", 2) == 0) {
-+ return sscanf(desc + 2, "%" PRIx64, reinterpret_cast<uint64_t*>(value)) ==
-+ 1;
-+ }
-+ return sscanf(desc, "%" PRIu64, reinterpret_cast<uint64_t*>(value)) == 1;
-+}
-+
-+bool ppc64Debugger::setBreakpoint(SimInstruction* breakpc) {
-+ if (sim_->break_pc_ != nullptr) {
-+ return false;
-+ }
-+
-+ sim_->break_pc_ = breakpc;
-+ sim_->break_instr_ = breakpc->instructionBits();
-+ return true;
-+}
-+
-+bool ppc64Debugger::deleteBreakpoint(SimInstruction* breakpc) {
-+ if (sim_->break_pc_ != nullptr) {
-+ sim_->break_pc_->setInstructionBits(sim_->break_instr_);
-+ }
-+
-+ sim_->break_pc_ = nullptr;
-+ sim_->break_instr_ = 0;
-+ return true;
-+}
-+
-+void ppc64Debugger::undoBreakpoints() {
-+ if (sim_->break_pc_) {
-+ sim_->break_pc_->setInstructionBits(sim_->break_instr_);
-+ }
-+}
-+
-+void ppc64Debugger::redoBreakpoints() {
-+ if (sim_->break_pc_) {
-+ sim_->break_pc_->setInstructionBits(kBreakpointInstr);
-+ }
-+}
-+
-+void ppc64Debugger::printAllRegs() {
-+ int64_t value;
-+ for (uint32_t i = 0; i < Registers::Total; i++) {
-+ value = getRegisterValue(i);
-+ printf("%3s: 0x%016" PRIx64 " %20" PRIi64 " ", Registers::GetName(i),
-+ value, value);
-+
-+ if (i % 2) {
-+ printf("\n");
-+ }
-+ }
-+ printf("\n");
-+
-+ value = getRegisterValue(Simulator::pc);
-+ printf(" pc: 0x%016" PRIx64 "\n", value);
-+ printf(" lr: 0x%016" PRIx64 "\n", sim_->getLR());
-+ printf(" ctr: 0x%016" PRIx64 "\n", sim_->getCTR());
-+ printf(" cr: 0x%08x\n", sim_->getCR());
-+ printf(" xer: 0x%016" PRIx64 "\n", sim_->getXER());
-+}
-+
-+void ppc64Debugger::printAllRegsIncludingFPU() {
-+ printAllRegs();
-+
-+ printf("\n\n");
-+ for (uint32_t i = 0; i < FloatRegisters::TotalPhys; i++) {
-+ printf("%3s: 0x%016" PRIx64 "\tflt: %-8.4g\tdbl: %-16.4g\n",
-+ FloatRegisters::GetName(i), getFPURegisterValueLong(i),
-+ getFPURegisterValueFloat(i), getFPURegisterValueDouble(i));
-+ }
-+}
-+
-+static char* ReadLine(const char* prompt) {
-+ UniqueChars result;
-+ char lineBuf[256];
-+ int offset = 0;
-+ bool keepGoing = true;
-+ fprintf(stdout, "%s", prompt);
-+ fflush(stdout);
-+ while (keepGoing) {
-+ if (fgets(lineBuf, sizeof(lineBuf), stdin) == nullptr) {
-+ return nullptr;
-+ }
-+ int len = strlen(lineBuf);
-+ if (len > 0 && lineBuf[len - 1] == '\n') {
-+ keepGoing = false;
-+ }
-+ if (!result) {
-+ result.reset(js_pod_malloc<char>(len + 1));
-+ if (!result) {
-+ return nullptr;
-+ }
-+ } else {
-+ int new_len = offset + len + 1;
-+ char* new_result = js_pod_malloc<char>(new_len);
-+ if (!new_result) {
-+ return nullptr;
-+ }
-+ memcpy(new_result, result.get(), offset * sizeof(char));
-+ result.reset(new_result);
-+ }
-+ memcpy(result.get() + offset, lineBuf, len * sizeof(char));
-+ offset += len;
-+ }
-+
-+ MOZ_ASSERT(result);
-+ result[offset] = '\0';
-+ return result.release();
-+}
-+
-+static void DisassembleInstruction(uint64_t pc) {
-+ printf(" 0x%016" PRIx64 ": %08x\n", pc,
-+ *reinterpret_cast<uint32_t*>(pc));
-+}
-+
-+void ppc64Debugger::debug() {
-+ intptr_t lastPC = -1;
-+ bool done = false;
-+
-+#define COMMAND_SIZE 63
-+#define ARG_SIZE 255
-+
-+#define STR(a) #a
-+#define XSTR(a) STR(a)
-+
-+ char cmd[COMMAND_SIZE + 1];
-+ char arg1[ARG_SIZE + 1];
-+ char arg2[ARG_SIZE + 1];
-+ char* argv[3] = {cmd, arg1, arg2};
-+
-+ cmd[COMMAND_SIZE] = 0;
-+ arg1[ARG_SIZE] = 0;
-+ arg2[ARG_SIZE] = 0;
-+
-+ undoBreakpoints();
-+
-+ while (!done && (sim_->get_pc() != Simulator::end_sim_pc)) {
-+ if (lastPC != sim_->get_pc()) {
-+ DisassembleInstruction(sim_->get_pc());
-+ lastPC = sim_->get_pc();
-+ }
-+ char* line = ReadLine("sim> ");
-+ if (line == nullptr) {
-+ break;
-+ } else {
-+ char* last_input = sim_->lastDebuggerInput();
-+ if (strcmp(line, "\n") == 0 && last_input != nullptr) {
-+ line = last_input;
-+ } else {
-+ sim_->setLastDebuggerInput(line);
-+ }
-+ int argc = sscanf(line,
-+ "%" XSTR(COMMAND_SIZE) "s "
-+ "%" XSTR(ARG_SIZE) "s "
-+ "%" XSTR(ARG_SIZE) "s",
-+ cmd, arg1, arg2);
-+ if ((strcmp(cmd, "si") == 0) || (strcmp(cmd, "stepi") == 0)) {
-+ SimInstruction* instr =
-+ reinterpret_cast<SimInstruction*>(sim_->get_pc());
-+ if (!instr->isTrap()) {
-+ sim_->instructionDecode(instr);
-+ } else {
-+ printf("/!\\ Jumping over generated breakpoint.\n");
-+ sim_->set_pc(sim_->get_pc() + SimInstruction::kInstrSize);
-+ }
-+ sim_->icount_++;
-+ } else if ((strcmp(cmd, "c") == 0) || (strcmp(cmd, "cont") == 0)) {
-+ sim_->instructionDecode(
-+ reinterpret_cast<SimInstruction*>(sim_->get_pc()));
-+ sim_->icount_++;
-+ done = true;
-+ } else if ((strcmp(cmd, "p") == 0) || (strcmp(cmd, "print") == 0)) {
-+ if (argc == 2) {
-+ int64_t value;
-+ if (strcmp(arg1, "all") == 0) {
-+ printAllRegs();
-+ } else if (strcmp(arg1, "allf") == 0) {
-+ printAllRegsIncludingFPU();
-+ } else {
-+ Register reg = Register::FromName(arg1);
-+ FloatRegisters::Code fReg = FloatRegisters::FromName(arg1);
-+ if (reg != InvalidReg) {
-+ value = getRegisterValue(reg.code());
-+ printf("%s: 0x%016" PRIx64 " %20" PRIi64 " \n", arg1, value,
-+ value);
-+ } else if (fReg != FloatRegisters::Invalid) {
-+ printf("%3s: 0x%016" PRIx64 "\tflt: %-8.4g\tdbl: %-16.4g\n",
-+ FloatRegisters::GetName(fReg),
-+ getFPURegisterValueLong(fReg),
-+ getFPURegisterValueFloat(fReg),
-+ getFPURegisterValueDouble(fReg));
-+ } else {
-+ printf("%s unrecognized\n", arg1);
-+ }
-+ }
-+ } else {
-+ printf("print <register> or print <fpu register> single\n");
-+ }
-+ } else if (strcmp(cmd, "stack") == 0 || strcmp(cmd, "mem") == 0) {
-+ int64_t* cur = nullptr;
-+ int64_t* end = nullptr;
-+ int next_arg = 1;
-+
-+ if (strcmp(cmd, "stack") == 0) {
-+ cur = reinterpret_cast<int64_t*>(sim_->getRegister(Simulator::sp));
-+ } else {
-+ int64_t value;
-+ if (!getValue(arg1, &value)) {
-+ printf("%s unrecognized\n", arg1);
-+ continue;
-+ }
-+ cur = reinterpret_cast<int64_t*>(value);
-+ next_arg++;
-+ }
-+
-+ int64_t words;
-+ if (argc == next_arg) {
-+ words = 10;
-+ } else {
-+ if (!getValue(argv[next_arg], &words)) {
-+ words = 10;
-+ }
-+ }
-+ end = cur + words;
-+
-+ while (cur < end) {
-+ printf(" %p: 0x%016" PRIx64 " %20" PRIi64, cur, *cur, *cur);
-+ printf("\n");
-+ cur++;
-+ }
-+
-+ } else if ((strcmp(cmd, "disasm") == 0) || (strcmp(cmd, "dpc") == 0) ||
-+ (strcmp(cmd, "di") == 0)) {
-+ uint8_t* cur = nullptr;
-+ uint8_t* end = nullptr;
-+
-+ if (argc == 1) {
-+ cur = reinterpret_cast<uint8_t*>(sim_->get_pc());
-+ end = cur + (10 * SimInstruction::kInstrSize);
-+ } else if (argc == 2) {
-+ Register reg = Register::FromName(arg1);
-+ if (reg != InvalidReg || strncmp(arg1, "0x", 2) == 0) {
-+ int64_t value;
-+ if (getValue(arg1, &value)) {
-+ cur = reinterpret_cast<uint8_t*>(value);
-+ end = cur + (10 * SimInstruction::kInstrSize);
-+ }
-+ } else {
-+ int64_t value;
-+ if (getValue(arg1, &value)) {
-+ cur = reinterpret_cast<uint8_t*>(sim_->get_pc());
-+ end = cur + (value * SimInstruction::kInstrSize);
-+ }
-+ }
-+ } else {
-+ int64_t value1;
-+ int64_t value2;
-+ if (getValue(arg1, &value1) && getValue(arg2, &value2)) {
-+ cur = reinterpret_cast<uint8_t*>(value1);
-+ end = cur + (value2 * SimInstruction::kInstrSize);
-+ }
-+ }
-+
-+ while (cur < end) {
-+ DisassembleInstruction(uint64_t(cur));
-+ cur += SimInstruction::kInstrSize;
-+ }
-+ } else if (strcmp(cmd, "gdb") == 0) {
-+ printf("relinquishing control to gdb\n");
-+#if defined(__x86_64__)
-+ asm("int $3");
-+#elif defined(__aarch64__)
-+ asm("brk #0xf000");
-+#endif
-+ printf("regaining control from gdb\n");
-+ } else if (strcmp(cmd, "break") == 0) {
-+ if (argc == 2) {
-+ int64_t value;
-+ if (getValue(arg1, &value)) {
-+ if (!setBreakpoint(reinterpret_cast<SimInstruction*>(value))) {
-+ printf("setting breakpoint failed\n");
-+ }
-+ } else {
-+ printf("%s unrecognized\n", arg1);
-+ }
-+ } else {
-+ printf("break <address>\n");
-+ }
-+ } else if (strcmp(cmd, "del") == 0) {
-+ if (!deleteBreakpoint(nullptr)) {
-+ printf("deleting breakpoint failed\n");
-+ }
-+ } else if (strcmp(cmd, "flags") == 0) {
-+ printf("CR: 0x%08x XER: 0x%016" PRIx64 "\n", sim_->getCR(),
-+ sim_->getXER());
-+ } else if (strcmp(cmd, "stop") == 0) {
-+ int64_t value;
-+ intptr_t stop_pc = sim_->get_pc() - 2 * SimInstruction::kInstrSize;
-+ SimInstruction* stop_instr =
-+ reinterpret_cast<SimInstruction*>(stop_pc);
-+ SimInstruction* msg_address = reinterpret_cast<SimInstruction*>(
-+ stop_pc + SimInstruction::kInstrSize);
-+ if ((argc == 2) && (strcmp(arg1, "unstop") == 0)) {
-+ if (sim_->isStopInstruction(stop_instr)) {
-+ stop_instr->setInstructionBits(kNopInstr);
-+ msg_address->setInstructionBits(kNopInstr);
-+ } else {
-+ printf("Not at debugger stop.\n");
-+ }
-+ } else if (argc == 3) {
-+ if (strcmp(arg1, "info") == 0) {
-+ if (strcmp(arg2, "all") == 0) {
-+ printf("Stop information:\n");
-+ for (uint32_t i = kMaxWatchpointCode + 1; i <= kMaxStopCode;
-+ i++) {
-+ sim_->printStopInfo(i);
-+ }
-+ } else if (getValue(arg2, &value)) {
-+ sim_->printStopInfo(value);
-+ } else {
-+ printf("Unrecognized argument.\n");
-+ }
-+ } else if (strcmp(arg1, "enable") == 0) {
-+ if (strcmp(arg2, "all") == 0) {
-+ for (uint32_t i = kMaxWatchpointCode + 1; i <= kMaxStopCode;
-+ i++) {
-+ sim_->enableStop(i);
-+ }
-+ } else if (getValue(arg2, &value)) {
-+ sim_->enableStop(value);
-+ } else {
-+ printf("Unrecognized argument.\n");
-+ }
-+ } else if (strcmp(arg1, "disable") == 0) {
-+ if (strcmp(arg2, "all") == 0) {
-+ for (uint32_t i = kMaxWatchpointCode + 1; i <= kMaxStopCode;
-+ i++) {
-+ sim_->disableStop(i);
-+ }
-+ } else if (getValue(arg2, &value)) {
-+ sim_->disableStop(value);
-+ } else {
-+ printf("Unrecognized argument.\n");
-+ }
-+ }
-+ } else {
-+ printf("Wrong usage. Use help command for more information.\n");
-+ }
-+ } else if ((strcmp(cmd, "h") == 0) || (strcmp(cmd, "help") == 0)) {
-+ printf("cont\n");
-+ printf(" continue execution (alias 'c')\n");
-+ printf("stepi\n");
-+ printf(" step one instruction (alias 'si')\n");
-+ printf("print <register>\n");
-+ printf(" print register content (alias 'p')\n");
-+ printf(" use register name 'all' to print all registers\n");
-+ printf("stack [<words>]\n");
-+ printf(" dump stack content, default dump 10 words)\n");
-+ printf("mem <address> [<words>]\n");
-+ printf(" dump memory content, default dump 10 words)\n");
-+ printf("flags\n");
-+ printf(" print CR and XER\n");
-+ printf("disasm [<instructions>]\n");
-+ printf("disasm [<address/register>]\n");
-+ printf("disasm [[<address/register>] <instructions>]\n");
-+ printf(" disassemble code, default is 10 instructions\n");
-+ printf(" from pc (alias 'di')\n");
-+ printf("gdb\n");
-+ printf(" enter gdb\n");
-+ printf("break <address>\n");
-+ printf(" set a break point on the address\n");
-+ printf("del\n");
-+ printf(" delete the breakpoint\n");
-+ } else {
-+ printf("Unknown command: %s\n", cmd);
-+ }
-+ }
-+ }
-+
-+ redoBreakpoints();
-+
-+#undef COMMAND_SIZE
-+#undef ARG_SIZE
-+
-+#undef STR
-+#undef XSTR
-+}
-+
-+// -----------------------------------------------------------------------------
-+// ICache helpers.
-+
-+static bool AllOnOnePage(uintptr_t start, int size) {
-+ intptr_t start_page = (start & ~CachePage::kPageMask);
-+ intptr_t end_page = ((start + size) & ~CachePage::kPageMask);
-+ return start_page == end_page;
-+}
-+
-+void Simulator::setLastDebuggerInput(char* input) {
-+ js_free(lastDebuggerInput_);
-+ lastDebuggerInput_ = input;
-+}
-+
-+static CachePage* GetCachePageLocked(SimulatorProcess::ICacheMap& i_cache,
-+ void* page) {
-+ SimulatorProcess::ICacheMap::AddPtr p = i_cache.lookupForAdd(page);
-+ if (p) {
-+ return p->value();
-+ }
-+ AutoEnterOOMUnsafeRegion oomUnsafe;
-+ CachePage* new_page = js_new<CachePage>();
-+ if (!new_page || !i_cache.add(p, page, new_page)) {
-+ oomUnsafe.crash("Simulator CachePage");
-+ }
-+ return new_page;
-+}
-+
-+static void FlushOnePageLocked(SimulatorProcess::ICacheMap& i_cache,
-+ intptr_t start, int size) {
-+ MOZ_ASSERT(size <= CachePage::kPageSize);
-+ MOZ_ASSERT(AllOnOnePage(start, size - 1));
-+ MOZ_ASSERT((start & CachePage::kLineMask) == 0);
-+ MOZ_ASSERT((size & CachePage::kLineMask) == 0);
-+ void* page = reinterpret_cast<void*>(start & (~CachePage::kPageMask));
-+ int offset = (start & CachePage::kPageMask);
-+ CachePage* cache_page = GetCachePageLocked(i_cache, page);
-+ char* valid_bytemap = cache_page->validityByte(offset);
-+ memset(valid_bytemap, CachePage::LINE_INVALID, size >> CachePage::kLineShift);
-+}
-+
-+static void FlushICacheLocked(SimulatorProcess::ICacheMap& i_cache,
-+ void* start_addr, size_t size) {
-+ intptr_t start = reinterpret_cast<intptr_t>(start_addr);
-+ int intra_line = (start & CachePage::kLineMask);
-+ start -= intra_line;
-+ size += intra_line;
-+ size = ((size - 1) | CachePage::kLineMask) + 1;
-+ int offset = (start & CachePage::kPageMask);
-+ while (!AllOnOnePage(start, size - 1)) {
-+ int bytes_to_flush = CachePage::kPageSize - offset;
-+ FlushOnePageLocked(i_cache, start, bytes_to_flush);
-+ start += bytes_to_flush;
-+ size -= bytes_to_flush;
-+ MOZ_ASSERT((start & CachePage::kPageMask) == 0);
-+ offset = 0;
-+ }
-+ if (size != 0) {
-+ FlushOnePageLocked(i_cache, start, size);
-+ }
-+}
-+
-+/* static */
-+void SimulatorProcess::checkICacheLocked(SimInstruction* instr) {
-+ intptr_t address = reinterpret_cast<intptr_t>(instr);
-+ void* page = reinterpret_cast<void*>(address & (~CachePage::kPageMask));
-+ void* line = reinterpret_cast<void*>(address & (~CachePage::kLineMask));
-+ int offset = (address & CachePage::kPageMask);
-+ CachePage* cache_page = GetCachePageLocked(icache(), page);
-+ char* cache_valid_byte = cache_page->validityByte(offset);
-+ bool cache_hit = (*cache_valid_byte == CachePage::LINE_VALID);
-+ char* cached_line = cache_page->cachedData(offset & ~CachePage::kLineMask);
-+
-+ if (cache_hit) {
-+ mozilla::DebugOnly<int> cmpret =
-+ memcmp(reinterpret_cast<void*>(instr), cache_page->cachedData(offset),
-+ SimInstruction::kInstrSize);
-+ MOZ_ASSERT(cmpret == 0);
-+ } else {
-+ memcpy(cached_line, line, CachePage::kLineLength);
-+ *cache_valid_byte = CachePage::LINE_VALID;
-+ }
-+}
-+
-+HashNumber SimulatorProcess::ICacheHasher::hash(const Lookup& l) {
-+ return U32(reinterpret_cast<uintptr_t>(l)) >> 2;
-+}
-+
-+bool SimulatorProcess::ICacheHasher::match(const Key& k, const Lookup& l) {
-+ MOZ_ASSERT((reinterpret_cast<intptr_t>(k) & CachePage::kPageMask) == 0);
-+ MOZ_ASSERT((reinterpret_cast<intptr_t>(l) & CachePage::kPageMask) == 0);
-+ return k == l;
-+}
-+
-+/* static */
-+void SimulatorProcess::FlushICache(void* start_addr, size_t size) {
-+ if (!ICacheCheckingDisableCount) {
-+ AutoLockSimulatorCache als;
-+ js::jit::FlushICacheLocked(icache(), start_addr, size);
-+ }
-+}
-+
-+// -----------------------------------------------------------------------------
-+// Redirection.
-+
-+// One node of the process-wide singly linked list of native-call
-+// redirections. Each node pairs a native function pointer and its ABI type
-+// with an embedded kCallRedirInstr word; FromSwiInstruction() maps the
-+// address of that word back to the owning node.
-+class Redirection {
-+  friend class SimulatorProcess;
-+
-+  // Builds the node, links it in front of the current list head, flushes the
-+  // simulated i-cache entry for the embedded instruction (when i-cache
-+  // checking is enabled) and finally publishes the node as the new head.
-+  Redirection(void* nativeFunction, ABIFunctionType type)
-+      : nativeFunction_(nativeFunction),
-+        swiInstruction_(kCallRedirInstr),
-+        type_(type),
-+        next_(SimulatorProcess::redirection()) {
-+    const bool checkingICache = !SimulatorProcess::ICacheCheckingDisableCount;
-+    if (checkingICache) {
-+      FlushICacheLocked(SimulatorProcess::icache(), addressOfSwiInstruction(),
-+                        SimInstruction::kInstrSize);
-+    }
-+    SimulatorProcess::setRedirection(this);
-+  }
-+
-+ public:
-+  // Address of the embedded redirect instruction.
-+  void* addressOfSwiInstruction() { return &swiInstruction_; }
-+  // Native function this node redirects to.
-+  void* nativeFunction() const { return nativeFunction_; }
-+  // ABI signature recorded for the native function.
-+  ABIFunctionType type() const { return type_; }
-+
-+  // Returns the node registered for nativeFunction, creating and linking a
-+  // new one when the list has no match. Crashes through the OOM-unsafe
-+  // region if the allocation fails.
-+  static Redirection* Get(void* nativeFunction, ABIFunctionType type) {
-+    AutoLockSimulatorCache als;
-+
-+    Redirection* node = SimulatorProcess::redirection();
-+    while (node != nullptr) {
-+      if (node->nativeFunction_ == nativeFunction) {
-+        MOZ_ASSERT(node->type() == type);
-+        return node;
-+      }
-+      node = node->next_;
-+    }
-+
-+    // No existing entry: allocate raw storage and construct in place.
-+    AutoEnterOOMUnsafeRegion oomUnsafe;
-+    Redirection* fresh = js_pod_malloc<Redirection>(1);
-+    if (fresh == nullptr) {
-+      oomUnsafe.crash("Simulator redirection");
-+    }
-+    new (fresh) Redirection(nativeFunction, type);
-+    return fresh;
-+  }
-+
-+  // Recovers the owning node from the address of its embedded instruction by
-+  // subtracting the member's offset within the class.
-+  static Redirection* FromSwiInstruction(SimInstruction* swiInstruction) {
-+    uint8_t* owner = reinterpret_cast<uint8_t*>(swiInstruction) -
-+                     offsetof(Redirection, swiInstruction_);
-+    return reinterpret_cast<Redirection*>(owner);
-+  }
-+
-+ private:
-+  void* nativeFunction_;
-+  uint32_t swiInstruction_;
-+  ABIFunctionType type_;
-+  Redirection* next_;
-+};
-+
-+// -----------------------------------------------------------------------------
-+// Simulator constructor / destructor / init.
-+
-+// Puts a newly constructed simulator into a known state: no stack, zeroed
-+// register files and special registers, no reservation held, and PC/LR set
-+// to bad_ra.
-+Simulator::Simulator() {
-+  // Execution bookkeeping.
-+  stack_ = nullptr;
-+  stackLimit_ = 0;
-+  pc_modified_ = false;
-+  icount_ = 0;
-+
-+  // Breakpoint, single-step and debugger state.
-+  break_count_ = 0;
-+  break_pc_ = nullptr;
-+  break_instr_ = 0;
-+  single_stepping_ = false;
-+  single_step_callback_ = nullptr;
-+  single_step_callback_arg_ = nullptr;
-+  lastDebuggerInput_ = nullptr;
-+
-+  // General-purpose and floating-point register files.
-+  for (int gpr = 0; gpr < Register::kNumSimuRegisters; ++gpr) {
-+    registers_[gpr] = 0;
-+  }
-+  for (int fpr = 0; fpr < Simulator::FPURegister::kNumFPURegisters; ++fpr) {
-+    FPUregisters_[fpr] = 0;
-+  }
-+
-+  // Special-purpose registers (LR is set below).
-+  CTR_ = 0;
-+  CR_ = 0;
-+  XER_ = 0;
-+  FPSCR_ = 0;
-+
-+  // Load-reserve / store-conditional tracking.
-+  LLBit_ = false;
-+  LLAddr_ = 0;
-+  lastLLValue_ = 0;
-+
-+  // Start PC and LR at a known bad value so that executing from either one
-+  // before proper setup causes an access violation.
-+  registers_[pc] = bad_ra;
-+  LR_ = bad_ra;
-+}
-+
-+// Allocates the simulated stack and derives the stack limit and initial
-+// stack pointer from it. Returns false if the allocation fails.
-+bool Simulator::init() {
-+  constexpr size_t kStackBytes = 2 * 1024 * 1024;
-+  // Safety margin kept below the limit so an overrun stays inside the buffer.
-+  constexpr size_t kGuardBytes = 1024 * 1024;
-+  // Bytes left unused above the initial stack pointer.
-+  constexpr size_t kTopReserveBytes = 64;
-+
-+  stack_ = js_pod_malloc<char>(kStackBytes);
-+  if (stack_ == nullptr) {
-+    return false;
-+  }
-+
-+  stackLimit_ = reinterpret_cast<uintptr_t>(stack_) + kGuardBytes;
-+
-+  // sp starts near the high-address end of the allocation.
-+  registers_[sp] =
-+      reinterpret_cast<int64_t>(stack_) + kStackBytes - kTopReserveBytes;
-+
-+  // The architecture guarantees nothing about VR contents at entry; zeroing
-+  // them avoids uninitialized-read reports from tools and keeps regression
-+  // traces deterministic.
-+  memset(VRregisters_, 0, sizeof(VRregisters_));
-+
-+  return true;
-+}
-+
-+// Releases the stack buffer obtained in init().
-+Simulator::~Simulator() {
-+  js_free(stack_);
-+}
-+
-+// Sets up the cache lock and an empty redirection list. When the
-+// PPC64_SIM_ICACHE_CHECKS environment variable is set (to any value),
-+// ICacheCheckingDisableCount is reset to zero.
-+SimulatorProcess::SimulatorProcess()
-+    : cacheLock_(mutexid::SimulatorCacheLock), redirection_(nullptr) {
-+  const char* checksRequested = getenv("PPC64_SIM_ICACHE_CHECKS");
-+  if (checksRequested != nullptr) {
-+    ICacheCheckingDisableCount = 0;
-+  }
-+}
-+
-+// Walks the redirection list and destroys every node.
-+SimulatorProcess::~SimulatorProcess() {
-+  for (Redirection* node = redirection_; node != nullptr;) {
-+    // Read the successor before the node is destroyed.
-+    Redirection* following = node->next_;
-+    js_delete(node);
-+    node = following;
-+  }
-+}
-+
-+/* static */
-+// Returns the address of the redirect instruction embedded in the
-+// Redirection registered for (nativeFunction, type).
-+void* Simulator::RedirectNativeFunction(void* nativeFunction,
-+                                        ABIFunctionType type) {
-+  return Redirection::Get(nativeFunction, type)->addressOfSwiInstruction();
-+}
-+
-+// Returns the simulator attached to the JSContext stored in TlsContext.
-+Simulator* Simulator::Current() {
-+  JSContext* context = TlsContext.get();
-+  MOZ_ASSERT(CurrentThreadCanAccessRuntime(context->runtime()));
-+  return context->simulator();
-+}
-+
-+// -----------------------------------------------------------------------------
-+// Register accessors.
-+
-+// Stores value into simulated register reg. A write to pc also sets
-+// pc_modified_.
-+void Simulator::setRegister(int reg, int64_t value) {
-+  MOZ_ASSERT((reg >= 0) && (reg < Register::kNumSimuRegisters));
-+  pc_modified_ = pc_modified_ || (reg == pc);
-+  registers_[reg] = value;
-+}
-+
-+// Returns the value of simulated register reg. Reads of pc have
-+// SimInstruction::kPCReadOffset added.
-+int64_t Simulator::getRegister(int reg) const {
-+  MOZ_ASSERT((reg >= 0) && (reg < Register::kNumSimuRegisters));
-+  if (reg == pc) {
-+    return registers_[reg] + SimInstruction::kPCReadOffset;
-+  }
-+  return registers_[reg] + 0;
-+}
-+
-+// Stores the raw 64-bit pattern value into FPR fpureg.
-+void Simulator::setFpuRegister(int fpureg, int64_t value) {
-+  MOZ_ASSERT((fpureg >= 0) &&
-+             (fpureg < Simulator::FPURegister::kNumFPURegisters));
-+  int64_t& slot = FPUregisters_[fpureg];
-+  slot = value;
-+}
-+
-+// Overwrites the first four bytes of FPR fpureg's storage with value; the
-+// remaining four bytes are left as they were.
-+void Simulator::setFpuRegisterWord(int fpureg, int32_t value) {
-+  MOZ_ASSERT((fpureg >= 0) &&
-+             (fpureg < Simulator::FPURegister::kNumFPURegisters));
-+  memcpy(&FPUregisters_[fpureg], &value, sizeof(value));
-+}
-+
-+// Widens an f32 to f64 without disturbing NaN bit patterns, mirroring what
-+// PPC64 `lfs` / `xscvspdpn` do. A plain `(double)` conversion is allowed to
-+// quiet a signaling NaN (e.g. 0x7FA00000 becoming a qNaN such as 0x7FE00000
-+// on x86/ARM hosts), which breaks wasm tests that load a constant sNaN
-+// pattern. NaN inputs are therefore rebuilt by hand: same sign, all-ones
-+// exponent, and the 23-bit payload shifted left by 29 into the wider
-+// mantissa. Every other value takes the ordinary conversion.
-+static double promoteFloatPreservingNaN(float f) {
-+  constexpr uint32_t kExponentMask = 0x7F800000u;
-+  constexpr uint32_t kFractionMask = 0x007FFFFFu;
-+
-+  uint32_t narrowBits;
-+  memcpy(&narrowBits, &f, sizeof(narrowBits));
-+
-+  const uint32_t fraction = narrowBits & kFractionMask;
-+  const bool isNaN =
-+      ((narrowBits & kExponentMask) == kExponentMask) && (fraction != 0u);
-+  if (!isNaN) {
-+    return (double)f;
-+  }
-+
-+  const uint64_t signBit = uint64_t(narrowBits >> 31) & 1u;
-+  const uint64_t wideBits = (signBit << 63) | (uint64_t(0x7FFu) << 52) |
-+                            (uint64_t(fraction) << 29);
-+  double widened;
-+  memcpy(&widened, &wideBits, sizeof(widened));
-+  return widened;
-+}
-+
-+// Narrows an f64 to f32 while keeping NaNs as NaNs with their payload
-+// (non-signaling conversion, as PPC64 `stfs` / `xscvdpspn` perform). For a
-+// NaN input the low 29 payload bits, which do not fit the f32 mantissa, are
-+// dropped; if nothing remains, the least significant payload bit is forced
-+// so the result does not turn into an Infinity. The quiet bit is
-+// deliberately not set here: explicit quieting belongs to `xscvdpsp` and to
-+// the wasm-level lowering of f32.demote_f64. Non-NaN values take the
-+// ordinary conversion.
-+static float demoteDoublePreservingNaN(double d) {
-+  constexpr uint64_t kExponentMask = 0x7FF0000000000000ULL;
-+  constexpr uint64_t kFractionMask = 0x000FFFFFFFFFFFFFULL;
-+
-+  uint64_t wideBits;
-+  memcpy(&wideBits, &d, sizeof(wideBits));
-+
-+  const bool isNaN = ((wideBits & kExponentMask) == kExponentMask) &&
-+                     ((wideBits & kFractionMask) != 0);
-+  if (!isNaN) {
-+    return (float)d;
-+  }
-+
-+  const uint32_t signBit = uint32_t(wideBits >> 63) & 1u;
-+  uint32_t fraction = uint32_t((wideBits >> 29) & 0x007FFFFFu);
-+  if (fraction == 0) {
-+    fraction = 1;
-+  }
-+
-+  const uint32_t narrowBits = (signBit << 31) | 0x7F800000u | fraction;
-+  float narrowed;
-+  memcpy(&narrowed, &narrowBits, sizeof(narrowed));
-+  return narrowed;
-+}
-+
-+// Stores a single-precision value into FPR fpureg in double-precision form.
-+// Under the ELFv2 ABI a 'float' held in an FPR uses its double
-+// representation, so all 8 bytes are written rather than only 4 (which
-+// would leave the other half stale). The NaN-preserving promotion keeps a
-+// signaling NaN from being quieted.
-+void Simulator::setFpuRegisterFloat(int fpureg, float value) {
-+  MOZ_ASSERT((fpureg >= 0) &&
-+             (fpureg < Simulator::FPURegister::kNumFPURegisters));
-+  const double asDouble = promoteFloatPreservingNaN(value);
-+  memcpy(&FPUregisters_[fpureg], &asDouble, sizeof(asDouble));
-+}
-+
-+// Stores the bit pattern of value into FPR fpureg.
-+void Simulator::setFpuRegisterDouble(int fpureg, double value) {
-+  MOZ_ASSERT((fpureg >= 0) &&
-+             (fpureg < Simulator::FPURegister::kNumFPURegisters));
-+  memcpy(&FPUregisters_[fpureg], &value, sizeof(value));
-+}
-+
-+// Returns the raw 64-bit pattern held in FPR fpureg.
-+int64_t Simulator::getFpuRegister(int fpureg) const {
-+  MOZ_ASSERT((fpureg >= 0) &&
-+             (fpureg < Simulator::FPURegister::kNumFPURegisters));
-+  const int64_t rawBits = FPUregisters_[fpureg];
-+  return rawBits;
-+}
-+
-+// Returns the first four bytes of FPR fpureg's storage as an int32_t.
-+int32_t Simulator::getFpuRegisterWord(int fpureg) const {
-+  MOZ_ASSERT((fpureg >= 0) &&
-+             (fpureg < Simulator::FPURegister::kNumFPURegisters));
-+  int32_t word;
-+  memcpy(&word, &FPUregisters_[fpureg], sizeof(word));
-+  return word;
-+}
-+
-+// Returns the first four bytes of FPR fpureg's storage as a signed 32-bit
-+// value.
-+int32_t Simulator::getFpuRegisterSignedWord(int fpureg) const {
-+  MOZ_ASSERT((fpureg >= 0) &&
-+             (fpureg < Simulator::FPURegister::kNumFPURegisters));
-+  int32_t signedWord;
-+  memcpy(&signedWord, &FPUregisters_[fpureg], sizeof(signedWord));
-+  return signedWord;
-+}
-+
-+// Reads FPR fpureg as a single-precision value. Under the ELFv2 ABI a
-+// 'float' travels in an FPR as its double-precision representation, so the
-+// full 8 bytes are read as a double and then narrowed, like the `frsp` a C
-+// callee would perform and like what hardware holds after `lfs`. The
-+// NaN-preserving demotion keeps a signaling-NaN argument from being quieted.
-+float Simulator::getFpuRegisterFloat(int fpureg) const {
-+  MOZ_ASSERT((fpureg >= 0) &&
-+             (fpureg < Simulator::FPURegister::kNumFPURegisters));
-+  double asDouble;
-+  memcpy(&asDouble, &FPUregisters_[fpureg], sizeof(asDouble));
-+  const float narrowed = demoteDoublePreservingNaN(asDouble);
-+  return narrowed;
-+}
-+
-+// Returns the contents of FPR fpureg reinterpreted as a double.
-+double Simulator::getFpuRegisterDouble(int fpureg) const {
-+  MOZ_ASSERT((fpureg >= 0) &&
-+             (fpureg < Simulator::FPURegister::kNumFPURegisters));
-+  double value;
-+  memcpy(&value, &FPUregisters_[fpureg], sizeof(value));
-+  return value;
-+}
-+
-+// Copies the 16 bytes at `bytes` into vector register vreg.
-+void Simulator::setVRBytes(int vreg, const uint8_t bytes[16]) {
-+  MOZ_ASSERT((vreg >= 0) && (vreg < kNumVRRegisters));
-+  constexpr size_t kVectorBytes = 16;
-+  memcpy(VRregisters_[vreg], bytes, kVectorBytes);
-+}
-+
-+// Copies the 16 bytes of vector register vreg out to `bytes`.
-+void Simulator::getVRBytes(int vreg, uint8_t bytes[16]) const {
-+  MOZ_ASSERT((vreg >= 0) && (vreg < kNumVRRegisters));
-+  constexpr size_t kVectorBytes = 16;
-+  memcpy(bytes, VRregisters_[vreg], kVectorBytes);
-+}
-+
-+// Reads the 128-bit contents of VSR vsr into `bytes`. Indices at or above
-+// kNumFPURegisters select the VRs; lower indices select the FPR view.
-+void Simulator::getVSR128(int vsr, uint8_t bytes[16]) const {
-+  MOZ_ASSERT((vsr >= 0) && (vsr < kNumFPURegisters + kNumVRRegisters));
-+
-+  if (vsr >= kNumFPURegisters) {
-+    memcpy(bytes, VRregisters_[vsr - kNumFPURegisters], 16);
-+    return;
-+  }
-+
-+  // FPR view: the scalar occupies BE doubleword 0 of the VSR, which in
-+  // PPC64LE register storage corresponds to LE bytes 8-15. Doubleword 1 is
-+  // undefined per the ISA and is modelled as zero. This matches
-+  // `lfd f0,(mem); xxlor <vr>,f0,f0; stxvx <vr>,...`, which writes the
-+  // double's 8 bytes to the high half of the 16-byte store.
-+  const int64_t scalar = FPUregisters_[vsr];
-+  memcpy(bytes + 8, &scalar, 8);
-+  memset(bytes, 0, 8);
-+}
-+
-+// Writes the 16 bytes at `bytes` into VSR vsr. Indices at or above
-+// kNumFPURegisters select the VRs; lower indices select the FPR view.
-+void Simulator::setVSR128(int vsr, const uint8_t bytes[16]) {
-+  MOZ_ASSERT((vsr >= 0) && (vsr < kNumFPURegisters + kNumVRRegisters));
-+
-+  if (vsr >= kNumFPURegisters) {
-+    memcpy(VRregisters_[vsr - kNumFPURegisters], bytes, 16);
-+    return;
-+  }
-+
-+  // FPR view: only BE doubleword 0 (LE bytes 8-15) holds the scalar;
-+  // doubleword 1 is architecturally discarded on VSR-to-FPR writes.
-+  int64_t scalar;
-+  memcpy(&scalar, bytes + 8, 8);
-+  FPUregisters_[vsr] = scalar;
-+}
-+
-+// Places a double-precision call result in f1.
-+void Simulator::setCallResultDouble(double result) {
-+  constexpr auto kResultReg = Simulator::f1;
-+  setFpuRegisterDouble(kResultReg, result);
-+}
-+
-+// Places a single-precision call result in f1.
-+void Simulator::setCallResultFloat(float result) {
-+  constexpr auto kResultReg = Simulator::f1;
-+  setFpuRegisterFloat(kResultReg, result);
-+}
-+
-+// Places a 64-bit integer call result in r3.
-+void Simulator::setCallResult(int64_t res) {
-+  setRegister(r3, res);
-+}
-+
-+#ifdef XP_DARWIN
-+// intptr_t overload, compiled only when XP_DARWIN is defined: converts the
-+// result with I64() and places it in r3.
-+void Simulator::setCallResult(intptr_t res) {
-+  const int64_t widened = I64(res);
-+  setRegister(r3, widened);
-+}
-+#endif
-+
-+// Splits a 128-bit call result across two registers: the low 64 bits go to
-+// r3 and the high 64 bits to r4.
-+void Simulator::setCallResult(__int128 res) {
-+  const int64_t lowHalf = I64(res);
-+  const int64_t highHalf = I64(res >> 64);
-+  setRegister(r3, lowHalf);
-+  setRegister(r4, highHalf);
-+}
-+
-+// Sets the program counter to value and records that it was modified.
-+void Simulator::set_pc(int64_t value) {
-+  registers_[pc] = value;
-+  pc_modified_ = true;
-+}
-+
-+// True when the program counter equals one of the sentinel values bad_ra or
-+// end_sim_pc.
-+bool Simulator::has_bad_pc() const {
-+  if (registers_[pc] == bad_ra) {
-+    return true;
-+  }
-+  return registers_[pc] == end_sim_pc;
-+}
-+
-+// Returns the raw program counter; unlike getRegister(pc), no read offset
-+// is added.
-+int64_t Simulator::get_pc() const {
-+  return registers_[pc];
-+}
-+
-+// Captures the simulated pc, fp, sp and lr as pointers in a
-+// wasm::RegisterState.
-+JS::ProfilingFrameIterator::RegisterState Simulator::registerState() {
-+  wasm::RegisterState snapshot;
-+  snapshot.pc = (void*)get_pc();
-+  snapshot.fp = (void*)getRegister(fp);
-+  snapshot.sp = (void*)getRegister(sp);
-+  snapshot.lr = (void*)getLR();
-+  return snapshot;
-+}
-+
-+// -----------------------------------------------------------------------------
-+// Memory access helpers.
-+
-+// Loads an unsigned byte from addr. Yields 0xff without dereferencing when
-+// handleWasmSegFault(addr, 1) returns true.
-+uint8_t Simulator::readBU(uint64_t addr) {
-+  const bool faultHandled = handleWasmSegFault(addr, 1);
-+  if (!faultHandled) {
-+    return *reinterpret_cast<uint8_t*>(addr);
-+  }
-+  return 0xff;
-+}
-+
-+// Loads a signed byte from addr. Yields -1 without dereferencing when
-+// handleWasmSegFault(addr, 1) returns true.
-+int8_t Simulator::readB(uint64_t addr) {
-+  const bool faultHandled = handleWasmSegFault(addr, 1);
-+  if (!faultHandled) {
-+    return *reinterpret_cast<int8_t*>(addr);
-+  }
-+  return -1;
-+}
-+
-+// Stores an unsigned byte to addr unless handleWasmSegFault(addr, 1) returns
-+// true.
-+// NOTE(review): unlike the halfword/word/doubleword stores in this file,
-+// the byte stores leave LLBit_ untouched; confirm that is intended.
-+void Simulator::writeB(uint64_t addr, uint8_t value) {
-+  if (!handleWasmSegFault(addr, 1)) {
-+    *reinterpret_cast<uint8_t*>(addr) = value;
-+  }
-+}
-+
-+// Stores a signed byte to addr unless handleWasmSegFault(addr, 1) returns
-+// true.
-+void Simulator::writeB(uint64_t addr, int8_t value) {
-+  if (!handleWasmSegFault(addr, 1)) {
-+    *reinterpret_cast<int8_t*>(addr) = value;
-+  }
-+}
-+
-+// Loads an unsigned halfword from addr. Yields 0xffff without dereferencing
-+// when handleWasmSegFault(addr, 2) returns true. instr is not used.
-+uint16_t Simulator::readHU(uint64_t addr, SimInstruction* instr) {
-+  const bool faultHandled = handleWasmSegFault(addr, 2);
-+  if (!faultHandled) {
-+    return *reinterpret_cast<uint16_t*>(addr);
-+  }
-+  return 0xffff;
-+}
-+
-+// Loads a signed halfword from addr. Yields -1 without dereferencing when
-+// handleWasmSegFault(addr, 2) returns true. instr is not used.
-+int16_t Simulator::readH(uint64_t addr, SimInstruction* instr) {
-+  const bool faultHandled = handleWasmSegFault(addr, 2);
-+  if (!faultHandled) {
-+    return *reinterpret_cast<int16_t*>(addr);
-+  }
-+  return -1;
-+}
-+
-+// Stores an unsigned halfword to addr and clears LLBit_, unless
-+// handleWasmSegFault(addr, 2) returns true. instr is not used.
-+void Simulator::writeH(uint64_t addr, uint16_t value, SimInstruction* instr) {
-+  if (!handleWasmSegFault(addr, 2)) {
-+    LLBit_ = false;
-+    *reinterpret_cast<uint16_t*>(addr) = value;
-+  }
-+}
-+
-+// Stores a signed halfword to addr and clears LLBit_, unless
-+// handleWasmSegFault(addr, 2) returns true. instr is not used.
-+void Simulator::writeH(uint64_t addr, int16_t value, SimInstruction* instr) {
-+  if (!handleWasmSegFault(addr, 2)) {
-+    LLBit_ = false;
-+    *reinterpret_cast<int16_t*>(addr) = value;
-+  }
-+}
-+
-+// Loads an unsigned word from addr. When handleWasmSegFault(addr, 4) returns
-+// true, yields -1 converted to uint32_t (all bits set) without
-+// dereferencing. instr is not used.
-+uint32_t Simulator::readWU(uint64_t addr, SimInstruction* instr) {
-+  const bool faultHandled = handleWasmSegFault(addr, 4);
-+  if (!faultHandled) {
-+    return *reinterpret_cast<uint32_t*>(addr);
-+  }
-+  return -1;
-+}
-+
-+// Loads a signed word from addr. Yields -1 without dereferencing when
-+// handleWasmSegFault(addr, 4) returns true. instr is not used.
-+int32_t Simulator::readW(uint64_t addr, SimInstruction* instr) {
-+  const bool faultHandled = handleWasmSegFault(addr, 4);
-+  if (!faultHandled) {
-+    return *reinterpret_cast<int32_t*>(addr);
-+  }
-+  return -1;
-+}
-+
-+// Stores an unsigned word to addr and clears LLBit_, unless
-+// handleWasmSegFault(addr, 4) returns true. instr is not used.
-+void Simulator::writeW(uint64_t addr, uint32_t value, SimInstruction* instr) {
-+  if (!handleWasmSegFault(addr, 4)) {
-+    LLBit_ = false;
-+    *reinterpret_cast<uint32_t*>(addr) = value;
-+  }
-+}
-+
-+// Stores a signed word to addr and clears LLBit_, unless
-+// handleWasmSegFault(addr, 4) returns true. instr is not used.
-+void Simulator::writeW(uint64_t addr, int32_t value, SimInstruction* instr) {
-+  if (!handleWasmSegFault(addr, 4)) {
-+    LLBit_ = false;
-+    *reinterpret_cast<int32_t*>(addr) = value;
-+  }
-+}
-+
-+// Loads a doubleword from addr. Yields -1 without dereferencing when
-+// handleWasmSegFault(addr, 8) returns true. instr is not used.
-+int64_t Simulator::readDW(uint64_t addr, SimInstruction* instr) {
-+  const bool faultHandled = handleWasmSegFault(addr, 8);
-+  if (!faultHandled) {
-+    return *reinterpret_cast<int64_t*>(addr);
-+  }
-+  return -1;
-+}
-+
-+// Stores a doubleword to addr and clears LLBit_, unless
-+// handleWasmSegFault(addr, 8) returns true. instr is not used.
-+void Simulator::writeDW(uint64_t addr, int64_t value, SimInstruction* instr) {
-+  if (!handleWasmSegFault(addr, 8)) {
-+    LLBit_ = false;
-+    *reinterpret_cast<int64_t*>(addr) = value;
-+  }
-+}
-+
-+// Loads a double from addr. Yields NAN without dereferencing when
-+// handleWasmSegFault(addr, 8) returns true. instr is not used.
-+double Simulator::readD(uint64_t addr, SimInstruction* instr) {
-+  const bool faultHandled = handleWasmSegFault(addr, 8);
-+  if (!faultHandled) {
-+    return *reinterpret_cast<double*>(addr);
-+  }
-+  return NAN;
-+}
-+
-+// Stores a double to addr and clears LLBit_, unless
-+// handleWasmSegFault(addr, 8) returns true. instr is not used.
-+void Simulator::writeD(uint64_t addr, double value, SimInstruction* instr) {
-+  if (!handleWasmSegFault(addr, 8)) {
-+    LLBit_ = false;
-+    *reinterpret_cast<double*>(addr) = value;
-+  }
-+}
-+
-+// Byte-wide load-reserve (lbarx); its store-conditional partner is stbcx.
-+// Byte accesses have no alignment requirement. Reads the byte, remembers
-+// both the value and the address, and sets LLBit_. Returns 0 without
-+// reserving when handleWasmSegFault(addr, 1) returns true.
-+uint8_t Simulator::loadLinkedB(uint64_t addr, SimInstruction* instr) {
-+  if (handleWasmSegFault(addr, 1)) {
-+    return 0;
-+  }
-+
-+  const uint8_t loaded = *reinterpret_cast<volatile uint8_t*>(addr);
-+  lastLLValue_ = loaded;
-+  LLAddr_ = addr;
-+  LLBit_ = true;
-+  return loaded;
-+}
-+
-+// Byte-wide store-conditional (stbcx.). Crashes if addr is not the address
-+// recorded by the last load-reserve. Returns 0 when LLBit_ is clear;
-+// otherwise clears the reservation and compare-exchanges the byte against
-+// the value seen at reservation time, returning 1 if the exchange took
-+// place and 0 if not.
-+int Simulator::storeConditionalB(uint64_t addr, uint8_t value,
-+                                 SimInstruction* instr) {
-+  if (addr != LLAddr_) {
-+    printf("stbcx. to bad address: 0x%016" PRIx64 ", pc=0x%016" PRIxPTR
-+           ", expected: 0x%016" PRIxPTR "\n",
-+           addr, reinterpret_cast<intptr_t>(instr), LLAddr_);
-+    MOZ_CRASH();
-+  }
-+
-+  if (!LLBit_) {
-+    return 0;
-+  }
-+
-+  LLBit_ = false;
-+  LLAddr_ = 0;
-+
-+  SharedMem<uint8_t*> target =
-+      SharedMem<uint8_t*>::shared(reinterpret_cast<uint8_t*>(addr));
-+  const uint8_t reserved = uint8_t(lastLLValue_);
-+  const uint8_t observed =
-+      AtomicOperations::compareExchangeSeqCst(target, reserved, value);
-+  if (observed == reserved) {
-+    return 1;
-+  }
-+  return 0;
-+}
-+
-+// Halfword-wide load-reserve (lharx); its store-conditional partner is
-+// sthcx. The ISA requires 2-byte alignment, so an odd address is reported
-+// and crashes. Otherwise reads the halfword, remembers value and address,
-+// and sets LLBit_. Returns 0 without reserving when
-+// handleWasmSegFault(addr, 2) returns true.
-+uint16_t Simulator::loadLinkedH(uint64_t addr, SimInstruction* instr) {
-+  if ((addr & 1) != 0) {
-+    printf("Unaligned lharx at 0x%016" PRIx64 ", pc=0x%016" PRIxPTR "\n", addr,
-+           reinterpret_cast<intptr_t>(instr));
-+    MOZ_CRASH();
-+    return 0;
-+  }
-+
-+  if (handleWasmSegFault(addr, 2)) {
-+    return 0;
-+  }
-+
-+  const uint16_t loaded = *reinterpret_cast<volatile uint16_t*>(addr);
-+  lastLLValue_ = loaded;
-+  LLAddr_ = addr;
-+  LLBit_ = true;
-+  return loaded;
-+}
-+
-+// Halfword-wide store-conditional (sthcx.). Crashes if addr differs from
-+// the address recorded by the last load-reserve, or if it is not 2-byte
-+// aligned. Returns 0 when LLBit_ is clear; otherwise clears the reservation
-+// and compare-exchanges against the value seen at reservation time,
-+// returning 1 if the exchange took place and 0 if not.
-+int Simulator::storeConditionalH(uint64_t addr, uint16_t value,
-+                                 SimInstruction* instr) {
-+  if (addr != LLAddr_) {
-+    printf("sthcx. to bad address: 0x%016" PRIx64 ", pc=0x%016" PRIxPTR
-+           ", expected: 0x%016" PRIxPTR "\n",
-+           addr, reinterpret_cast<intptr_t>(instr), LLAddr_);
-+    MOZ_CRASH();
-+  }
-+
-+  if ((addr & 1) != 0) {
-+    printf("Unaligned sthcx. at 0x%016" PRIx64 ", pc=0x%016" PRIxPTR "\n", addr,
-+           reinterpret_cast<intptr_t>(instr));
-+    MOZ_CRASH();
-+    return 0;
-+  }
-+
-+  if (!LLBit_) {
-+    return 0;
-+  }
-+
-+  LLBit_ = false;
-+  LLAddr_ = 0;
-+
-+  SharedMem<uint16_t*> target =
-+      SharedMem<uint16_t*>::shared(reinterpret_cast<uint16_t*>(addr));
-+  const uint16_t reserved = uint16_t(lastLLValue_);
-+  const uint16_t observed =
-+      AtomicOperations::compareExchangeSeqCst(target, reserved, value);
-+  if (observed == reserved) {
-+    return 1;
-+  }
-+  return 0;
-+}
-+
-+// Word-wide load-reserve (lwarx). An address that is not 4-byte aligned is
-+// reported and crashes. Otherwise reads the word, remembers value and
-+// address, and sets LLBit_. Returns -1 without reserving when
-+// handleWasmSegFault(addr, 4) returns true.
-+int32_t Simulator::loadLinkedW(uint64_t addr, SimInstruction* instr) {
-+  if ((addr & 3) != 0) {
-+    printf("Unaligned lwarx at 0x%016" PRIx64 ", pc=0x%016" PRIxPTR "\n", addr,
-+           reinterpret_cast<intptr_t>(instr));
-+    MOZ_CRASH();
-+    return 0;
-+  }
-+
-+  if (handleWasmSegFault(addr, 4)) {
-+    return -1;
-+  }
-+
-+  const int32_t loaded = *reinterpret_cast<volatile int32_t*>(addr);
-+  lastLLValue_ = loaded;
-+  LLAddr_ = addr;
-+  LLBit_ = true;
-+  return loaded;
-+}
-+
-+// Word-wide store-conditional (stwcx.). Crashes if addr differs from the
-+// address recorded by the last load-reserve, or if it is not 4-byte
-+// aligned. Returns 0 when LLBit_ is clear; otherwise clears the reservation
-+// and compare-exchanges against the value seen at reservation time,
-+// returning 1 if the exchange took place and 0 if not.
-+int Simulator::storeConditionalW(uint64_t addr, int32_t value,
-+                                 SimInstruction* instr) {
-+  if (addr != LLAddr_) {
-+    printf("stwcx. to bad address: 0x%016" PRIx64 ", pc=0x%016" PRIxPTR
-+           ", expected: 0x%016" PRIxPTR "\n",
-+           addr, reinterpret_cast<intptr_t>(instr), LLAddr_);
-+    MOZ_CRASH();
-+  }
-+
-+  if ((addr & 3) != 0) {
-+    printf("Unaligned stwcx. at 0x%016" PRIx64 ", pc=0x%016" PRIxPTR "\n", addr,
-+           reinterpret_cast<intptr_t>(instr));
-+    MOZ_CRASH();
-+    return 0;
-+  }
-+
-+  if (!LLBit_) {
-+    return 0;
-+  }
-+
-+  LLBit_ = false;
-+  LLAddr_ = 0;
-+
-+  SharedMem<int32_t*> target =
-+      SharedMem<int32_t*>::shared(reinterpret_cast<int32_t*>(addr));
-+  const int32_t reserved = int32_t(lastLLValue_);
-+  const int32_t observed =
-+      AtomicOperations::compareExchangeSeqCst(target, reserved, value);
-+  if (observed == reserved) {
-+    return 1;
-+  }
-+  return 0;
-+}
-+
-+// Doubleword-wide load-reserve (ldarx). An address with any
-+// kPointerAlignmentMask bit set is reported as unaligned and crashes.
-+// Otherwise reads the doubleword, remembers value and address, and sets
-+// LLBit_. Returns -1 without reserving when handleWasmSegFault(addr, 8)
-+// returns true.
-+int64_t Simulator::loadLinkedD(uint64_t addr, SimInstruction* instr) {
-+  if ((addr & kPointerAlignmentMask) != 0) {
-+    printf("Unaligned ldarx at 0x%016" PRIx64 ", pc=0x%016" PRIxPTR "\n", addr,
-+           reinterpret_cast<intptr_t>(instr));
-+    MOZ_CRASH();
-+    return 0;
-+  }
-+
-+  if (handleWasmSegFault(addr, 8)) {
-+    return -1;
-+  }
-+
-+  const int64_t loaded = *reinterpret_cast<volatile int64_t*>(addr);
-+  lastLLValue_ = loaded;
-+  LLAddr_ = addr;
-+  LLBit_ = true;
-+  return loaded;
-+}
-+
-+// Doubleword-wide store-conditional (stdcx.). Crashes if addr differs from
-+// the address recorded by the last load-reserve, or if any
-+// kPointerAlignmentMask bit is set. Returns 0 when LLBit_ is clear;
-+// otherwise clears the reservation and compare-exchanges against the value
-+// seen at reservation time, returning 1 if the exchange took place and 0 if
-+// not.
-+int Simulator::storeConditionalD(uint64_t addr, int64_t value,
-+                                 SimInstruction* instr) {
-+  if (addr != LLAddr_) {
-+    printf("stdcx. to bad address: 0x%016" PRIx64 ", pc=0x%016" PRIxPTR
-+           ", expected: 0x%016" PRIxPTR "\n",
-+           addr, reinterpret_cast<intptr_t>(instr), LLAddr_);
-+    MOZ_CRASH();
-+  }
-+
-+  if ((addr & kPointerAlignmentMask) != 0) {
-+    printf("Unaligned stdcx. at 0x%016" PRIx64 ", pc=0x%016" PRIxPTR "\n", addr,
-+           reinterpret_cast<intptr_t>(instr));
-+    MOZ_CRASH();
-+    return 0;
-+  }
-+
-+  if (!LLBit_) {
-+    return 0;
-+  }
-+
-+  LLBit_ = false;
-+  LLAddr_ = 0;
-+
-+  SharedMem<int64_t*> target =
-+      SharedMem<int64_t*>::shared(reinterpret_cast<int64_t*>(addr));
-+  const int64_t reserved = lastLLValue_;
-+  const int64_t observed =
-+      AtomicOperations::compareExchangeSeqCst(target, reserved, value);
-+  if (observed == reserved) {
-+    return 1;
-+  }
-+  return 0;
-+}
-+
-+// -----------------------------------------------------------------------------
-+// Stack limit / recursion helpers.
-+
-+// Returns the stack limit computed in init().
-+uintptr_t Simulator::stackLimit() const {
-+  return stackLimit_;
-+}
-+
-+// Returns a pointer to the stored stack limit.
-+uintptr_t* Simulator::addressOfStackLimit() {
-+  return &stackLimit_;
-+}
-+
-+// Reports whether newsp is at or below the stack limit. Passing 0 means
-+// "use the current simulated stack pointer".
-+bool Simulator::overRecursed(uintptr_t newsp) const {
-+  const uintptr_t candidate = (newsp != 0) ? newsp : getRegister(sp);
-+  return candidate <= stackLimit();
-+}
-+
-+// Reports whether the simulated stack pointer lowered by `extra` bytes would
-+// be at or below the stack limit.
-+bool Simulator::overRecursedWithExtra(uint32_t extra) const {
-+  const uintptr_t loweredSp = getRegister(sp) - extra;
-+  const uintptr_t limit = stackLimit();
-+  return loweredSp <= limit;
-+}
-+
-+// Reports an unsupported instruction (its address, raw bits and the
-+// caller-supplied text) and crashes.
-+void Simulator::format(SimInstruction* instr, const char* format) {
-+  const intptr_t where = reinterpret_cast<intptr_t>(instr);
-+  printf("Simulator found unsupported instruction:\n 0x%016" PRIxPTR
-+         ": %08x %s\n",
-+         where, instr->instructionBits(), format);
-+  MOZ_CRASH();
-+}
-+
-+// -----------------------------------------------------------------------------
-+// softwareInterrupt - handle kCallRedirInstr (PPC_stop) and PPC_trap.
-+
-+ABI_FUNCTION_TYPE_SIM_PROTOTYPES
-+
-+// Handles the trap-like instruction at `instr`. Three cases:
-+//  * kCallRedirInstr: a redirected native call. The owning Redirection is
-+//    recovered from the instruction address, the argument registers are
-+//    snapshotted into locals, the call is dispatched on the redirection's
-+//    ABI type, and execution resumes at the saved link register.
-+//  * 0x7FE00008 (PPC_trap): passed to wasm::HandleIllegalInstruction;
-+//    crashes if that does not supply a new PC.
-+//  * anything else: enters the ppc64Debugger.
-+void Simulator::softwareInterrupt(SimInstruction* instr) {
-+ uint32_t instrBits = instr->instructionBits();
-+
-+ if (instrBits == kCallRedirInstr) {
-+ Redirection* redirection = Redirection::FromSwiInstruction(instr);
-+ uintptr_t nativeFn =
-+ reinterpret_cast<uintptr_t>(redirection->nativeFunction());
-+
-+ // Get the SP for reading stack arguments.
-+ int64_t* sp_ = reinterpret_cast<int64_t*>(getRegister(sp));
-+ // Skip past the PPC64 ELFv2 link area (4 doublewords = 32 bytes).
-+ sp_ = reinterpret_cast<int64_t*>(reinterpret_cast<uintptr_t>(sp_) + 32);
-+
-+ // NOTE(review): nativeFn, sp_, a*_ and f*_s / f*_d are not referenced
-+ // again in the visible code; presumably the
-+ // ABI_FUNCTION_TYPE_PPC64_SIM_DISPATCH macro below uses them by name, so
-+ // they should not be renamed without checking the macro.
-+ // PPC64 ELFv2: integer args in r3-r10, FP args in f1-f13.
-+ int64_t a0_ = getRegister(r3);
-+ int64_t a1_ = getRegister(r4);
-+ int64_t a2_ = getRegister(r5);
-+ int64_t a3_ = getRegister(r6);
-+ int64_t a4_ = getRegister(r7);
-+ int64_t a5_ = getRegister(r8);
-+ int64_t a6_ = getRegister(r9);
-+ int64_t a7_ = getRegister(r10);
-+ // PPC64 ELFv2: FP args in f1-f13, mapped to f0_s..f12_s and f0_d..f12_d.
-+ // Each FP argument register is read twice: once narrowed to float (_s)
-+ // and once as a double (_d).
-+ float f0_s = getFpuRegisterFloat(Simulator::f1);
-+ float f1_s = getFpuRegisterFloat(Simulator::f2);
-+ float f2_s = getFpuRegisterFloat(Simulator::f3);
-+ float f3_s = getFpuRegisterFloat(Simulator::f4);
-+ float f4_s = getFpuRegisterFloat(Simulator::f5);
-+ float f5_s = getFpuRegisterFloat(Simulator::f6);
-+ float f6_s = getFpuRegisterFloat(Simulator::f7);
-+ float f7_s = getFpuRegisterFloat(Simulator::f8);
-+ float f8_s = getFpuRegisterFloat(Simulator::f9);
-+ float f9_s = getFpuRegisterFloat(Simulator::f10);
-+ float f10_s = getFpuRegisterFloat(Simulator::f11);
-+ float f11_s = getFpuRegisterFloat(Simulator::f12);
-+ float f12_s = getFpuRegisterFloat(Simulator::f13);
-+ double f0_d = getFpuRegisterDouble(Simulator::f1);
-+ double f1_d = getFpuRegisterDouble(Simulator::f2);
-+ double f2_d = getFpuRegisterDouble(Simulator::f3);
-+ double f3_d = getFpuRegisterDouble(Simulator::f4);
-+ double f4_d = getFpuRegisterDouble(Simulator::f5);
-+ double f5_d = getFpuRegisterDouble(Simulator::f6);
-+ double f6_d = getFpuRegisterDouble(Simulator::f7);
-+ double f7_d = getFpuRegisterDouble(Simulator::f8);
-+ double f8_d = getFpuRegisterDouble(Simulator::f9);
-+ double f9_d = getFpuRegisterDouble(Simulator::f10);
-+ double f10_d = getFpuRegisterDouble(Simulator::f11);
-+ double f11_d = getFpuRegisterDouble(Simulator::f12);
-+ double f12_d = getFpuRegisterDouble(Simulator::f13);
-+
-+ // Suppress unused-variable warnings for higher FP arg registers.
-+ // They exist for ABI completeness but few function types use >5 FP args.
-+ (void)f4_s; (void)f5_s; (void)f6_s; (void)f7_s; (void)f8_s; (void)f9_s;
-+ (void)f10_s; (void)f11_s; (void)f12_s;
-+ (void)f4_d; (void)f5_d; (void)f6_d; (void)f7_d; (void)f8_d; (void)f9_d;
-+ (void)f10_d; (void)f11_d; (void)f12_d;
-+
-+ // Remember LR so it can be restored after the native call.
-+ int64_t saved_lr = getLR();
-+
-+ // A native call with a misaligned simulated stack is treated as fatal.
-+ bool stack_aligned = (getRegister(sp) & (ABIStackAlignment - 1)) == 0;
-+ if (!stack_aligned) {
-+ fprintf(stderr, "Runtime call with unaligned stack!\n");
-+ MOZ_CRASH();
-+ }
-+
-+ // The single-step callback is invoked both before and after the call.
-+ if (single_stepping_) {
-+ single_step_callback_(single_step_callback_arg_, this, nullptr);
-+ }
-+
-+ // The macro supplies the case labels for the known ABI function types.
-+ switch (redirection->type()) {
-+ ABI_FUNCTION_TYPE_PPC64_SIM_DISPATCH
-+
-+ default:
-+ MOZ_CRASH("Unknown function type.");
-+ }
-+
-+ if (single_stepping_) {
-+ single_step_callback_(single_step_callback_arg_, this, nullptr);
-+ }
-+
-+ // Restore LR and return to the caller of the redirected function.
-+ setLR(saved_lr);
-+ set_pc(getLR());
-+ } else if (instrBits == 0x7FE00008) {
-+ // PPC_trap: used for wasm traps.
-+ uint8_t* newPC;
-+ if (wasm::HandleIllegalInstruction(registerState(), &newPC)) {
-+ set_pc(int64_t(newPC));
-+ return;
-+ }
-+ MOZ_CRASH("Unexpected trap instruction");
-+ } else {
-+ // Other trap-like instructions: enter debugger.
-+ ppc64Debugger dbg(this);
-+ dbg.debug();
-+ }
-+}
-+
-+// -----------------------------------------------------------------------------
-+// Stop/breakpoint helpers.
-+
-+// Codes up to and including kMaxWatchpointCode are watchpoints.
-+bool Simulator::isWatchpoint(uint32_t code) {
-+  const bool aboveWatchpointRange = code > kMaxWatchpointCode;
-+  return !aboveWatchpointRange;
-+}
-+
-+// Increments the break counter, prints a marker line with the watchpoint
-+// code, break count and instruction count, then dumps all registers through
-+// the debugger.
-+void Simulator::printWatchpoint(uint32_t code) {
-+  ppc64Debugger debugger(this);
-+  break_count_ += 1;
-+  printf("\n---- break %d marker: %20" PRIi64 " (instr count: %20" PRIi64
-+         ") ----\n",
-+         code, break_count_, icount_);
-+  debugger.printAllRegs();
-+}
-+
-+// For an enabled stop code, hands control to the debugger; for a disabled
-+// one, skips the instruction by advancing the PC by one instruction.
-+void Simulator::handleStop(uint32_t code, SimInstruction* instr) {
-+  if (!isEnabledStop(code)) {
-+    set_pc(get_pc() + SimInstruction::kInstrSize);
-+    return;
-+  }
-+
-+  ppc64Debugger debugger(this);
-+  debugger.stop(instr);
-+}
-+
-+// True when the instruction word equals kCallRedirInstr.
-+bool Simulator::isStopInstruction(SimInstruction* instr) {
-+  const auto encoding = instr->instructionBits();
-+  return encoding == kCallRedirInstr;
-+}
-+
-+// A stop is enabled while kStopDisabledBit is clear in its counter word.
-+// code must lie above the watchpoint range and not exceed kMaxStopCode.
-+bool Simulator::isEnabledStop(uint32_t code) {
-+  MOZ_ASSERT(code <= kMaxStopCode);
-+  MOZ_ASSERT(code > kMaxWatchpointCode);
-+  const auto disabledFlag = watchedStops_[code].count_ & kStopDisabledBit;
-+  return !disabledFlag;
-+}
-+
-+// Clears kStopDisabledBit for `code` if the stop is currently disabled.
-+void Simulator::enableStop(uint32_t code) {
-+  if (isEnabledStop(code)) {
-+    return;
-+  }
-+  watchedStops_[code].count_ &= ~kStopDisabledBit;
-+}
-+
-+// Sets kStopDisabledBit for `code` if the stop is currently enabled.
-+void Simulator::disableStop(uint32_t code) {
-+  if (!isEnabledStop(code)) {
-+    return;
-+  }
-+  watchedStops_[code].count_ |= kStopDisabledBit;
-+}
-+
-+// Bumps the hit counter of stop `code`. When the low 31 bits are already
-+// saturated (0x7fffffff), prints a notice, resets the counter to 0 and
-+// enables the stop instead.
-+void Simulator::increaseStopCounter(uint32_t code) {
-+  MOZ_ASSERT(code <= kMaxStopCode);
-+
-+  const bool saturated =
-+      (watchedStops_[code].count_ & ~(1 << 31)) == 0x7fffffff;
-+  if (!saturated) {
-+    watchedStops_[code].count_++;
-+    return;
-+  }
-+
-+  printf(
-+      "Stop counter for code %i has overflowed.\n"
-+      "Enabling this code and reseting the counter to 0.\n",
-+      code);
-+  watchedStops_[code].count_ = 0;
-+  enableStop(code);
-+}
-+
-+// Prints the state and hit count of stop `code`. Watchpoint codes and codes
-+// above kMaxStopCode each get an explanatory message; a stop whose counter
-+// is zero prints nothing.
-+void Simulator::printStopInfo(uint32_t code) {
-+  if (code <= kMaxWatchpointCode) {
-+    printf("That is a watchpoint, not a stop.\n");
-+    return;
-+  }
-+  if (code > kMaxStopCode) {
-+    printf("Code too large, only %u stops can be used\n", kMaxStopCode + 1);
-+    return;
-+  }
-+
-+  const char* state = isEnabledStop(code) ? "Enabled" : "Disabled";
-+  // Mask off the disabled flag to obtain the plain hit count.
-+  int32_t hits = watchedStops_[code].count_ & ~kStopDisabledBit;
-+  if (hits == 0) {
-+    return;
-+  }
-+
-+  if (watchedStops_[code].desc_) {
-+    printf("stop %i - 0x%x: \t%s, \tcounter = %i, \t%s\n", code, code,
-+           state, hits, watchedStops_[code].desc_);
-+  } else {
-+    printf("stop %i - 0x%x: \t%s, \tcounter = %i\n", code, code, state,
-+           hits);
-+  }
-+}
-+
-+// =============================================================================
-+// Instruction decoders.
-+// =============================================================================
-+
-+// Effective address of a D-form instruction: (RA|0) + offset. An RA field
-+// of 0 selects the literal value 0 as the base, not the contents of GPR[0].
-+static inline int64_t DFormEA(Simulator* sim, SimInstruction* instr,
-+                              int16_t offset) {
-+  const uint32_t raField = instr->raValue();
-+  if (raField == 0) {
-+    return int64_t(0) + offset;
-+  }
-+  return sim->getRegister(raField) + offset;
-+}
-+
-+// Effective address of a DS-form instruction: (RA|0) + offset, where an RA
-+// field of 0 selects the literal value 0 as the base.
-+static inline int64_t DSFormEA(Simulator* sim, SimInstruction* instr,
-+                               int16_t offset) {
-+  const uint32_t raField = instr->raValue();
-+  if (raField == 0) {
-+    return int64_t(0) + offset;
-+  }
-+  return sim->getRegister(raField) + offset;
-+}
-+
-+// Effective address of an X-form indexed instruction: (RA|0) + RB. An RA
-+// field of 0 selects the literal value 0 as the base, not the contents of
-+// GPR[0].
-+static inline int64_t XFormEA(Simulator* sim, SimInstruction* instr) {
-+  const uint32_t raField = instr->raValue();
-+  const uint32_t rbField = instr->rbValue();
-+  int64_t base = 0;
-+  if (raField != 0) {
-+    base = sim->getRegister(raField);
-+  }
-+  return base + sim->getRegister(rbField);
-+}
-+
-+// Effective address of an X-form indexed update instruction: RA + RB. The
-+// update forms require RA != 0, so RA is always read as a register here.
-+static inline int64_t XFormEAUpdate(Simulator* sim, SimInstruction* instr) {
-+  const int64_t base = sim->getRegister(instr->raValue());
-+  const int64_t index = sim->getRegister(instr->rbValue());
-+  return base + index;
-+}
-+
-+// -----------------------------------------------------------------------------
-+// decodeDFormALU: addi, addis, ori, oris, xori, xoris, andi., andis.,
-+// cmpi, cmpli, subfic, addic, addic., mulli, twi
-+//
-+// Executes one D-form integer instruction, selected by its primary opcode.
-+// For the logical immediates (ori .. andis.) the field decoded as `rt` holds
-+// the source register RS and `ra` is the destination. Arithmetic that the
-+// ISA defines modulo 2^64 is carried out on unsigned values so the host
-+// never evaluates an overflowing signed expression. Any opcode not listed
-+// above crashes.
-+
-+void Simulator::decodeDFormALU(SimInstruction* instr) {
-+  uint32_t opcode = instr->opcode();
-+  uint32_t rt = instr->rtValue();
-+  uint32_t ra = instr->raValue();
-+  int16_t si = instr->imm16Value();
-+  uint16_t ui = instr->uimm16Value();
-+
-+  switch (opcode) {
-+    case 14: {
-+      // addi: RT = (RA|0) + SI
-+      int64_t base = (ra == 0) ? 0 : getRegister(ra);
-+      setRegister(rt, I64(U64(base) + U64((int64_t)si)));
-+      break;
-+    }
-+    case 15: {
-+      // addis: RT = (RA|0) + (SI << 16). The shift is written as a
-+      // multiplication because SI may be negative.
-+      int64_t base = (ra == 0) ? 0 : getRegister(ra);
-+      int64_t shifted = (int64_t)si * 65536;
-+      setRegister(rt, I64(U64(base) + U64(shifted)));
-+      break;
-+    }
-+    case 24: {
-+      // ori: RA = RS | UI
-+      setRegister(ra, getRegister(rt) | (uint64_t)ui);
-+      break;
-+    }
-+    case 25: {
-+      // oris: RA = RS | (UI << 16)
-+      setRegister(ra, getRegister(rt) | ((uint64_t)ui << 16));
-+      break;
-+    }
-+    case 26: {
-+      // xori: RA = RS ^ UI
-+      setRegister(ra, getRegister(rt) ^ (uint64_t)ui);
-+      break;
-+    }
-+    case 27: {
-+      // xoris: RA = RS ^ (UI << 16)
-+      setRegister(ra, getRegister(rt) ^ ((uint64_t)ui << 16));
-+      break;
-+    }
-+    case 28: {
-+      // andi.: RA = RS & UI, update CR0
-+      int64_t result = getRegister(rt) & (uint64_t)ui;
-+      setRegister(ra, result);
-+      updateCR0(result);
-+      break;
-+    }
-+    case 29: {
-+      // andis.: RA = RS & (UI << 16), update CR0
-+      int64_t result = getRegister(rt) & ((uint64_t)ui << 16);
-+      setRegister(ra, result);
-+      updateCR0(result);
-+      break;
-+    }
-+    case 11: {
-+      // cmpi: compare RA with SI, signed
-+      uint32_t bf = instr->bfValue();
-+      bool l = instr->lBit();
-+      if (l) {
-+        // 64-bit compare
-+        setCRFieldCmp(bf, getRegister(ra), (int64_t)si);
-+      } else {
-+        // 32-bit compare: only the low word of RA takes part, sign-extended.
-+        int32_t ra32 = I32(getRegister(ra));
-+        setCRFieldCmp(bf, (int64_t)ra32, (int64_t)(int32_t)si);
-+      }
-+      break;
-+    }
-+    case 10: {
-+      // cmpli: compare RA with UI, unsigned
-+      uint32_t bf = instr->bfValue();
-+      bool l = instr->lBit();
-+      if (l) {
-+        // 64-bit unsigned compare
-+        setCRFieldCmpU(bf, U64(getRegister(ra)), (uint64_t)ui);
-+      } else {
-+        // 32-bit unsigned compare: only the low word of RA takes part.
-+        uint32_t ra32 = U32(getRegister(ra));
-+        setCRFieldCmpU(bf, (uint64_t)ra32, (uint64_t)ui);
-+      }
-+      break;
-+    }
-+    case 8: {
-+      // subfic: RT = SI - RA, evaluated as ~RA + SI + 1; CA receives the
-+      // carry out of that sum.
-+      uint64_t ra_val = U64(getRegister(ra));
-+      uint64_t imm = U64((int64_t)si);
-+      setRegister(rt, I64(imm - ra_val));
-+      // ~RA + SI + 1 equals SI - RA + 2^64, so it carries out of bit 63
-+      // exactly when SI >= RA as unsigned 64-bit values (no borrow).
-+      setXERCA(imm >= ra_val);
-+      break;
-+    }
-+    case 12: {
-+      // addic: RT = RA + SI, set CA
-+      uint64_t ra_val = U64(getRegister(ra));
-+      uint64_t imm = U64((int64_t)si);
-+      uint64_t result = ra_val + imm;
-+      setRegister(rt, I64(result));
-+      // The unsigned sum wrapped iff it is smaller than an operand.
-+      setXERCA(result < ra_val);
-+      break;
-+    }
-+    case 13: {
-+      // addic.: RT = RA + SI, set CA, update CR0
-+      uint64_t ra_val = U64(getRegister(ra));
-+      uint64_t imm = U64((int64_t)si);
-+      uint64_t result = ra_val + imm;
-+      setRegister(rt, I64(result));
-+      setXERCA(result < ra_val);
-+      updateCR0(I64(result));
-+      break;
-+    }
-+    case 7: {
-+      // mulli: RT = RA * SI (low 64 bits). The low 64 bits of the signed
-+      // and unsigned products are identical, and the unsigned multiply
-+      // wraps instead of overflowing.
-+      uint64_t product = U64(getRegister(ra)) * U64((int64_t)si);
-+      setRegister(rt, I64(product));
-+      break;
-+    }
-+    case 3: {
-+      // twi: Trap Word Immediate. We don't implement trapping in the
-+      // simulator; just continue.
-+      break;
-+    }
-+    default:
-+      MOZ_CRASH_UNSAFE_PRINTF("decodeDFormALU: unhandled opcode %u", opcode);
-+  }
-+}
-+
-+// -----------------------------------------------------------------------------
-+// decodeDFormLoad: lwz(32), lbz(34), lhz(40), lha(42), lfs(48), lfd(50)
-+// and update variants
-+
-+void Simulator::decodeDFormLoad(SimInstruction* instr) {
-+  // Executes one D-form load. Every base opcode handled here is even and has
-+  // an "update" twin at opcode + 1 that performs the same load and then
-+  // writes the effective address back into RA.
-+  const uint32_t major = instr->opcode();
-+  const uint32_t dest = instr->rtValue();
-+  const int16_t disp = instr->imm16Value();
-+  const uint64_t addr = DFormEA(this, instr, disp);
-+
-+  // Clearing the low bit lets a base opcode and its update twin share a case.
-+  switch (major & ~1u) {
-+    case 32:
-+      // lwz / lwzu: word, zero-extended
-+      setRegister(dest, U64(readWU(addr, instr)));
-+      break;
-+    case 34:
-+      // lbz / lbzu: byte, zero-extended
-+      setRegister(dest, U64(readBU(addr)));
-+      break;
-+    case 40:
-+      // lhz / lhzu: halfword, zero-extended
-+      setRegister(dest, U64(readHU(addr, instr)));
-+      break;
-+    case 42:
-+      // lha / lhau: halfword, sign-extended
-+      setRegister(dest, (int64_t)readH(addr, instr));
-+      break;
-+    case 48: {
-+      // lfs / lfsu: single-precision value widened to double in the FPR
-+      // through the NaN-preserving promotion helper. A faulting access
-+      // leaves both the FPR and RA untouched.
-+      if (handleWasmSegFault(addr, 4)) {
-+        return;
-+      }
-+      const float single = *reinterpret_cast<float*>(addr);
-+      setFpuRegisterDouble(dest, promoteFloatPreservingNaN(single));
-+      break;
-+    }
-+    case 50:
-+      // lfd / lfdu: double-precision value
-+      setFpuRegisterDouble(dest, readD(addr, instr));
-+      break;
-+    default:
-+      MOZ_CRASH_UNSAFE_PRINTF("decodeDFormLoad: unhandled opcode %u", major);
-+  }
-+
-+  // Odd opcodes are the update forms: RA receives the effective address
-+  // after the target register has been written.
-+  if (major & 1) {
-+    setRegister(instr->raValue(), addr);
-+  }
-+}
-+
-+// -----------------------------------------------------------------------------
-+// decodeDFormStore: stw(36), stwu(37), stb(38), sth(44), stfs(52), stfd(54)
-+// and update variants
-+
-+void Simulator::decodeDFormStore(SimInstruction* instr) {
-+  // Executes one D-form store: stw/stwu (36/37), stb/stbu (38/39),
-+  // sth/sthu (44/45), stfs/stfsu (52/53) and stfd/stfdu (54/55). Any other
-+  // opcode reaching this function crashes the simulator.
-+  uint32_t opcode = instr->opcode();
-+  uint32_t rs = instr->rsValue();
-+  int16_t si = instr->imm16Value();
-+
-+  // For stores, the effective address calculation differs for update forms:
-+  //   - Non-update: EA = (RA|0) + D
-+  //   - Update:     EA = RA + D (RA must not be 0)
-+  bool isUpdate = false;
-+  switch (opcode) {
-+    case 37: case 39: case 45: case 53: case 55:
-+      isUpdate = true;
-+      break;
-+  }
-+
-+  uint64_t ea;
-+  if (isUpdate) {
-+    ea = getRegister(instr->raValue()) + (int64_t)si;
-+  } else {
-+    ea = DFormEA(this, instr, si);
-+  }
-+
-+  // In every update form the source register is read and stored before RA
-+  // is overwritten, so an instruction with RS == RA stores the old value.
-+  switch (opcode) {
-+    case 36:
-+      // stw
-+      writeW(ea, I32(getRegister(rs)), instr);
-+      break;
-+    case 37:
-+      // stwu: this opcode is classified as an update form above but used to
-+      // have no case here, so it fell through to the crash in `default`.
-+      writeW(ea, I32(getRegister(rs)), instr);
-+      setRegister(instr->raValue(), ea);
-+      break;
-+    case 38:
-+      // stb
-+      writeB(ea, (uint8_t)(getRegister(rs) & 0xFF));
-+      break;
-+    case 39:
-+      // stbu
-+      writeB(ea, (uint8_t)(getRegister(rs) & 0xFF));
-+      setRegister(instr->raValue(), ea);
-+      break;
-+    case 44:
-+      // sth
-+      writeH(ea, U16(getRegister(rs)), instr);
-+      break;
-+    case 45:
-+      // sthu
-+      writeH(ea, U16(getRegister(rs)), instr);
-+      setRegister(instr->raValue(), ea);
-+      break;
-+    case 52: {
-+      // stfs: convert double in FPR to single and store (NaN-preserving;
-+      // matches Power ISA `stfs` which uses xscvdpspn semantics)
-+      double dval = getFpuRegisterDouble(rs);
-+      float fval = demoteDoublePreservingNaN(dval);
-+      if (handleWasmSegFault(ea, 4)) break;
-+      *reinterpret_cast<float*>(ea) = fval;
-+      // NOTE(review): the raw store bypasses the write* helpers, so LLBit_
-+      // is cleared by hand; presumably this drops any load-reserve
-+      // reservation -- confirm against the write* helpers.
-+      LLBit_ = false;
-+      break;
-+    }
-+    case 53: {
-+      // stfsu: as stfs, then RA = EA. A faulting access skips both the
-+      // store and the RA update.
-+      double dval = getFpuRegisterDouble(rs);
-+      float fval = demoteDoublePreservingNaN(dval);
-+      if (handleWasmSegFault(ea, 4)) break;
-+      *reinterpret_cast<float*>(ea) = fval;
-+      LLBit_ = false;
-+      setRegister(instr->raValue(), ea);
-+      break;
-+    }
-+    case 54:
-+      // stfd
-+      writeD(ea, getFpuRegisterDouble(rs), instr);
-+      break;
-+    case 55:
-+      // stfdu
-+      writeD(ea, getFpuRegisterDouble(rs), instr);
-+      setRegister(instr->raValue(), ea);
-+      break;
-+    default:
-+      MOZ_CRASH_UNSAFE_PRINTF("decodeDFormStore: unhandled opcode %u", opcode);
-+  }
-+}
-+
-+// -----------------------------------------------------------------------------
-+// decodeDSForm: ld(58/0), ldu(58/1), lwa(58/2), std(62/0), stdu(62/1)
-+
-+void Simulator::decodeDSForm(SimInstruction* instr) {
-+  // Executes one DS-form instruction. The major opcode selects loads (58) or
-+  // stores (62); the two low instruction bits select the operation within
-+  // that group.
-+  const uint32_t major = instr->opcode();
-+  const uint32_t reg = instr->rtValue();
-+  const int16_t disp = instr->ds14Value();
-+  const uint32_t minor = instr->bits(1, 0);
-+
-+  switch (major) {
-+    case 58: {
-+      const uint64_t addr = DSFormEA(this, instr, disp);
-+      if (minor == 0) {
-+        // ld
-+        setRegister(reg, readDW(addr, instr));
-+      } else if (minor == 1) {
-+        // ldu: load, then RA = EA
-+        setRegister(reg, readDW(addr, instr));
-+        setRegister(instr->raValue(), addr);
-+      } else if (minor == 2) {
-+        // lwa (load word algebraic, sign-extended to 64)
-+        setRegister(reg, (int64_t)readW(addr, instr));
-+      } else {
-+        MOZ_CRASH_UNSAFE_PRINTF("decodeDSForm: opcode 58, xo=%u", minor);
-+      }
-+      break;
-+    }
-+    case 62: {
-+      // Only stdu reads RA directly; every other encoding goes through
-+      // DSFormEA, including the unsupported ones that crash below.
-+      uint64_t addr;
-+      if (minor != 1) {
-+        addr = DSFormEA(this, instr, disp);
-+      } else {
-+        addr = getRegister(instr->raValue()) + (int64_t)disp;
-+      }
-+
-+      if (minor == 0) {
-+        // std
-+        writeDW(addr, getRegister(reg), instr);
-+      } else if (minor == 1) {
-+        // stdu: store, then RA = EA
-+        writeDW(addr, getRegister(reg), instr);
-+        setRegister(instr->raValue(), addr);
-+      } else {
-+        MOZ_CRASH_UNSAFE_PRINTF("decodeDSForm: opcode 62, xo=%u", minor);
-+      }
-+      break;
-+    }
-+    default:
-+      MOZ_CRASH_UNSAFE_PRINTF("decodeDSForm: unhandled opcode %u", major);
-+  }
-+}
-+
-+// -----------------------------------------------------------------------------
-+// decodeXForm: Major opcode 31 (X-form, XO-form, etc.)
-+// This is the largest decoder covering most ALU, indexed load/store, SPR,
-+// and atomic instructions.
-+
-+void Simulator::decodeXForm(SimInstruction* instr) {
-+ uint32_t xo = instr->xoValue();
-+ uint32_t rt = instr->rtValue();
-+ uint32_t ra = instr->raValue();
-+ uint32_t rb = instr->rbValue();
-+ bool rc = instr->rcBit();
-+
-+ // Many instructions share major opcode 31. Switch on extended opcode.
-+ // For XO-form with OE=1, the xoValue() includes bit 10, so
-+ // addo (266 | 512 = 778) etc. are separate cases.
-+
-+ // First check for isel which uses bits 1-5 = 15 (XO = 15 in bits 1..5).
-+ if ((xo & 0x1F) == 15) {
-+ // isel: if CR[BC] then RT=RA else RT=RB
-+ // BC is in bits 6..10 (the rc field position).
-+ uint32_t bc = instr->rcValue();
-+ uint32_t crField = bc / 4;
-+ uint32_t crBit = bc % 4;
-+ uint8_t crFieldVal = getCRField(crField);
-+ // PPC CR field bits: bit3=LT(8), bit2=GT(4), bit1=EQ(2), bit0=SO(1)
-+ // Bit numbering within field: 0=LT, 1=GT, 2=EQ, 3=SO
-+ bool bitSet;
-+ switch (crBit) {
-+ case 0: bitSet = (crFieldVal & kCRFieldLT) != 0; break;
-+ case 1: bitSet = (crFieldVal & kCRFieldGT) != 0; break;
-+ case 2: bitSet = (crFieldVal & kCRFieldEQ) != 0; break;
-+ case 3: bitSet = (crFieldVal & kCRFieldSO) != 0; break;
-+ default: bitSet = false; break;
-+ }
-+ int64_t raVal = (ra == 0) ? 0 : getRegister(ra);
-+ int64_t rbVal = getRegister(rb);
-+ setRegister(rt, bitSet ? raVal : rbVal);
-+ return;
-+ }
-+
-+ switch (xo) {
-+ // --- Arithmetic ---
-+ case 266: {
-+ // add
-+ int64_t result = getRegister(ra) + getRegister(rb);
-+ setRegister(rt, result);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+ case 778: {
-+ // addo
-+ int64_t ra_val = getRegister(ra);
-+ int64_t rb_val = getRegister(rb);
-+ int64_t result = ra_val + rb_val;
-+ setRegister(rt, result);
-+ // Overflow if signs of inputs are same but result sign differs.
-+ bool ov = ((ra_val ^ result) & (rb_val ^ result)) < 0;
-+ setXEROV(ov);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+ case 10: {
-+ // addc
-+ uint64_t ra_val = U64(getRegister(ra));
-+ uint64_t rb_val = U64(getRegister(rb));
-+ uint64_t result = ra_val + rb_val;
-+ setRegister(rt, I64(result));
-+ setXERCA(result < ra_val);
-+ if (rc) updateCR0(I64(result));
-+ break;
-+ }
-+ case 138: {
-+ // adde: RT = RA + RB + CA
-+ uint64_t ra_val = U64(getRegister(ra));
-+ uint64_t rb_val = U64(getRegister(rb));
-+ uint64_t ca = getXERCA() ? 1ULL : 0ULL;
-+ uint64_t result = ra_val + rb_val + ca;
-+ setRegister(rt, I64(result));
-+ // Carry-out: when ca==0, only the ra+rb wrap matters; when ca==1,
-+ // an additional wrap occurs iff result <= ra_val.
-+ bool newCA = ca ? (result <= ra_val) : (result < ra_val);
-+ setXERCA(newCA);
-+ if (rc) updateCR0(I64(result));
-+ break;
-+ }
-+ case 234: {
-+ // addme: RT = RA + CA - 1
-+ uint64_t ra_val = U64(getRegister(ra));
-+ uint64_t ca = getXERCA() ? 1ULL : 0ULL;
-+ uint64_t result = ra_val + ca + ~0ULL; // + CA + (-1)
-+ setRegister(rt, I64(result));
-+ // CA if carry out of (RA + CA + 0xFFFFFFFFFFFFFFFF)
-+ bool newCA = (ra_val != 0) || (ca != 0);
-+ setXERCA(newCA);
-+ if (rc) updateCR0(I64(result));
-+ break;
-+ }
-+ case 202: {
-+ // addze: RT = RA + CA
-+ uint64_t ra_val = U64(getRegister(ra));
-+ uint64_t ca = getXERCA() ? 1ULL : 0ULL;
-+ uint64_t result = ra_val + ca;
-+ setRegister(rt, I64(result));
-+ setXERCA(result < ra_val);
-+ if (rc) updateCR0(I64(result));
-+ break;
-+ }
-+ case 40: {
-+ // subf: RT = RB - RA
-+ int64_t result = getRegister(rb) - getRegister(ra);
-+ setRegister(rt, result);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+ case 552: {
-+ // subfo: RT = RB - RA, set OV
-+ int64_t ra_val = getRegister(ra);
-+ int64_t rb_val = getRegister(rb);
-+ int64_t result = rb_val - ra_val;
-+ setRegister(rt, result);
-+ bool ov = ((rb_val ^ ra_val) & (rb_val ^ result)) < 0;
-+ setXEROV(ov);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+ case 8: {
-+ // subfc: RT = ~RA + RB + 1
-+ uint64_t ra_val = U64(getRegister(ra));
-+ uint64_t rb_val = U64(getRegister(rb));
-+ uint64_t result = ~ra_val + rb_val + 1;
-+ setRegister(rt, I64(result));
-+ // CA = no borrow = (RB >= RA unsigned)
-+ setXERCA(rb_val >= ra_val);
-+ if (rc) updateCR0(I64(result));
-+ break;
-+ }
-+ case 136: {
-+ // subfe: RT = ~RA + RB + CA
-+ uint64_t ra_val = U64(getRegister(ra));
-+ uint64_t rb_val = U64(getRegister(rb));
-+ uint64_t ca = getXERCA() ? 1ULL : 0ULL;
-+ uint64_t result = ~ra_val + rb_val + ca;
-+ setRegister(rt, I64(result));
-+ __uint128_t wide = (__uint128_t)(~ra_val) + (__uint128_t)rb_val + ca;
-+ setXERCA((wide >> 64) != 0);
-+ if (rc) updateCR0(I64(result));
-+ break;
-+ }
-+ case 232: {
-+ // subfze: RT = ~RA + CA
-+ uint64_t ra_val = U64(getRegister(ra));
-+ uint64_t ca = getXERCA() ? 1ULL : 0ULL;
-+ uint64_t result = ~ra_val + ca;
-+ setRegister(rt, I64(result));
-+ setXERCA(ca > ra_val); // CA if ~RA + CA overflows
-+ if (rc) updateCR0(I64(result));
-+ break;
-+ }
-+ case 104: {
-+ // neg: RT = -RA
-+ int64_t result = -getRegister(ra);
-+ setRegister(rt, result);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+
-+ // --- Multiply ---
-+ case 233: {
-+ // mulld: RT = RA * RB (low 64 bits)
-+ int64_t result = getRegister(ra) * getRegister(rb);
-+ setRegister(rt, result);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+ case 745: {
-+ // mulldo: RT = RA * RB, set OV
-+ int64_t ra_val = getRegister(ra);
-+ int64_t rb_val = getRegister(rb);
-+ int64_t result = ra_val * rb_val;
-+ setRegister(rt, result);
-+ // OV if high part of full 128-bit product is not all-sign.
-+ int64_t hi = MultiplyHighSigned(ra_val, rb_val);
-+ bool ov = (hi != (result >> 63));
-+ setXEROV(ov);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+ case 235: {
-+ // mullw: RT = sign_ext(RA[32:63] * RB[32:63])
-+ int64_t result = (int64_t)I32(getRegister(ra)) *
-+ (int64_t)I32(getRegister(rb));
-+ setRegister(rt, result);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+ case 747: {
-+ // mullwo
-+ int64_t ra_val = I32(getRegister(ra));
-+ int64_t rb_val = I32(getRegister(rb));
-+ int64_t result = ra_val * rb_val;
-+ setRegister(rt, result);
-+ bool ov = (result != (int64_t)I32(result));
-+ setXEROV(ov);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+ case 73: {
-+ // mulhd: RT = high 64 bits of RA * RB (signed)
-+ setRegister(rt, MultiplyHighSigned(getRegister(ra), getRegister(rb)));
-+ if (rc) updateCR0(getRegister(rt));
-+ break;
-+ }
-+ case 9: {
-+ // mulhdu: RT = high 64 bits of RA * RB (unsigned)
-+ setRegister(rt, I64(MultiplyHighUnsigned(U64(getRegister(ra)),
-+ U64(getRegister(rb)))));
-+ if (rc) updateCR0(getRegister(rt));
-+ break;
-+ }
-+ case 75: {
-+ // mulhw: RT = high 32 bits of (RA[32:63] * RB[32:63]), signed
-+ int64_t result =
-+ (int64_t)I32(getRegister(ra)) * (int64_t)I32(getRegister(rb));
-+ setRegister(rt, result >> 32);
-+ if (rc) updateCR0(getRegister(rt));
-+ break;
-+ }
-+ case 11: {
-+ // mulhwu: RT = high 32 bits, unsigned
-+ uint64_t result =
-+ (uint64_t)U32(getRegister(ra)) * (uint64_t)U32(getRegister(rb));
-+ setRegister(rt, I64(result >> 32));
-+ if (rc) updateCR0(getRegister(rt));
-+ break;
-+ }
-+
-+ // --- Divide ---
-+ case 489: {
-+ // divd: RT = RA / RB (signed, 64-bit)
-+ int64_t ra_val = getRegister(ra);
-+ int64_t rb_val = getRegister(rb);
-+ if (rb_val == 0 || (ra_val == INT64_MIN && rb_val == -1)) {
-+ setRegister(rt, 0);
-+ } else {
-+ setRegister(rt, ra_val / rb_val);
-+ }
-+ if (rc) updateCR0(getRegister(rt));
-+ break;
-+ }
-+ case 1001: {
-+ // divdo
-+ int64_t ra_val = getRegister(ra);
-+ int64_t rb_val = getRegister(rb);
-+ bool ov = (rb_val == 0) || (ra_val == INT64_MIN && rb_val == -1);
-+ if (ov) {
-+ setRegister(rt, 0);
-+ } else {
-+ setRegister(rt, ra_val / rb_val);
-+ }
-+ setXEROV(ov);
-+ if (rc) updateCR0(getRegister(rt));
-+ break;
-+ }
-+ case 457: {
-+ // divdu: unsigned 64-bit divide
-+ uint64_t ra_val = U64(getRegister(ra));
-+ uint64_t rb_val = U64(getRegister(rb));
-+ if (rb_val == 0) {
-+ setRegister(rt, 0);
-+ } else {
-+ setRegister(rt, I64(ra_val / rb_val));
-+ }
-+ if (rc) updateCR0(getRegister(rt));
-+ break;
-+ }
-+ case 969: {
-+ // divduo
-+ uint64_t ra_val = U64(getRegister(ra));
-+ uint64_t rb_val = U64(getRegister(rb));
-+ bool ov = (rb_val == 0);
-+ if (ov) {
-+ setRegister(rt, 0);
-+ } else {
-+ setRegister(rt, I64(ra_val / rb_val));
-+ }
-+ setXEROV(ov);
-+ if (rc) updateCR0(getRegister(rt));
-+ break;
-+ }
-+ case 491: {
-+ // divw: signed 32-bit divide
-+ int32_t ra_val = I32(getRegister(ra));
-+ int32_t rb_val = I32(getRegister(rb));
-+ if (rb_val == 0 || (ra_val == INT32_MIN && rb_val == -1)) {
-+ setRegister(rt, 0);
-+ } else {
-+ setRegister(rt, (int64_t)(ra_val / rb_val));
-+ }
-+ if (rc) updateCR0(getRegister(rt));
-+ break;
-+ }
-+ case 1003: {
-+ // divwo
-+ int32_t ra_val = I32(getRegister(ra));
-+ int32_t rb_val = I32(getRegister(rb));
-+ bool ov = (rb_val == 0) || (ra_val == INT32_MIN && rb_val == -1);
-+ if (ov) {
-+ setRegister(rt, 0);
-+ } else {
-+ setRegister(rt, (int64_t)(ra_val / rb_val));
-+ }
-+ setXEROV(ov);
-+ if (rc) updateCR0(getRegister(rt));
-+ break;
-+ }
-+ case 459: {
-+ // divwu: unsigned 32-bit divide
-+ uint32_t ra_val = U32(getRegister(ra));
-+ uint32_t rb_val = U32(getRegister(rb));
-+ if (rb_val == 0) {
-+ setRegister(rt, 0);
-+ } else {
-+ setRegister(rt, (int64_t)(ra_val / rb_val));
-+ }
-+ if (rc) updateCR0(getRegister(rt));
-+ break;
-+ }
-+ case 971: {
-+ // divwuo
-+ uint32_t ra_val = U32(getRegister(ra));
-+ uint32_t rb_val = U32(getRegister(rb));
-+ bool ov = (rb_val == 0);
-+ if (ov) {
-+ setRegister(rt, 0);
-+ } else {
-+ setRegister(rt, (int64_t)(ra_val / rb_val));
-+ }
-+ setXEROV(ov);
-+ if (rc) updateCR0(getRegister(rt));
-+ break;
-+ }
-+
-+ // --- POWER9 modulo (ISA 3.0) ---
-+ // Result of "undefined" division (rb_val == 0, or signed INT_MIN / -1)
-+ // is implementation-defined per Power ISA; matching the divX behaviour
-+ // above, we yield 0 in those cases. Rc has no encoding for these ops.
-+ case 779: {
-+ // modsw: RT = RA % RB (signed, 32-bit)
-+ int32_t ra_val = I32(getRegister(ra));
-+ int32_t rb_val = I32(getRegister(rb));
-+ if (rb_val == 0 || (ra_val == INT32_MIN && rb_val == -1)) {
-+ setRegister(rt, 0);
-+ } else {
-+ setRegister(rt, (int64_t)(ra_val % rb_val));
-+ }
-+ break;
-+ }
-+ case 267: {
-+ // moduw: RT = RA % RB (unsigned, 32-bit)
-+ uint32_t ra_val = U32(getRegister(ra));
-+ uint32_t rb_val = U32(getRegister(rb));
-+ if (rb_val == 0) {
-+ setRegister(rt, 0);
-+ } else {
-+ setRegister(rt, (int64_t)(ra_val % rb_val));
-+ }
-+ break;
-+ }
-+ case 777: {
-+ // modsd: RT = RA % RB (signed, 64-bit)
-+ int64_t ra_val = getRegister(ra);
-+ int64_t rb_val = getRegister(rb);
-+ if (rb_val == 0 || (ra_val == INT64_MIN && rb_val == -1)) {
-+ setRegister(rt, 0);
-+ } else {
-+ setRegister(rt, ra_val % rb_val);
-+ }
-+ break;
-+ }
-+ case 265: {
-+ // modud: RT = RA % RB (unsigned, 64-bit)
-+ uint64_t ra_val = U64(getRegister(ra));
-+ uint64_t rb_val = U64(getRegister(rb));
-+ if (rb_val == 0) {
-+ setRegister(rt, 0);
-+ } else {
-+ setRegister(rt, I64(ra_val % rb_val));
-+ }
-+ break;
-+ }
-+
-+ // --- Logical ---
-+ case 28: {
-+ // and: RA = RS & RB
-+ int64_t result = getRegister(rt) & getRegister(rb);
-+ setRegister(ra, result);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+ case 60: {
-+ // andc: RA = RS & ~RB
-+ int64_t result = getRegister(rt) & ~getRegister(rb);
-+ setRegister(ra, result);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+ case 444: {
-+ // or: RA = RS | RB
-+ int64_t result = getRegister(rt) | getRegister(rb);
-+ setRegister(ra, result);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+ case 412: {
-+ // orc: RA = RS | ~RB
-+ int64_t result = getRegister(rt) | ~getRegister(rb);
-+ setRegister(ra, result);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+ case 316: {
-+ // xor: RA = RS ^ RB
-+ int64_t result = getRegister(rt) ^ getRegister(rb);
-+ setRegister(ra, result);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+ case 476: {
-+ // nand: RA = ~(RS & RB)
-+ int64_t result = ~(getRegister(rt) & getRegister(rb));
-+ setRegister(ra, result);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+ case 124: {
-+ // nor: RA = ~(RS | RB)
-+ int64_t result = ~(getRegister(rt) | getRegister(rb));
-+ setRegister(ra, result);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+ case 284: {
-+ // eqv: RA = ~(RS ^ RB)
-+ int64_t result = ~(getRegister(rt) ^ getRegister(rb));
-+ setRegister(ra, result);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+
-+ // --- Shifts ---
-+ case 27: {
-+ // sld: RA = RS << RB[58:63] if RB[57]==0, else RA=0
-+ uint64_t shift = U64(getRegister(rb));
-+ uint64_t rs_val = U64(getRegister(rt));
-+ int64_t result;
-+ if (shift & 0x40) {
-+ result = 0;
-+ } else {
-+ result = I64(rs_val << (shift & 0x3F));
-+ }
-+ setRegister(ra, result);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+ case 24: {
-+ // slw: RA = RS[32:63] << RB[59:63] if RB[58]==0, else RA=0 (32-bit)
-+ uint32_t shift = U32(getRegister(rb));
-+ uint32_t rs_val = U32(getRegister(rt));
-+ uint32_t result;
-+ if (shift & 0x20) {
-+ result = 0;
-+ } else {
-+ result = rs_val << (shift & 0x1F);
-+ }
-+ setRegister(ra, (int64_t)(int32_t)result);
-+ if (rc) updateCR0(getRegister(ra));
-+ break;
-+ }
-+ case 539: {
-+ // srd: RA = RS >> RB[58:63] if RB[57]==0, else RA=0 (logical)
-+ uint64_t shift = U64(getRegister(rb));
-+ uint64_t rs_val = U64(getRegister(rt));
-+ int64_t result;
-+ if (shift & 0x40) {
-+ result = 0;
-+ } else {
-+ result = I64(rs_val >> (shift & 0x3F));
-+ }
-+ setRegister(ra, result);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+ case 536: {
-+ // srw: RA = RS[32:63] >> RB[59:63] logical (32-bit)
-+ uint32_t shift = U32(getRegister(rb));
-+ uint32_t rs_val = U32(getRegister(rt));
-+ uint32_t result;
-+ if (shift & 0x20) {
-+ result = 0;
-+ } else {
-+ result = rs_val >> (shift & 0x1F);
-+ }
-+ setRegister(ra, (int64_t)(int32_t)result);
-+ if (rc) updateCR0(getRegister(ra));
-+ break;
-+ }
-+ case 794: {
-+ // srad: RA = RS >> RB[58:63] arithmetic (64-bit), set CA
-+ uint64_t shift = U64(getRegister(rb));
-+ int64_t rs_val = getRegister(rt);
-+ int64_t result;
-+ bool carry;
-+ if (shift & 0x40) {
-+ result = rs_val >> 63; // all sign bits
-+ carry = (rs_val < 0);
-+ } else {
-+ uint32_t sh = shift & 0x3F;
-+ result = rs_val >> sh;
-+ // CA = 1 if RS is negative and any 1-bits were shifted out.
-+ carry = (rs_val < 0) && ((rs_val & ((1ULL << sh) - 1)) != 0);
-+ }
-+ setRegister(ra, result);
-+ setXERCA(carry);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+ case 792: {
-+ // sraw: RA = RS[32:63] >> RB[59:63] arithmetic (32-bit), set CA
-+ uint32_t shift = U32(getRegister(rb));
-+ int32_t rs_val = I32(getRegister(rt));
-+ int32_t result;
-+ bool carry;
-+ if (shift & 0x20) {
-+ result = rs_val >> 31;
-+ carry = (rs_val < 0);
-+ } else {
-+ uint32_t sh = shift & 0x1F;
-+ result = rs_val >> sh;
-+ carry = (rs_val < 0) && ((rs_val & ((1U << sh) - 1)) != 0);
-+ }
-+ setRegister(ra, (int64_t)result);
-+ setXERCA(carry);
-+ if (rc) updateCR0(getRegister(ra));
-+ break;
-+ }
-+ case 826:
-+ case 827: {
-+ // sradi RA, RS, SH: RA = EXTS(RS) >> sh arithmetic (64-bit), set CA.
-+ // XS-form, XO=413 (9-bit, bits 21-29), sh[5] at bit 30, Rc at bit 31.
-+ // Our xoValue() extracts bits 10:1 (10 bits)
-+ // which yields 413*2 + sh[5] = 826 (sh[5]=0) or 827 (sh[5]=1).
-+ // sh[0:4] at instruction bits 15:11 (= raValue field position, but
-+ // for this XS-form they're the SH[0:4] subfield).
-+ uint32_t sh = instr->bits(15, 11) | (instr->bit(1) << 5);
-+ int64_t rs_val = getRegister(rt);
-+ int64_t result = (sh == 0) ? rs_val : (rs_val >> sh);
-+ // CA := rs_val < 0 && any bits shifted out are 1.
-+ bool carry = (rs_val < 0) && sh > 0 &&
-+ ((U64(rs_val) & ((1ULL << sh) - 1)) != 0);
-+ setRegister(ra, result);
-+ setXERCA(carry);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+ case 824: {
-+ // srawi: RA = RS[32:63] >> SH arithmetic (32-bit), set CA
-+ uint32_t sh = instr->bits(15, 11);
-+ int32_t rs_val = I32(getRegister(rt));
-+ int32_t result = rs_val >> sh;
-+ bool carry = (rs_val < 0) && sh > 0 &&
-+ ((U32(rs_val) & ((1U << sh) - 1)) != 0);
-+ setRegister(ra, (int64_t)result);
-+ setXERCA(carry);
-+ if (rc) updateCR0(getRegister(ra));
-+ break;
-+ }
-+
-+ // --- Extend / count ---
-+ case 954: {
-+ // extsb: RA = sign_ext(RS[56:63])
-+ int64_t result = (int64_t)(int8_t)(getRegister(rt) & 0xFF);
-+ setRegister(ra, result);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+ case 922: {
-+ // extsh: RA = sign_ext(RS[48:63])
-+ int64_t result = (int64_t)(int16_t)(getRegister(rt) & 0xFFFF);
-+ setRegister(ra, result);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+ case 986: {
-+ // extsw: RA = sign_ext(RS[32:63])
-+ int64_t result = (int64_t)(int32_t)(getRegister(rt) & 0xFFFFFFFF);
-+ setRegister(ra, result);
-+ if (rc) updateCR0(result);
-+ break;
-+ }
-+ case 58: {
-+ // cntlzd: RA = count leading zeros of RS (64-bit)
-+ setRegister(ra, CountLeadingZeros64(U64(getRegister(rt))));
-+ if (rc) updateCR0(getRegister(ra));
-+ break;
-+ }
-+ case 26: {
-+ // cntlzw: RA = count leading zeros of RS[32:63] (32-bit)
-+ setRegister(ra, CountLeadingZeros32(U32(getRegister(rt))));
-+ if (rc) updateCR0(getRegister(ra));
-+ break;
-+ }
-+ case 570: {
-+ // cnttzd
-+ setRegister(ra, CountTrailingZeros64(U64(getRegister(rt))));
-+ if (rc) updateCR0(getRegister(ra));
-+ break;
-+ }
-+ case 538: {
-+ // cnttzw
-+ setRegister(ra, CountTrailingZeros32(U32(getRegister(rt))));
-+ if (rc) updateCR0(getRegister(ra));
-+ break;
-+ }
-+ case 506: {
-+ // popcntd
-+ setRegister(ra, PopCount64(U64(getRegister(rt))));
-+ break;
-+ }
-+ case 378: {
-+ // popcntw: popcount each 32-bit half independently, sum in each half
-+ uint64_t val = U64(getRegister(rt));
-+ uint32_t lo = PopCount32(U32(val));
-+ uint32_t hi = PopCount32(U32(val >> 32));
-+ setRegister(ra, I64(((uint64_t)hi << 32) | lo));
-+ break;
-+ }
-+ case 122: {
-+ // popcntb: popcount each byte independently
-+ setRegister(ra, I64(PopCountPerByte(U64(getRegister(rt)))));
-+ break;
-+ }
-+ case 187: {
-+ // brd (POWER10): RA = byte-reverse(RS) full 64-bit doubleword.
-+ setRegister(ra, I64(__builtin_bswap64(U64(getRegister(rt)))));
-+ break;
-+ }
-+ case 219: {
-+ // brh (POWER10): byte-reverse each of the 4 halfwords in RS.
-+ uint64_t v = U64(getRegister(rt));
-+ uint64_t out = ((v & 0xFF00FF00FF00FF00ULL) >> 8) |
-+ ((v & 0x00FF00FF00FF00FFULL) << 8);
-+ setRegister(ra, I64(out));
-+ break;
-+ }
-+ case 155: {
-+ // brw (POWER10): byte-reverse each of the 2 words in RS.
-+ uint64_t v = U64(getRegister(rt));
-+ uint64_t out = ((uint64_t)__builtin_bswap32((uint32_t)(v >> 32)) << 32) |
-+ (uint64_t)__builtin_bswap32((uint32_t)v);
-+ setRegister(ra, I64(out));
-+ break;
-+ }
-+
-+ // --- Compare (X-form) ---
-+ case 0: {
-+ // cmp (cmpw/cmpd): signed compare
-+ uint32_t bf = instr->bfValue();
-+ bool l = instr->lBit();
-+ if (l) {
-+ setCRFieldCmp(bf, getRegister(ra), getRegister(rb));
-+ } else {
-+ setCRFieldCmp(bf, (int64_t)I32(getRegister(ra)),
-+ (int64_t)I32(getRegister(rb)));
-+ }
-+ break;
-+ }
-+ case 32: {
-+ // cmpl (cmplw/cmpld): unsigned compare
-+ uint32_t bf = instr->bfValue();
-+ bool l = instr->lBit();
-+ if (l) {
-+ setCRFieldCmpU(bf, U64(getRegister(ra)), U64(getRegister(rb)));
-+ } else {
-+ setCRFieldCmpU(bf, (uint64_t)U32(getRegister(ra)),
-+ (uint64_t)U32(getRegister(rb)));
-+ }
-+ break;
-+ }
-+
-+ // --- Trap ---
-+ case 4: {
-+ // tw: Trap Word. The JIT uses this for debugging / tagging.
-+ // In the simulator we just treat it as a NOP (the JIT uses tagged
-+ // trap words that are never actually reached during normal execution,
-+ // they serve as metadata for the patcher).
-+ break;
-+ }
-+
-+ // --- SPR ---
-+ case 339: {
-+ // mfspr: RT = SPR
-+ // SPR encoding: spr[4:0] at bits 16..20, spr[9:5] at bits 11..15
-+ uint32_t spr_lo = instr->raValue(); // bits 16..20
-+ uint32_t spr_hi = instr->rbValue(); // bits 11..15
-+ uint32_t spr = (spr_lo) | (spr_hi << 5);
-+ switch (spr) {
-+ case 8: // LR
-+ setRegister(rt, getLR());
-+ break;
-+ case 9: // CTR
-+ setRegister(rt, getCTR());
-+ break;
-+ case 1: // XER
-+ setRegister(rt, I64(getXER()));
-+ break;
-+ default:
-+ MOZ_CRASH_UNSAFE_PRINTF("mfspr: unhandled SPR %u", spr);
-+ }
-+ break;
-+ }
-+ case 467: {
-+ // mtspr: SPR = RS
-+ uint32_t spr_lo = instr->raValue();
-+ uint32_t spr_hi = instr->rbValue();
-+ uint32_t spr = (spr_lo) | (spr_hi << 5);
-+ int64_t val = getRegister(rt);
-+ switch (spr) {
-+ case 8: // LR
-+ setLR(val);
-+ break;
-+ case 9: // CTR
-+ setCTR(val);
-+ break;
-+ case 1: // XER
-+ setXER(U64(val));
-+ break;
-+ default:
-+ MOZ_CRASH_UNSAFE_PRINTF("mtspr: unhandled SPR %u", spr);
-+ }
-+ break;
-+ }
-+ case 19: {
-+ // mfocrf: read one CR field selected by the FXM bitmask into RT.
-+ // (Plain mfcr shares this XO with FXM=0; we model both by reading
-+ // the full CR — the JIT only emits mfocrf and the bits outside the
-+ // selected field are spec'd "undefined", so reading the full CR is
-+ // a valid implementation.)
-+ setRegister(rt, (int64_t)getCR());
-+ break;
-+ }
-+ case 144: {
-+ // mtcrf: move to CR fields
-+ // FXM field is in bits 12..19.
-+ uint32_t fxm = instr->bits(19, 12);
-+ uint32_t rs_val = U32(getRegister(rt));
-+ uint32_t cr = getCR();
-+ for (int i = 0; i < 8; i++) {
-+ if (fxm & (0x80 >> i)) {
-+ uint32_t shift = 4 * (7 - i);
-+ cr = (cr & ~(0xFu << shift)) | (rs_val & (0xFu << shift));
-+ }
-+ }
-+ setCR(cr);
-+ break;
-+ }
-+ case 576: {
-+ // mcrxrx: move XER[OV,OV32,CA,CA32] to CR field BF
-+ uint32_t bf = instr->bfValue();
-+ uint8_t field = 0;
-+ if (getXEROV()) field |= 0x8;
-+ // OV32 at bit 19 of XER
-+ if ((getXER() >> kXEROV32Bit) & 1) field |= 0x4;
-+ if (getXERCA()) field |= 0x2;
-+ if ((getXER() >> kXERCA32Bit) & 1) field |= 0x1;
-+ setCRField(bf, field);
-+ break;
-+ }
-+ case 384:
-+ case 416: {
-+ // POWER10 setbc/setbcr: RT = (CR[BI]==N) ? 1 : 0
-+ // BI at bits 11..15; xo=384 (setbc, N=1), xo=416 (setbcr, N=0).
-+ uint32_t bi = instr->raValue();
-+ uint32_t crField = bi / 4;
-+ uint32_t crBit = bi % 4;
-+ uint8_t crFieldVal = getCRField(crField);
-+ bool bitSet;
-+ switch (crBit) {
-+ case 0: bitSet = (crFieldVal & kCRFieldLT) != 0; break;
-+ case 1: bitSet = (crFieldVal & kCRFieldGT) != 0; break;
-+ case 2: bitSet = (crFieldVal & kCRFieldEQ) != 0; break;
-+ case 3: bitSet = (crFieldVal & kCRFieldSO) != 0; break;
-+ default: bitSet = false; break;
-+ }
-+ bool want = (xo == 384) ? bitSet : !bitSet;
-+ setRegister(rt, want ? 1 : 0);
-+ break;
-+ }
-+
-+ // --- Indexed loads ---
-+ case 21: {
-+ // ldx: RT = [RA|0 + RB], 8 bytes
-+ uint64_t ea = XFormEA(this, instr);
-+ setRegister(rt, readDW(ea, instr));
-+ break;
-+ }
-+ case 53: {
-+ // ldux: RT = [RA + RB], update RA
-+ uint64_t ea = XFormEAUpdate(this, instr);
-+ setRegister(rt, readDW(ea, instr));
-+ setRegister(ra, ea);
-+ break;
-+ }
-+ case 23: {
-+ // lwzx: RT = zero_ext([RA|0 + RB], 4 bytes)
-+ uint64_t ea = XFormEA(this, instr);
-+ setRegister(rt, U64(readWU(ea, instr)));
-+ break;
-+ }
-+ case 341: {
-+ // lwax: RT = sign_ext([RA|0 + RB], 4 bytes)
-+ uint64_t ea = XFormEA(this, instr);
-+ setRegister(rt, (int64_t)readW(ea, instr));
-+ break;
-+ }
-+ case 87: {
-+ // lbzx
-+ uint64_t ea = XFormEA(this, instr);
-+ setRegister(rt, U64(readBU(ea)));
-+ break;
-+ }
-+ case 279: {
-+ // lhzx
-+ uint64_t ea = XFormEA(this, instr);
-+ setRegister(rt, U64(readHU(ea, instr)));
-+ break;
-+ }
-+ case 343: {
-+ // lhax
-+ uint64_t ea = XFormEA(this, instr);
-+ setRegister(rt, (int64_t)readH(ea, instr));
-+ break;
-+ }
-+ case 535: {
-+ // lfsx: load float single indexed, widen to double (NaN-preserving)
-+ uint64_t ea = XFormEA(this, instr);
-+ if (!handleWasmSegFault(ea, 4)) {
-+ float val = *reinterpret_cast<float*>(ea);
-+ setFpuRegisterDouble(rt, promoteFloatPreservingNaN(val));
-+ }
-+ break;
-+ }
-+ case 599: {
-+ // lfdx: load float double indexed
-+ uint64_t ea = XFormEA(this, instr);
-+ setFpuRegisterDouble(rt, readD(ea, instr));
-+ break;
-+ }
-+ case 855: {
-+ // lfiwax: load float as integer word algebraic
-+ uint64_t ea = XFormEA(this, instr);
-+ int32_t val = readW(ea, instr);
-+ setFpuRegister(rt, (int64_t)val);
-+ break;
-+ }
-+ case 887: {
-+ // lfiwzx: load float as integer word zero
-+ uint64_t ea = XFormEA(this, instr);
-+ uint32_t val = readWU(ea, instr);
-+ setFpuRegister(rt, (int64_t)(uint64_t)val);
-+ break;
-+ }
-+
-+ // --- Indexed stores ---
-+ case 149: {
-+ // stdx
-+ uint64_t ea = XFormEA(this, instr);
-+ writeDW(ea, getRegister(rt), instr);
-+ break;
-+ }
-+ case 151: {
-+ // stwx
-+ uint64_t ea = XFormEA(this, instr);
-+ writeW(ea, I32(getRegister(rt)), instr);
-+ break;
-+ }
-+ case 215: {
-+ // stbx
-+ uint64_t ea = XFormEA(this, instr);
-+ writeB(ea, (uint8_t)(getRegister(rt) & 0xFF));
-+ break;
-+ }
-+ case 407: {
-+ // sthx
-+ uint64_t ea = XFormEA(this, instr);
-+ writeH(ea, U16(getRegister(rt)), instr);
-+ break;
-+ }
-+ case 663: {
-+ // stfsx: store float single indexed (NaN-preserving)
-+ uint64_t ea = XFormEA(this, instr);
-+ if (!handleWasmSegFault(ea, 4)) {
-+ float fval = demoteDoublePreservingNaN(getFpuRegisterDouble(rt));
-+ *reinterpret_cast<float*>(ea) = fval;
-+ LLBit_ = false;
-+ }
-+ break;
-+ }
-+ case 727: {
-+ // stfdx: store float double indexed
-+ uint64_t ea = XFormEA(this, instr);
-+ writeD(ea, getFpuRegisterDouble(rt), instr);
-+ break;
-+ }
-+
-+ // --- Byte-reversed stores ---
-+ case 662: {
-+ // stwbrx
-+ uint64_t ea = XFormEA(this, instr);
-+ uint32_t val = U32(getRegister(rt));
-+ writeW(ea, (int32_t)__builtin_bswap32(val), instr);
-+ break;
-+ }
-+
-+ // --- Atomic load/store ---
-+ //
-+ // Load-reserve and store-conditional. Sub-word variants
-+ // (lbarx/lharx/stbcx./sthcx.) were added in ISA v2.06 (POWER7+).
-+ // Word/doubleword variants (lwarx/stwcx./ldarx/stdcx.) go back
-+ // to the base ISA.
-+ case 52: {
-+ // lbarx RT, RA, RB, EH
-+ uint64_t ea = XFormEA(this, instr);
-+ uint8_t val = loadLinkedB(ea, instr);
-+ setRegister(rt, (int64_t)val);
-+ break;
-+ }
-+ case 116: {
-+ // lharx RT, RA, RB, EH
-+ uint64_t ea = XFormEA(this, instr);
-+ uint16_t val = loadLinkedH(ea, instr);
-+ setRegister(rt, (int64_t)val);
-+ break;
-+ }
-+ case 694: {
-+ // stbcx. RS, RA, RB: always Rc=1.
-+ uint64_t ea = XFormEA(this, instr);
-+ uint8_t val = uint8_t(getRegister(rt));
-+ int result = storeConditionalB(ea, val, instr);
-+ if (result) {
-+ setCRField(0, kCRFieldEQ | (kCRFieldSO * getXERSO()));
-+ } else {
-+ setCRField(0, kCRFieldSO * getXERSO());
-+ }
-+ break;
-+ }
-+ case 726: {
-+ // sthcx. RS, RA, RB: always Rc=1.
-+ uint64_t ea = XFormEA(this, instr);
-+ uint16_t val = uint16_t(getRegister(rt));
-+ int result = storeConditionalH(ea, val, instr);
-+ if (result) {
-+ setCRField(0, kCRFieldEQ | (kCRFieldSO * getXERSO()));
-+ } else {
-+ setCRField(0, kCRFieldSO * getXERSO());
-+ }
-+ break;
-+ }
-+ case 20: {
-+ // lwarx
-+ uint64_t ea = XFormEA(this, instr);
-+ int32_t val = loadLinkedW(ea, instr);
-+ setRegister(rt, (int64_t)val);
-+ break;
-+ }
-+ case 150: {
-+ // stwcx.
-+ uint64_t ea = XFormEA(this, instr);
-+ int32_t val = I32(getRegister(rt));
-+ int result = storeConditionalW(ea, val, instr);
-+ // stwcx. always updates CR0: EQ if store succeeded, else clear.
-+ if (result) {
-+ setCRField(0, kCRFieldEQ | (kCRFieldSO * getXERSO()));
-+ } else {
-+ setCRField(0, kCRFieldSO * getXERSO());
-+ }
-+ break;
-+ }
-+ case 84: {
-+ // ldarx
-+ uint64_t ea = XFormEA(this, instr);
-+ int64_t val = loadLinkedD(ea, instr);
-+ setRegister(rt, val);
-+ break;
-+ }
-+ case 214: {
-+ // stdcx.
-+ uint64_t ea = XFormEA(this, instr);
-+ int64_t val = getRegister(rt);
-+ int result = storeConditionalD(ea, val, instr);
-+ if (result) {
-+ setCRField(0, kCRFieldEQ | (kCRFieldSO * getXERSO()));
-+ } else {
-+ setCRField(0, kCRFieldSO * getXERSO());
-+ }
-+ break;
-+ }
-+
-+ // --- Synchronization ---
-+ case 598:
-+ // sync / lwsync / ptesync: no-op in simulator
-+ break;
-+ case 854:
-+ // eieio: no-op in simulator
-+ break;
-+
-+ // --- GPR <-> VSR move (major opcode 31, XX1-form) ---
-+ //
-+ // Two sub-encodings:
-+ // mtvsr* XT,RA{,RB}: XX1Form — XT at bits 25:21 (5) + TX at bit 0 (1);
-+ // RA at bits 20:16; RB (if any) at bits 15:11.
-+ // mfvsr* RA,XS: XX1FormMfvsr — XS at bits 25:21 (5) + SX at bit 0 (1);
-+ // RA (GPR dest) at bits 20:16.
-+ //
-+ // The original decoder treated "rsValue()" (bits 25:21 = VSR field) as a
-+ // GPR index — doubly wrong: the GPR side lives at bits 20:16 (= raValue())
-+ // and the VSR side is 6 bits (5-bit field + extension bit at bit 0). Fixed
-+ // here and extended for the full VSR namespace (0-63).
-+ // The ISA names each field in BE. "XT.DW0" is the BE doubleword which on
-+ // PPC64LE register storage lives at LE bytes 8-15 (our bytes[] is LE-natural:
-+ // bytes[0] = lowest address). With `mtvsrd / mfvsrd / mtvsrdd / mfvsrld
-+ // / stxvx`: mtvsrd of 0x1122334455667788 produces `00 00 00 00 00 00 00 00
-+ // 88 77 66 55 44 33 22 11` in memory (LE bytes 8-15 hold the GPR bits with
-+ // LSB at byte 8). Matching semantics here means the sim respects
-+ // the full Power ISA, not a self-consistent LE-reversed
-+ // convention.
-+ case 51: {
-+ // mfvsrd RA, XS: GPR[RA] = XS.DW0 = LE bytes 8..15.
-+ int xs = int(instr->rtValue() | (instr->bit(0) << 5)); // T + SX(TX)
-+ uint8_t bytes[16];
-+ getVSR128(xs, bytes);
-+ int64_t val;
-+ memcpy(&val, bytes + 8, 8);
-+ setRegister(instr->raValue(), val);
-+ break;
-+ }
-+ case 211: {
-+ // mtvsrwa XT, RA: XT.DW0 = sign_ext_64(RA[32:63]); XT.DW1 = 0.
-+ // POWER8+ (ISA 2.07). Combines extsw + mtvsrd. LE layout: bytes
-+ // 8-15 ← sign-extended low 32 of RA; bytes 0-7 ← 0.
-+ int xt = int(instr->rtValue() | (instr->bit(0) << 5));
-+ uint8_t bytes[16];
-+ int64_t val = (int64_t)(int32_t)getRegister(instr->raValue());
-+ memset(bytes, 0, 8);
-+ memcpy(bytes + 8, &val, 8);
-+ setVSR128(xt, bytes);
-+ break;
-+ }
-+ case 179: {
-+ // mtvsrd XT, RA: XT.DW0 = RA; XT.DW1 = 0.
-+ // LE layout: bytes 8-15 ← RA, bytes 0-7 ← 0.
-+ int xt = int(instr->rtValue() | (instr->bit(0) << 5));
-+ uint8_t bytes[16];
-+ int64_t val = getRegister(instr->raValue());
-+ memset(bytes, 0, 8);
-+ memcpy(bytes + 8, &val, 8);
-+ setVSR128(xt, bytes);
-+ break;
-+ }
-+ case 243: {
-+ // mtvsrwz XT, RA: XT.DW0 = zero_ext(RA[32:63]); XT.DW1 = 0.
-+ // The 32-bit value lives in the low 32 bits of DW0 = BE word 1,
-+ // which on LE storage is LE bytes 8..11 (LE word 2); LE bytes
-+ // 12..15 = 0 (upper half of DW0 = BE word 0 = zero-extended).
-+ int xt = int(instr->rtValue() | (instr->bit(0) << 5));
-+ uint8_t bytes[16];
-+ uint32_t lo = U32(getRegister(instr->raValue()));
-+ memset(bytes, 0, 16);
-+ bytes[8] = (uint8_t)(lo);
-+ bytes[9] = (uint8_t)(lo >> 8);
-+ bytes[10] = (uint8_t)(lo >> 16);
-+ bytes[11] = (uint8_t)(lo >> 24);
-+ setVSR128(xt, bytes);
-+ break;
-+ }
-+ case 307: {
-+ // mfvsrld RA, XS: GPR[RA] = XS.DW1 = LE bytes 0..7.
-+ // POWER9.
-+ int xs = int(instr->rtValue() | (instr->bit(0) << 5));
-+ uint8_t bytes[16];
-+ getVSR128(xs, bytes);
-+ int64_t val;
-+ memcpy(&val, bytes, 8);
-+ setRegister(instr->raValue(), val);
-+ break;
-+ }
-+ case 403: {
-+ // mtvsrws XT, RA (POWER9): splat low 32 bits of RA into all four
-+ // word elements of XT. The same 32-bit value appears in lanes 0..3,
-+ // so the byte layout is identical in LE and BE —
-+ // bytes 0..15 = lo | lo | lo | lo.
-+ int xt = int(instr->rtValue() | (instr->bit(0) << 5));
-+ uint8_t bytes[16];
-+ uint32_t lo = U32(getRegister(instr->raValue()));
-+ uint64_t val = ((uint64_t)lo << 32) | lo;
-+ memcpy(bytes, &val, 8);
-+ memcpy(bytes + 8, &val, 8);
-+ setVSR128(xt, bytes);
-+ break;
-+ }
-+ case 435: {
-+ // mtvsrdd XT, RA, RB: XT.DW0 = RA; XT.DW1 = RB. POWER9.
-+ // LE: bytes 8-15 ← RA, bytes 0-7 ← RB.
-+ int xt = int(instr->rtValue() | (instr->bit(0) << 5));
-+ uint8_t bytes[16];
-+ int64_t dw0 = getRegister(instr->raValue());
-+ int64_t dw1 = getRegister(instr->rbValue());
-+ memcpy(bytes, &dw1, 8);
-+ memcpy(bytes + 8, &dw0, 8);
-+ setVSR128(xt, bytes);
-+ break;
-+ }
-+
-+ // --- VMX vector memory (major opcode 31) ---
-+ //
-+ // lvx / stvx / lvxl / stvxl.
-+ // EA = (RA|0) + RB; EA = EA & ~0xF (alignment)
-+ // lvx: VRT[0:127] <- MEM(EA, 16) bytes[0] = *(EA+0)
-+ // stvx: MEM(EA, 16) <- VRS[0:127] *(EA+0) = bytes[0]
-+ // lvxl / stvxl are identical in effect to lvx / stvx (the "l" form
-+ // hints "least recently used"; semantically indistinguishable).
-+ case 103: {
-+ // lvx: VRT = MEM(EA & ~0xF, 16 bytes)
-+ uint64_t ea = XFormEA(this, instr) & ~uint64_t(0xF);
-+ if (handleWasmSegFault(ea, 16)) break;
-+ memcpy(VRregisters_[rt], reinterpret_cast<const void*>(ea), 16);
-+ break;
-+ }
-+ case 231: {
-+ // stvx: MEM(EA & ~0xF, 16 bytes) = VRS
-+ uint64_t ea = XFormEA(this, instr) & ~uint64_t(0xF);
-+ if (handleWasmSegFault(ea, 16)) break;
-+ memcpy(reinterpret_cast<void*>(ea), VRregisters_[rt], 16);
-+ break;
-+ }
-+ case 359: {
-+ // lvxl: semantically identical to lvx
-+ uint64_t ea = XFormEA(this, instr) & ~uint64_t(0xF);
-+ if (handleWasmSegFault(ea, 16)) break;
-+ memcpy(VRregisters_[rt], reinterpret_cast<const void*>(ea), 16);
-+ break;
-+ }
-+ case 487: {
-+ // stvxl: semantically identical to stvx
-+ uint64_t ea = XFormEA(this, instr) & ~uint64_t(0xF);
-+ if (handleWasmSegFault(ea, 16)) break;
-+ memcpy(reinterpret_cast<void*>(ea), VRregisters_[rt], 16);
-+ break;
-+ }
-+
-+ // --- VSX vector memory indexed (major opcode 31) ---
-+ //
-+ // These ops take a 6-bit VSR register,
-+ // encoded as 5-bit T/S + 1-bit TX/SX extension at instruction LSB
-+ // bit 0 (= our instr->bit(0)). EA = (RA|0) + RB. 16-byte access,
-+ // not forced-aligned (hardware may handle misaligned via sub-access
-+ // or alignment interrupt per impl).
-+ //
-+ // Byte-order note: lxvx/stxvx perform a natural 16-byte LE
-+ // memcpy. lxvd2x/stxvd2x on real PPC64 LE hardware load/store
-+ // doublewords in BE-pair order — i.e. lxvd2x places memory bytes
-+ // 0-7 in the register's BE-DW0 (= LE bytes 8-15) and bytes 8-15
-+ // in BE-DW1 (= LE bytes 0-7). The JIT brackets every wasm SIMD
-+ // load/store with a compensating xxpermdi DM=2 so the net effect
-+ // is a natural LE byte order. The constant pool emits the same
-+ // lxvd2x + xxpermdi sequence (per PatchConstantPoolLoad) but
-+ // assumes the hardware semantics, not a plain memcpy. So the sim
-+ // must match real-hardware lxvd2x/stxvd2x semantics including the
-+ // BE-DW byte order — otherwise the post-load xxpermdi unswaps
-+ // bytes that were never swapped, and constant-pool Simd128 loads
-+ // (e.g. shuffle masks) come out with halves transposed.
-+ case 268: {
-+ // lxvx: XT = MEM((RA|0)+RB, 16)
-+ uint64_t ea = XFormEA(this, instr);
-+ if (handleWasmSegFault(ea, 16)) break;
-+ int xt = int(instr->rtValue() | (instr->bit(0) << 5));
-+ uint8_t buf[16];
-+ memcpy(buf, reinterpret_cast<const void*>(ea), 16);
-+ setVSR128(xt, buf);
-+ break;
-+ }
-+ case 396: {
-+ // stxvx: MEM((RA|0)+RB, 16) = XS
-+ uint64_t ea = XFormEA(this, instr);
-+ if (handleWasmSegFault(ea, 16)) break;
-+ int xs = int(instr->rtValue() | (instr->bit(0) << 5));
-+ uint8_t buf[16];
-+ getVSR128(xs, buf);
-+ memcpy(reinterpret_cast<void*>(ea), buf, 16);
-+ break;
-+ }
-+ case 813: {
-+ // lxsihzx XT, RA, RB: P9 (ISA 3.0). Load halfword to VSR & zero,
-+ // indexed. MEM(EA, 2) (LE-natural halfword) is placed in dw[0]
-+ // low 16 bits; the rest of the VSR is zeroed. In sim LE-byte
-+ // storage, that is bytes[8..9] (low byte at bytes[8]).
-+ uint64_t ea = XFormEA(this, instr);
-+ if (handleWasmSegFault(ea, 2)) break;
-+ int xt = int(instr->rtValue() | (instr->bit(0) << 5));
-+ uint16_t halfword = readH(ea, instr);
-+ uint8_t buf[16];
-+ memset(buf, 0, 16);
-+ buf[8] = (uint8_t)(halfword & 0xFF);
-+ buf[9] = (uint8_t)((halfword >> 8) & 0xFF);
-+ setVSR128(xt, buf);
-+ break;
-+ }
-+ case 941: {
-+ // stxsihx XS, RA, RB: P9 (ISA 3.0). Store halfword from VSR,
-+ // indexed. dw[0] low 16 bits (sim bytes[8..9] in host-LE order)
-+ // are written as a halfword at MEM(EA, 2).
-+ uint64_t ea = XFormEA(this, instr);
-+ if (handleWasmSegFault(ea, 2)) break;
-+ int xs = int(instr->rtValue() | (instr->bit(0) << 5));
-+ uint8_t buf[16];
-+ getVSR128(xs, buf);
-+ uint16_t halfword =
-+ (uint16_t)buf[8] | ((uint16_t)buf[9] << 8);
-+ writeH(ea, halfword, instr);
-+ break;
-+ }
-+ case 844: {
-+ // lxvd2x: XT = MEM((RA|0)+RB, 16) with BE-DW byte ordering.
-+ // Memory bytes 0-7 land in BE-DW0 (= LE bytes 8-15); memory
-+ // bytes 8-15 land in BE-DW1 (= LE bytes 0-7).
-+ uint64_t ea = XFormEA(this, instr);
-+ if (handleWasmSegFault(ea, 16)) break;
-+ int xt = int(instr->rtValue() | (instr->bit(0) << 5));
-+ uint8_t mem[16], buf[16];
-+ memcpy(mem, reinterpret_cast<const void*>(ea), 16);
-+ memcpy(buf, mem + 8, 8);
-+ memcpy(buf + 8, mem, 8);
-+ setVSR128(xt, buf);
-+ break;
-+ }
-+ case 972: {
-+ // stxvd2x: MEM((RA|0)+RB, 16) = XS with BE-DW byte ordering.
-+ // Inverse of lxvd2x: register LE bytes 0-7 → memory bytes 8-15;
-+ // LE bytes 8-15 → memory bytes 0-7.
-+ uint64_t ea = XFormEA(this, instr);
-+ if (handleWasmSegFault(ea, 16)) break;
-+ int xs = int(instr->rtValue() | (instr->bit(0) << 5));
-+ uint8_t buf[16], mem[16];
-+ getVSR128(xs, buf);
-+ memcpy(mem, buf + 8, 8);
-+ memcpy(mem + 8, buf, 8);
-+ memcpy(reinterpret_cast<void*>(ea), mem, 16);
-+ break;
-+ }
-+
-+ default:
-+ MOZ_CRASH_UNSAFE_PRINTF(
-+ "decodeXForm: unimplemented XO=%u (instruction 0x%08x)", xo,
-+ instr->instructionBits());
-+ }
-+}
-+
-+// -----------------------------------------------------------------------------
-+// decodeRotateMask: rlwinm(21), rlwnm(23), rlwimi(20),
-+// rldicl(30), rldicr(30), rldic(30), rldimi(30), rldcl(30), rldcr(30)
-+
-+ void Simulator::decodeRotateMask(SimInstruction* instr) {
-+   // Rotate-left-then-mask family. Each instruction handled here rotates RS,
-+   // combines the rotated value with a mask, stores the result in RA and, in
-+   // the record form (Rc=1), updates CR0 from that result. An opcode outside
-+   // {20, 21, 23, 30} crashes.
-+   auto writeBack = [this, instr](int64_t value) {
-+     setRegister(instr->raValue(), value);
-+     if (instr->rcBit()) updateCR0(value);
-+   };
-+
-+   const uint32_t opcode = instr->opcode();
-+   switch (opcode) {
-+     case 21: {
-+       // rlwinm: RA = ROTL32(RS, SH) & MASK(MB, ME)
-+       const uint32_t source = U32(getRegister(instr->rsValue()));
-+       const uint32_t amount = instr->mSHValue();
-+       const uint32_t maskBegin = instr->mMBValue();
-+       const uint32_t maskEnd = instr->mMEValue();
-+       const uint32_t kept =
-+           RotateLeft32(source, amount) & MASK32(maskBegin, maskEnd);
-+       // The 32-bit result is zero-extended into the 64-bit GPR.
-+       writeBack((int64_t)(uint64_t)kept);
-+       return;
-+     }
-+     case 23: {
-+       // rlwnm: RA = ROTL32(RS, RB[27:31]) & MASK(MB, ME)
-+       const uint32_t source = U32(getRegister(instr->rsValue()));
-+       const uint32_t amount = U32(getRegister(instr->rbValue())) & 0x1F;
-+       const uint32_t maskBegin = instr->mMBValue();
-+       const uint32_t maskEnd = instr->mMEValue();
-+       const uint32_t kept =
-+           RotateLeft32(source, amount) & MASK32(maskBegin, maskEnd);
-+       writeBack((int64_t)(uint64_t)kept);
-+       return;
-+     }
-+     case 20: {
-+       // rlwimi: RA = (ROTL32(RS, SH) & MASK) | (RA & ~MASK)
-+       const uint32_t source = U32(getRegister(instr->rsValue()));
-+       const uint32_t amount = instr->mSHValue();
-+       const uint32_t maskBegin = instr->mMBValue();
-+       const uint32_t maskEnd = instr->mMEValue();
-+       const uint32_t inserted = RotateLeft32(source, amount);
-+       const uint32_t mask = MASK32(maskBegin, maskEnd);
-+       // RA is both an input and the destination: read it before writing.
-+       const uint32_t previous = U32(getRegister(instr->raValue()));
-+       writeBack((int64_t)(uint64_t)((inserted & mask) | (previous & ~mask)));
-+       return;
-+     }
-+     case 30:
-+       break;  // 64-bit forms, decoded below.
-+     default:
-+       MOZ_CRASH_UNSAFE_PRINTF("decodeRotateMask: opcode=%u", opcode);
-+   }
-+
-+   // Major opcode 30 sub-decoding as used here (instr->bit(n) numbering):
-+   //   bit 4 set   -> MDS-form, rotate count taken from RB:
-+   //                    bit 1 clear = rldcl, bit 1 set = rldcr
-+   //   bit 4 clear -> MD-form, immediate rotate count, selected by bits 3..2:
-+   //                    0 = rldicl, 1 = rldicr, 2 = rldic, 3 = rldimi
-+   const uint32_t target = instr->raValue();
-+   const uint64_t source = U64(getRegister(instr->rsValue()));
-+
-+   if (instr->bit(4)) {
-+     const uint32_t amount = U32(getRegister(instr->rbValue())) & 0x3F;
-+     const uint64_t rotated = RotateLeft64(source, amount);
-+     // The same field supplies MB for rldcl and ME for rldcr.
-+     const uint32_t bound = instr->mdsMBValue();
-+     uint64_t mask;
-+     if (instr->bit(1)) {
-+       // rldcr: RA = ROTL64(RS, RB[58:63]) & MASK(0, me)
-+       mask = MASK64(0, bound);
-+     } else {
-+       // rldcl: RA = ROTL64(RS, RB[58:63]) & MASK(mb, 63)
-+       mask = MASK64(bound, 63);
-+     }
-+     writeBack(I64(rotated & mask));
-+     return;
-+   }
-+
-+   const uint32_t amount = instr->mdSHValue();
-+   const uint64_t rotated = RotateLeft64(source, amount);
-+   const uint32_t xo_md = instr->bits(3, 2);
-+   uint64_t mask = 0;
-+   uint64_t preserved = 0;  // RA bits kept outside the mask (rldimi only)
-+   switch (xo_md) {
-+     case 0: {
-+       // rldicl: RA = ROTL64(RS, SH) & MASK(mb, 63)
-+       const uint32_t maskBegin = instr->mdMBValue();
-+       mask = MASK64(maskBegin, 63);
-+       break;
-+     }
-+     case 1: {
-+       // rldicr: RA = ROTL64(RS, SH) & MASK(0, me)
-+       const uint32_t maskEnd = instr->mdMEValue();
-+       mask = MASK64(0, maskEnd);
-+       break;
-+     }
-+     case 2: {
-+       // rldic: RA = ROTL64(RS, SH) & MASK(mb, 63 - SH)
-+       const uint32_t maskBegin = instr->mdMBValue();
-+       mask = MASK64(maskBegin, 63 - amount);
-+       break;
-+     }
-+     case 3: {
-+       // rldimi: RA = (ROTL64(RS, SH) & MASK(mb, 63 - SH)) | (RA & ~MASK)
-+       const uint32_t maskBegin = instr->mdMBValue();
-+       mask = MASK64(maskBegin, 63 - amount);
-+       preserved = U64(getRegister(target)) & ~mask;
-+       break;
-+     }
-+     default:
-+       MOZ_CRASH_UNSAFE_PRINTF("decodeRotateMask: MD xo=%u", xo_md);
-+   }
-+   writeBack(I64((rotated & mask) | preserved));
-+ }
-+
-+// -----------------------------------------------------------------------------
-+// CR-bit accessors used by the XL-form CR-logic ops (crand, crandc, cror,
-+// crorc, crxor, creqv). Bit index is in BIF*4+x form: field=b/4, bit=b%4
-+// where 0=LT, 1=GT, 2=EQ, 3=SO.
-+ static inline uint8_t CRBitMask(uint32_t bitInField) {
-+   // Translate a bit position inside a 4-bit CR field (0=LT, 1=GT, 2=EQ,
-+   // 3=SO) into the matching kCRField* mask; any other position yields 0.
-+   if (bitInField == 0) {
-+     return kCRFieldLT;
-+   }
-+   if (bitInField == 1) {
-+     return kCRFieldGT;
-+   }
-+   if (bitInField == 2) {
-+     return kCRFieldEQ;
-+   }
-+   if (bitInField == 3) {
-+     return kCRFieldSO;
-+   }
-+   return 0;
-+ }
-+
-+ static inline bool GetCRBit(Simulator& s, uint32_t b) {
-+   // b is a flat CR bit index: field = b / 4, bit within the field = b % 4.
-+   const uint8_t fieldValue = s.getCRField(b / 4);
-+   const uint8_t bitMask = CRBitMask(b % 4);
-+   return (fieldValue & bitMask) != 0;
-+ }
-+
-+ static inline void SetCRBit(Simulator& s, uint32_t b, bool val) {
-+   // Read-modify-write of one CR bit; the other three bits of the field are
-+   // left as they were. b is a flat index: field = b / 4, bit = b % 4.
-+   const uint32_t fieldIndex = b / 4;
-+   const uint8_t current = s.getCRField(fieldIndex);
-+   const uint8_t bitMask = CRBitMask(b % 4);
-+   if (val) {
-+     s.setCRField(fieldIndex, current | bitMask);
-+   } else {
-+     s.setCRField(fieldIndex, current & ~bitMask);
-+   }
-+ }
-+
-+// -----------------------------------------------------------------------------
-+// decodeBranch: b(18), bc(16), XL-form(19)
-+
-+ void Simulator::decodeBranch(SimInstruction* instr) {
-+   // Executes b/bl (opcode 18), bc/bcl (opcode 16) and the XL-form group under
-+   // opcode 19 (bclr, bcctr, the CR-logic ops, isync, the stop-based call
-+   // redirect and addpcis). Any other opcode or XL value crashes.
-+   //
-+   // BO masks used below:
-+   //   0x10  branch regardless of the tested CR bit
-+   //   0x08  value the tested CR bit must have for the condition to hold
-+   //   0x04  leave CTR alone (no decrement, no test)
-+   //   0x02  when CTR is in play, branch if the decremented CTR is zero
-+   //         (otherwise branch if it is non-zero)
-+
-+   // CTR half of the branch decision. Decrements CTR as a side effect unless
-+   // BO says to leave it alone, so it must be called exactly once per branch.
-+   auto counterAllows = [this](uint32_t bo) {
-+     if (bo & 0x04) {
-+       return true;
-+     }
-+     setCTR(getCTR() - 1);
-+     return (getCTR() != 0) != ((bo & 0x02) != 0);
-+   };
-+
-+   // CR half of the branch decision; bi is a flat CR bit index.
-+   auto conditionAllows = [this](uint32_t bo, uint32_t bi) {
-+     return (bo & 0x10) != 0 || GetCRBit(*this, bi) == ((bo & 0x08) != 0);
-+   };
-+
-+   // Commits a branch outcome. LR is written whenever LK=1, whether or not
-+   // the branch is taken: the Power ISA RTL for bc/bclr/bcctr places
-+   // "if LK then LR <- CIA + 4" outside the taken-branch condition. Callers
-+   // must compute `target` before calling, because bclr reads LR as its
-+   // target and LR may be overwritten here.
-+   auto finishBranch = [this](bool taken, int64_t target, bool lk) {
-+     const auto fallThrough = get_pc() + SimInstruction::kInstrSize;
-+     if (lk) {
-+       setLR(fallThrough);
-+     }
-+     if (taken) {
-+       set_pc(target);
-+     } else {
-+       set_pc(fallThrough);
-+     }
-+   };
-+
-+   const uint32_t opcode = instr->opcode();
-+
-+   if (opcode == 18) {
-+     // b / bl: I-form unconditional branch, absolute when AA=1.
-+     const int32_t offset = instr->li26Value();
-+     const int64_t target =
-+         instr->aaBit() ? (int64_t)offset : get_pc() + (int64_t)offset;
-+     finishBranch(true, target, instr->lkBit());
-+     return;
-+   }
-+
-+   if (opcode == 16) {
-+     // bc / bcl: B-form conditional branch, absolute when AA=1.
-+     const uint32_t bo = instr->boValue();
-+     const uint32_t bi = instr->biValue();
-+     const int32_t bd = instr->bd16Value();
-+     const bool ctr_ok = counterAllows(bo);
-+     const bool cond_ok = conditionAllows(bo, bi);
-+     const int64_t target =
-+         instr->aaBit() ? (int64_t)bd : get_pc() + (int64_t)bd;
-+     finishBranch(ctr_ok && cond_ok, target, instr->lkBit());
-+     return;
-+   }
-+
-+   if (opcode == 19) {
-+     const uint32_t xl = instr->xlValue();
-+
-+     switch (xl) {
-+       case 16: {
-+         // bclr: conditional branch to LR (low two bits ignored).
-+         const uint32_t bo = instr->boValue();
-+         const uint32_t bi = instr->biValue();
-+         const bool ctr_ok = counterAllows(bo);
-+         const bool cond_ok = conditionAllows(bo, bi);
-+         const int64_t target = getLR() & ~3LL;
-+         finishBranch(ctr_ok && cond_ok, target, instr->lkBit());
-+         break;
-+       }
-+       case 528: {
-+         // bcctr: conditional branch to CTR (low two bits ignored). CTR is
-+         // neither decremented nor tested for this form.
-+         const uint32_t bo = instr->boValue();
-+         const uint32_t bi = instr->biValue();
-+         const bool cond_ok = conditionAllows(bo, bi);
-+         const int64_t target = getCTR() & ~3LL;
-+         finishBranch(cond_ok, target, instr->lkBit());
-+         break;
-+       }
-+       case 257:    // crand:  CR[BT] = CR[BA] & CR[BB]
-+       case 129:    // crandc: CR[BT] = CR[BA] & ~CR[BB]
-+       case 449:    // cror:   CR[BT] = CR[BA] | CR[BB]
-+       case 417:    // crorc:  CR[BT] = CR[BA] | ~CR[BB]
-+       case 193:    // crxor:  CR[BT] = CR[BA] ^ CR[BB]
-+       case 289: {  // creqv:  CR[BT] = ~(CR[BA] ^ CR[BB])
-+         // BT/BA/BB occupy the same instruction fields as RT/RA/RB.
-+         const uint32_t bt = instr->rtValue();
-+         const bool a = GetCRBit(*this, instr->raValue());
-+         const bool b = GetCRBit(*this, instr->rbValue());
-+         bool value = false;
-+         switch (xl) {
-+           case 257: value = a && b; break;
-+           case 129: value = a && !b; break;
-+           case 449: value = a || b; break;
-+           case 417: value = a || !b; break;
-+           case 193: value = a != b; break;
-+           case 289: value = a == b; break;
-+         }
-+         SetCRBit(*this, bt, value);
-+         break;
-+       }
-+       case 150: {
-+         // isync: no-op in simulator
-+         break;
-+       }
-+       case 370: {
-+         // PPC_stop (0x4C0002E4) decoded as XL-form opcode 19, XL=370.
-+         // This is our kCallRedirInstr. Handle via softwareInterrupt.
-+         softwareInterrupt(instr);
-+         break;
-+       }
-+       case 2: {
-+         // POWER9 addpcis rT, D (DX-form): rT = (CIA + 4) + (sext16(D) << 16).
-+         // The 16-bit signed displacement D is split across three fields:
-+         //   d0 = instruction bits 15..6  (10 bits) -> D[15:6]
-+         //   d1 = instruction bits 20..16 (5 bits)  -> D[5:1]
-+         //   d2 = instruction bit 0       (1 bit)   -> D[0]
-+         // (Mirrors the encoder in Assembler-ppc64.cpp:as_addpcis.)
-+         const uint32_t rt = instr->rtValue();
-+         const uint32_t d0 = instr->bits(15, 6);
-+         const uint32_t d1 = instr->bits(20, 16);
-+         const uint32_t d2 = instr->bit(0);
-+         const int16_t D = (int16_t)((d0 << 6) | (d1 << 1) | d2);
-+         // The instruction's own address serves as CIA.
-+         const int64_t cia = reinterpret_cast<int64_t>(instr);
-+         setRegister(rt, cia + SimInstruction::kInstrSize +
-+                             (static_cast<int64_t>(D) << 16));
-+         break;
-+       }
-+       default:
-+         MOZ_CRASH_UNSAFE_PRINTF("decodeBranch: XL opcode 19, xl=%u", xl);
-+     }
-+     return;
-+   }
-+
-+   MOZ_CRASH_UNSAFE_PRINTF("decodeBranch: opcode=%u", opcode);
-+ }
-+
-+// -----------------------------------------------------------------------------
-+// decodeFP: Major opcodes 59 (A-form single) and 63 (X-form / A-form double)
-+
-+void Simulator::decodeFP(SimInstruction* instr) {
-+ uint32_t opcode = instr->opcode();
-+ uint32_t rt = instr->rtValue(); // FRT
-+ uint32_t ra = instr->raValue(); // FRA
-+ uint32_t rb = instr->rbValue(); // FRB
-+ uint32_t rc_reg = instr->rcValue(); // FRC (A-form)
-+
-+ if (opcode == 63) {
-+ // X-form and A-form double-precision instructions.
-+ // For A-form, the sub-opcode is in bits 1..5.
-+ // For X-form, the sub-opcode is in bits 1..10.
-+ uint32_t xo_a = instr->bits(5, 1); // A-form sub-opcode
-+ uint32_t xo_x = instr->bits(10, 1); // X-form sub-opcode
-+
-+ // Try A-form first (5-bit sub-opcode in bits 1..5).
-+ switch (xo_a) {
-+ case 21: {
-+ // fadd
-+ double result = getFpuRegisterDouble(ra) + getFpuRegisterDouble(rb);
-+ setFpuRegisterDouble(rt, result);
-+ return;
-+ }
-+ case 20: {
-+ // fsub
-+ double result = getFpuRegisterDouble(ra) - getFpuRegisterDouble(rb);
-+ setFpuRegisterDouble(rt, result);
-+ return;
-+ }
-+ case 25: {
-+ // fmul: FRT = FRA * FRC (note: FRC, not FRB!)
-+ double result = getFpuRegisterDouble(ra) * getFpuRegisterDouble(rc_reg);
-+ setFpuRegisterDouble(rt, result);
-+ return;
-+ }
-+ case 18: {
-+ // fdiv
-+ double result = getFpuRegisterDouble(ra) / getFpuRegisterDouble(rb);
-+ setFpuRegisterDouble(rt, result);
-+ return;
-+ }
-+ case 22: {
-+ // fsqrt
-+ double result = sqrt(getFpuRegisterDouble(rb));
-+ setFpuRegisterDouble(rt, result);
-+ return;
-+ }
-+ case 29: {
-+ // fmadd: FRT = FRA * FRC + FRB
-+ double result = std::fma(getFpuRegisterDouble(ra),
-+ getFpuRegisterDouble(rc_reg),
-+ getFpuRegisterDouble(rb));
-+ setFpuRegisterDouble(rt, result);
-+ return;
-+ }
-+ case 30: {
-+ // fnmsub: FRT = -(FRA * FRC - FRB)
-+ double result = -(std::fma(getFpuRegisterDouble(ra),
-+ getFpuRegisterDouble(rc_reg),
-+ -getFpuRegisterDouble(rb)));
-+ setFpuRegisterDouble(rt, result);
-+ return;
-+ }
-+ case 28: {
-+ // fmsub: FRT = FRA * FRC - FRB
-+ double result = std::fma(getFpuRegisterDouble(ra),
-+ getFpuRegisterDouble(rc_reg),
-+ -getFpuRegisterDouble(rb));
-+ setFpuRegisterDouble(rt, result);
-+ return;
-+ }
-+ case 31: {
-+ // fnmadd: FRT = -(FRA * FRC + FRB)
-+ double result = -(std::fma(getFpuRegisterDouble(ra),
-+ getFpuRegisterDouble(rc_reg),
-+ getFpuRegisterDouble(rb)));
-+ setFpuRegisterDouble(rt, result);
-+ return;
-+ }
-+ case 23: {
-+ // fsel: FRT = (FRA >= 0) ? FRC : FRB
-+ double fra = getFpuRegisterDouble(ra);
-+ setFpuRegisterDouble(rt, (fra >= 0.0) ? getFpuRegisterDouble(rc_reg)
-+ : getFpuRegisterDouble(rb));
-+ return;
-+ }
-+ case 26: {
-+ // frsqrte: FRT = 1.0 / sqrt(FRB) (estimate)
-+ double result = 1.0 / sqrt(getFpuRegisterDouble(rb));
-+ setFpuRegisterDouble(rt, result);
-+ return;
-+ }
-+ }
-+
-+ // X-form (10-bit sub-opcode).
-+ switch (xo_x) {
-+ case 72: {
-+ // fmr: FRT = FRB
-+ setFpuRegisterDouble(rt, getFpuRegisterDouble(rb));
-+ break;
-+ }
-+ case 40: {
-+ // fneg: FRT = -FRB
-+ setFpuRegisterDouble(rt, -getFpuRegisterDouble(rb));
-+ break;
-+ }
-+ case 264: {
-+ // fabs: FRT = |FRB|
-+ setFpuRegisterDouble(rt, fabs(getFpuRegisterDouble(rb)));
-+ break;
-+ }
-+ case 136: {
-+ // fnabs: FRT = -|FRB|
-+ setFpuRegisterDouble(rt, -fabs(getFpuRegisterDouble(rb)));
-+ break;
-+ }
-+ case 8: {
-+ // fcpsgn: FRT = sign(FRA) || magnitude(FRB)
-+ double fra = getFpuRegisterDouble(ra);
-+ double frb = getFpuRegisterDouble(rb);
-+ setFpuRegisterDouble(rt, std::copysign(frb, fra));
-+ break;
-+ }
-+ case 0: {
-+ // fcmpu: compare FRA, FRB unordered
-+ uint32_t bf = instr->bfValue();
-+ double fra = getFpuRegisterDouble(ra);
-+ double frb = getFpuRegisterDouble(rb);
-+ uint8_t field = 0;
-+ if (std::isnan(fra) || std::isnan(frb)) {
-+ field = kCRFieldSO;
-+ } else if (fra < frb) {
-+ field = kCRFieldLT;
-+ } else if (fra > frb) {
-+ field = kCRFieldGT;
-+ } else {
-+ field = kCRFieldEQ;
-+ }
-+ setCRField(bf, field);
-+ break;
-+ }
-+ case 32: {
-+ // fcmpo: compare FRA, FRB ordered
-+ uint32_t bf = instr->bfValue();
-+ double fra = getFpuRegisterDouble(ra);
-+ double frb = getFpuRegisterDouble(rb);
-+ uint8_t field = 0;
-+ if (std::isnan(fra) || std::isnan(frb)) {
-+ field = kCRFieldSO;
-+ } else if (fra < frb) {
-+ field = kCRFieldLT;
-+ } else if (fra > frb) {
-+ field = kCRFieldGT;
-+ } else {
-+ field = kCRFieldEQ;
-+ }
-+ setCRField(bf, field);
-+ break;
-+ }
-+ // For fctid* and fctiw* the ISA specifies that bit 23 of FPSCR (VXCVI,
-+ // "invalid op for integer convert") is set when the source is NaN, +Inf,
-+ // -Inf, or out of the destination's range. Wasm's out-of-range trap
-+ // sequence is `mtfsb0 23; fctidz; mfvsrd; mcrfs cr0,5; bt SOBit,trap`,
-+ // so the simulator MUST update VXCVI here for the trap to fire. With
-+ // FPSCR_ in the low-half PPC layout (PPC bit N → int64 bit (31-N)),
-+ // VXCVI lives at int64 bit (31-23) = 8.
-+ case 814: {
-+ // fctid: convert double to int64 (current rounding)
-+ double frb = getFpuRegisterDouble(rb);
-+ int64_t result;
-+ bool invalid = false;
-+ if (std::isnan(frb)) {
-+ result = INT64_MIN;
-+ invalid = true;
-+ } else if (frb >= -(double)INT64_MIN || frb < (double)INT64_MIN) {
-+ result = (frb < 0) ? INT64_MIN : INT64_MAX;
-+ invalid = true;
-+ } else {
-+ switch (FPSCR_ & kFPSCRRNMask) {
-+ case RN: result = (int64_t)llrint(frb); break;
-+ case RZ: result = (int64_t)frb; break;
-+ case RP: result = (int64_t)ceil(frb); break;
-+ case RM: result = (int64_t)floor(frb); break;
-+ default: result = (int64_t)frb; break;
-+ }
-+ }
-+ if (invalid) FPSCR_ |= (1ULL << 8); /* VXCVI: PPC bit 23 in low-half layout */
-+ setFpuRegister(rt, result);
-+ break;
-+ }
-+ case 815: {
-+ // fctidz: convert double to int64 (round toward zero)
-+ double frb = getFpuRegisterDouble(rb);
-+ int64_t result;
-+ bool invalid = false;
-+ if (std::isnan(frb)) {
-+ result = INT64_MIN;
-+ invalid = true;
-+ } else if (frb >= -(double)INT64_MIN) {
-+ result = INT64_MAX;
-+ invalid = true;
-+ } else if (frb < (double)INT64_MIN) {
-+ result = INT64_MIN;
-+ invalid = true;
-+ } else {
-+ result = (int64_t)frb;
-+ }
-+ if (invalid) FPSCR_ |= (1ULL << 8); /* VXCVI: PPC bit 23 in low-half layout */
-+ setFpuRegister(rt, result);
-+ break;
-+ }
-+ case 942: {
-+ // fctidu: convert double to uint64 (current rounding).
-+ // VXCVI is signaled when source is NaN, ±Inf, or the rounded value
-+ // is outside [0, 2^64-1]. Notably,
-+ // a negative source whose rounded value is 0 (e.g. -0.4 in RN, or
-+ // any value in (-1, 0) in RZ) is NOT invalid.
-+ double frb = getFpuRegisterDouble(rb);
-+ uint64_t result;
-+ bool invalid = false;
-+ if (std::isnan(frb)) {
-+ result = 0;
-+ invalid = true;
-+ } else if (frb >= -2.0 * (double)INT64_MIN /* 2^64 */) {
-+ result = UINT64_MAX;
-+ invalid = true;
-+ } else {
-+ double rounded;
-+ switch (FPSCR_ & kFPSCRRNMask) {
-+ case RN: rounded = nearbyint(frb); break;
-+ case RZ: rounded = trunc(frb); break;
-+ case RP: rounded = ceil(frb); break;
-+ case RM: rounded = floor(frb); break;
-+ default: rounded = trunc(frb); break;
-+ }
-+ if (rounded < 0.0) {
-+ result = 0;
-+ invalid = true;
-+ } else {
-+ result = (uint64_t)rounded;
-+ }
-+ }
-+ if (invalid) FPSCR_ |= (1ULL << 8); /* VXCVI: PPC bit 23 in low-half layout */
-+ setFpuRegister(rt, I64(result));
-+ break;
-+ }
-+ case 943: {
-+ // fctiduz: convert double to uint64 (round toward zero).
-+ // Same VXCVI rule as fctidu but rounding is fixed to truncate
-+ // toward zero. Source in (-1, 0) truncates to 0 — VALID.
-+ double frb = getFpuRegisterDouble(rb);
-+ uint64_t result;
-+ bool invalid = false;
-+ if (std::isnan(frb)) {
-+ result = 0;
-+ invalid = true;
-+ } else if (frb >= -2.0 * (double)INT64_MIN /* 2^64 */) {
-+ result = UINT64_MAX;
-+ invalid = true;
-+ } else if (frb <= -1.0) {
-+ // Truncated value is negative — invalid for unsigned.
-+ result = 0;
-+ invalid = true;
-+ } else {
-+ // Source is in (-1, 2^64); truncation toward zero yields a value
-+ // in [0, 2^64).
-+ result = (uint64_t)trunc(frb);
-+ }
-+ if (invalid) FPSCR_ |= (1ULL << 8); /* VXCVI: PPC bit 23 in low-half layout */
-+ setFpuRegister(rt, I64(result));
-+ break;
-+ }
-+ case 14: {
-+ // fctiw: convert double to int32 (current rounding).
-+ // Invalid range: rounded value < INT32_MIN or > INT32_MAX. The range
-+ // check is applied to the ROUNDED value, not to the source: a source
-+ // slightly below -2^31 may still round to -2^31 (valid) depending on
-+ // the rounding mode, so comparing frb directly would be wrong.
-+ double frb = getFpuRegisterDouble(rb);
-+ int32_t result;
-+ bool invalid = false;
-+ if (std::isnan(frb)) {
-+ result = INT32_MIN;
-+ invalid = true;
-+ } else {
-+ double rounded;
-+ switch (FPSCR_ & kFPSCRRNMask) {
-+ case RN: rounded = nearbyint(frb); break;
-+ case RZ: rounded = trunc(frb); break;
-+ case RP: rounded = ceil(frb); break;
-+ case RM: rounded = floor(frb); break;
-+ default: rounded = trunc(frb); break;
-+ }
-+ if (rounded > (double)INT32_MAX) {
-+ result = INT32_MAX;
-+ invalid = true;
-+ } else if (rounded < (double)INT32_MIN) {
-+ result = INT32_MIN;
-+ invalid = true;
-+ } else {
-+ result = (int32_t)rounded;
-+ }
-+ }
-+ if (invalid) FPSCR_ |= (1ULL << 8); /* VXCVI: PPC bit 23 in low-half layout */
-+ setFpuRegister(rt, (int64_t)result);
-+ break;
-+ }
-+ case 15: {
-+ // fctiwz: convert double to int32 (round toward zero).
-+ // Truncation of a value in (-2^31-1, INT32_MIN) toward zero gives
-+ // INT32_MIN — valid; only frb <= -2^31-1 is out of range on the
-+ // negative side. Rather than encode that boundary, truncate first
-+ // and range-check the truncated value.
-+ double frb = getFpuRegisterDouble(rb);
-+ int32_t result;
-+ bool invalid = false;
-+ if (std::isnan(frb)) {
-+ result = INT32_MIN;
-+ invalid = true;
-+ } else {
-+ double truncated = trunc(frb);
-+ if (truncated > (double)INT32_MAX) {
-+ result = INT32_MAX;
-+ invalid = true;
-+ } else if (truncated < (double)INT32_MIN) {
-+ result = INT32_MIN;
-+ invalid = true;
-+ } else {
-+ result = (int32_t)truncated;
-+ }
-+ }
-+ if (invalid) FPSCR_ |= (1ULL << 8); /* VXCVI: PPC bit 23 in low-half layout */
-+ setFpuRegister(rt, (int64_t)result);
-+ break;
-+ }
-+ case 142: {
-+ // fctiwu: convert double to uint32 (current rounding). The check is
-+ // on the ROUNDED value: VXCVI iff rounded < 0 or rounded > UINT32_MAX.
-+ double frb = getFpuRegisterDouble(rb);
-+ uint32_t result;
-+ bool invalid = false;
-+ if (std::isnan(frb)) {
-+ result = 0;
-+ invalid = true;
-+ } else {
-+ double rounded;
-+ switch (FPSCR_ & kFPSCRRNMask) {
-+ case RN: rounded = nearbyint(frb); break;
-+ case RZ: rounded = trunc(frb); break;
-+ case RP: rounded = ceil(frb); break;
-+ case RM: rounded = floor(frb); break;
-+ default: rounded = trunc(frb); break;
-+ }
-+ if (rounded < 0.0) {
-+ result = 0;
-+ invalid = true;
-+ } else if (rounded > (double)UINT32_MAX) {
-+ result = UINT32_MAX;
-+ invalid = true;
-+ } else {
-+ result = (uint32_t)rounded;
-+ }
-+ }
-+ if (invalid) FPSCR_ |= (1ULL << 8); /* VXCVI: PPC bit 23 in low-half layout */
-+ setFpuRegister(rt, (int64_t)(uint64_t)result);
-+ break;
-+ }
-+ case 143: {
-+ // fctiwuz: convert double to uint32 (round toward zero).
-+ // Source in (-1, 0) truncates to 0 — VALID.
-+ double frb = getFpuRegisterDouble(rb);
-+ uint32_t result;
-+ bool invalid = false;
-+ if (std::isnan(frb)) {
-+ result = 0;
-+ invalid = true;
-+ } else {
-+ double truncated = trunc(frb);
-+ if (truncated > (double)UINT32_MAX) {
-+ result = UINT32_MAX;
-+ invalid = true;
-+ } else if (truncated < 0.0) {
-+ result = 0;
-+ invalid = true;
-+ } else {
-+ result = (uint32_t)truncated;
-+ }
-+ }
-+ if (invalid) FPSCR_ |= (1ULL << 8); /* VXCVI: PPC bit 23 in low-half layout */
-+ setFpuRegister(rt, (int64_t)(uint64_t)result);
-+ break;
-+ }
-+ case 846: {
-+ // fcfid: convert int64 in FPR to double
-+ int64_t val = getFpuRegister(rb);
-+ setFpuRegisterDouble(rt, (double)val);
-+ break;
-+ }
-+ case 974: {
-+ // fcfidu: convert uint64 in FPR to double
-+ uint64_t val = U64(getFpuRegister(rb));
-+ setFpuRegisterDouble(rt, (double)val);
-+ break;
-+ }
-+ case 12: {
-+ // frsp: round double to single precision (then re-extend in FPR).
-+ // sNaN inputs are quieted (the result payload MSB is set).
-+ // wasm f32.demote_f64 lowers to this op when
-+ // not using xscvdpsp directly.
-+ double frb = getFpuRegisterDouble(rb);
-+ float result = demoteDoublePreservingNaN(frb);
-+ uint32_t fbits;
-+ memcpy(&fbits, &result, sizeof(fbits));
-+ if ((fbits & 0x7F800000u) == 0x7F800000u &&
-+ (fbits & 0x007FFFFFu) != 0) {
-+ fbits |= 0x00400000u;
-+ memcpy(&result, &fbits, sizeof(result));
-+ }
-+ setFpuRegisterDouble(rt, promoteFloatPreservingNaN(result));
-+ break;
-+ }
-+ case 392: {
-+ // frin: round to nearest integer (ties away from zero)
-+ double frb = getFpuRegisterDouble(rb);
-+ setFpuRegisterDouble(rt, round(frb));
-+ break;
-+ }
-+ case 424: {
-+ // friz: round toward zero
-+ double frb = getFpuRegisterDouble(rb);
-+ setFpuRegisterDouble(rt, trunc(frb));
-+ break;
-+ }
-+ case 456: {
-+ // frip: round toward +infinity (ceil). XO=456.
-+ double frb = getFpuRegisterDouble(rb);
-+ setFpuRegisterDouble(rt, ceil(frb));
-+ break;
-+ }
-+ case 488: {
-+ // frim: round toward -infinity (floor). XO=488.
-+ double frb = getFpuRegisterDouble(rb);
-+ setFpuRegisterDouble(rt, floor(frb));
-+ break;
-+ }
-+ case 583: {
-+ // mffs: FRT = FPSCR (as double bit pattern)
-+ setFpuRegister(rt, I64(FPSCR_));
-+ break;
-+ }
-+ // FPSCR is treated as a 32-bit register stored in the low 32 bits of
-+ // FPSCR_ (uint64_t), with PPC bit numbering: PPC bit N (where bit 0 is
-+ // the MSB) lives at int64 bit (31-N). Field F (4 bits) covers PPC bits
-+ // 4F..4F+3 → int64 bit-LSB (28-4F) to bit-MSB (31-4F). This matches
-+ // mcrfs, mtfsfi, kFPSCRRNMask (which checks bits 30-31 PPC = int64 bits
-+ // 0-1), and mffs (which copies FPSCR into FPR bits 32..63 PPC = int64
-+ // bits 0..31). Earlier mtfsb0/mtfsb1 used (63-bt) which placed bits in
-+ // the high half of FPSCR_ where mcrfs etc. would never see them — so
-+ // the wasm trap sequence `mtfsb0 23; fctidz; mcrfs cr0,5; bt SO,oolEntry`
-+ // could not detect VXCVI.
-+ case 70: {
-+ // mtfsb0: clear FPSCR bit. XO=70.
-+ // (Cases 38 and 70 had the labels swapped, so wasm's
-+ // `mtfsb0 23; fctidz; mcrfs cr0,5; bt SO,trap` sequence accidentally
-+ // SET VXCVI before the convert ran, causing every fctid* to trap.)
-+ uint32_t bt = instr->rtValue();
-+ FPSCR_ &= ~(1ULL << (31 - bt));
-+ break;
-+ }
-+ case 64: {
-+ // mcrfs: copy FPSCR field to CR field
-+ uint32_t bf = instr->bfValue();
-+ uint32_t bfa = instr->bits(20, 18);
-+ uint32_t shift = 4 * (7 - bfa);
-+ uint8_t val = (FPSCR_ >> shift) & 0xF;
-+ setCRField(bf, val);
-+ break;
-+ }
-+ default:
-+ MOZ_CRASH_UNSAFE_PRINTF(
-+ "decodeFP: opcode 63, xo_x=%u (instruction 0x%08x)", xo_x,
-+ instr->instructionBits());
-+ }
-+ } else if (opcode == 59) {
-+ // A-form single-precision instructions.
-+ uint32_t xo_a = instr->bits(5, 1);
-+
-+ switch (xo_a) {
-+ case 21: {
-+ // fadds
-+ double result = (double)((float)(getFpuRegisterDouble(ra) +
-+ getFpuRegisterDouble(rb)));
-+ setFpuRegisterDouble(rt, result);
-+ break;
-+ }
-+ case 20: {
-+ // fsubs
-+ double result = (double)((float)(getFpuRegisterDouble(ra) -
-+ getFpuRegisterDouble(rb)));
-+ setFpuRegisterDouble(rt, result);
-+ break;
-+ }
-+ case 25: {
-+ // fmuls: FRT = (float)(FRA * FRC)
-+ double result = (double)((float)(getFpuRegisterDouble(ra) *
-+ getFpuRegisterDouble(rc_reg)));
-+ setFpuRegisterDouble(rt, result);
-+ break;
-+ }
-+ case 18: {
-+ // fdivs
-+ double result = (double)((float)(getFpuRegisterDouble(ra) /
-+ getFpuRegisterDouble(rb)));
-+ setFpuRegisterDouble(rt, result);
-+ break;
-+ }
-+ case 22: {
-+ // fsqrts
-+ double result = (double)sqrtf((float)getFpuRegisterDouble(rb));
-+ setFpuRegisterDouble(rt, result);
-+ break;
-+ }
-+ case 29: {
-+ // fmadds
-+ double result = (double)((float)std::fma(getFpuRegisterDouble(ra),
-+ getFpuRegisterDouble(rc_reg),
-+ getFpuRegisterDouble(rb)));
-+ setFpuRegisterDouble(rt, result);
-+ break;
-+ }
-+ case 30: {
-+ // fnmsubs
-+ double result = (double)(-(float)std::fma(getFpuRegisterDouble(ra),
-+ getFpuRegisterDouble(rc_reg),
-+ -getFpuRegisterDouble(rb)));
-+ setFpuRegisterDouble(rt, result);
-+ break;
-+ }
-+ case 28: {
-+ // fmsubs
-+ double result = (double)((float)std::fma(getFpuRegisterDouble(ra),
-+ getFpuRegisterDouble(rc_reg),
-+ -getFpuRegisterDouble(rb)));
-+ setFpuRegisterDouble(rt, result);
-+ break;
-+ }
-+ case 31: {
-+ // fnmadds
-+ double result = (double)(-(float)std::fma(getFpuRegisterDouble(ra),
-+ getFpuRegisterDouble(rc_reg),
-+ getFpuRegisterDouble(rb)));
-+ setFpuRegisterDouble(rt, result);
-+ break;
-+ }
-+ default: {
-+ // Try X-form sub-opcodes for opcode 59 (e.g., fcfids, fcfidus).
-+ uint32_t xo_x = instr->bits(10, 1);
-+ switch (xo_x) {
-+ case 846: {
-+ // fcfids: convert int64 to float single (result stored as double)
-+ int64_t val = getFpuRegister(rb);
-+ setFpuRegisterDouble(rt, (double)(float)val);
-+ break;
-+ }
-+ case 974: {
-+ // fcfidus: convert uint64 to float single
-+ uint64_t val = U64(getFpuRegister(rb));
-+ setFpuRegisterDouble(rt, (double)(float)val);
-+ break;
-+ }
-+ default:
-+ MOZ_CRASH_UNSAFE_PRINTF(
-+ "decodeFP: opcode 59, xo_a=%u xo_x=%u", xo_a, xo_x);
-+ }
-+ break;
-+ }
-+ }
-+ } else {
-+ MOZ_CRASH_UNSAFE_PRINTF("decodeFP: opcode=%u", opcode);
-+ }
-+}
-+
-+// -----------------------------------------------------------------------------
-+// decodeVMX: Major opcode 4 (AltiVec/VMX vector ops on VR0-VR31).
-+//
-+// VR-form (VX-form): bits 0-5 = primary opcode (4), bits 6-10 = VRT,
-+// bits 11-15 = VRA, bits 16-20 = VRB, bits 21-31 = XO (11 bits).
-+// XO extracted via `instructionBits() & 0x7FF`.
-+//
-+// Helpers below pack/unpack each VR via the VRregisters_ byte storage
-+// (16 bytes). The lane accessors in decodeVMX index that buffer in
-+// little-endian order: byte 0 is the least-significant byte, so PPC
-+// (big-endian) element i of an N-element vector is LE element N-1-i,
-+// which is the lane ordering the PPC64 LE JIT expects. All ops here use
-+// byte-level accessors for consistency with the existing VMX memory ops.
-+
-+void Simulator::decodeVMX(SimInstruction* instr) {
-+ uint32_t xo = instr->instructionBits() & 0x7FFu;
-+ uint32_t vrt = instr->rtValue(); // bits 6..10
-+ uint32_t vra = instr->raValue(); // bits 11..15
-+ uint32_t vrb = instr->rbValue(); // bits 16..20
-+ uint32_t uimm = instr->raValue(); // VA-form: 5-bit immediate at bits 11..15
-+
-+ uint8_t a[16], b[16], r[16];
-+ getVRBytes(vra, a);
-+ getVRBytes(vrb, b);
-+
-+ // Helpers for treating the byte storage as typed lane arrays.
-+ // The PPC64LE wasm SIMD lowering stores each lane's bytes in
-+ // little-endian order, so lane i of an N-byte element occupies bytes
-+ // (i*N) .. (i*N + N - 1) with the LSB at byte (i*N). For example,
-+ // a v128.const i32x4 0x12345678 has bytes [78 56 34 12 …].
-+ #define LANE_U8(buf, i) ((uint8_t)(buf)[(i)])
-+ #define LANE_S8(buf, i) ((int8_t)(buf)[(i)])
-+ #define LANE_U16(buf, i) \
-+ ((uint16_t)((uint16_t)(buf)[(i) * 2] | \
-+ ((uint16_t)(buf)[(i) * 2 + 1] << 8)))
-+ #define LANE_S16(buf, i) ((int16_t)LANE_U16(buf, i))
-+ #define LANE_U32(buf, i) \
-+ ((uint32_t)((uint32_t)(buf)[(i) * 4] | \
-+ ((uint32_t)(buf)[(i) * 4 + 1] << 8) | \
-+ ((uint32_t)(buf)[(i) * 4 + 2] << 16) | \
-+ ((uint32_t)(buf)[(i) * 4 + 3] << 24)))
-+ #define LANE_S32(buf, i) ((int32_t)LANE_U32(buf, i))
-+ #define LANE_U64(buf, i) \
-+ ((uint64_t)((uint64_t)(buf)[(i) * 8] | \
-+ ((uint64_t)(buf)[(i) * 8 + 1] << 8) | \
-+ ((uint64_t)(buf)[(i) * 8 + 2] << 16) | \
-+ ((uint64_t)(buf)[(i) * 8 + 3] << 24) | \
-+ ((uint64_t)(buf)[(i) * 8 + 4] << 32) | \
-+ ((uint64_t)(buf)[(i) * 8 + 5] << 40) | \
-+ ((uint64_t)(buf)[(i) * 8 + 6] << 48) | \
-+ ((uint64_t)(buf)[(i) * 8 + 7] << 56)))
-+ #define LANE_S64(buf, i) ((int64_t)LANE_U64(buf, i))
-+ #define SET_LANE_U8(buf, i, v) do { (buf)[(i)] = (uint8_t)(v); } while (0)
-+ #define SET_LANE_U16(buf, i, v) do { \
-+ (buf)[(i) * 2] = (uint8_t)((uint16_t)(v) & 0xFF); \
-+ (buf)[(i) * 2 + 1] = (uint8_t)(((uint16_t)(v) >> 8) & 0xFF); \
-+ } while (0)
-+ #define SET_LANE_U32(buf, i, v) do { \
-+ (buf)[(i) * 4] = (uint8_t)((uint32_t)(v) & 0xFF); \
-+ (buf)[(i) * 4 + 1] = (uint8_t)(((uint32_t)(v) >> 8) & 0xFF); \
-+ (buf)[(i) * 4 + 2] = (uint8_t)(((uint32_t)(v) >> 16) & 0xFF); \
-+ (buf)[(i) * 4 + 3] = (uint8_t)(((uint32_t)(v) >> 24) & 0xFF); \
-+ } while (0)
-+ #define SET_LANE_U64(buf, i, v) do { \
-+ (buf)[(i) * 8] = (uint8_t)((uint64_t)(v) & 0xFF); \
-+ (buf)[(i) * 8 + 1] = (uint8_t)(((uint64_t)(v) >> 8) & 0xFF); \
-+ (buf)[(i) * 8 + 2] = (uint8_t)(((uint64_t)(v) >> 16) & 0xFF); \
-+ (buf)[(i) * 8 + 3] = (uint8_t)(((uint64_t)(v) >> 24) & 0xFF); \
-+ (buf)[(i) * 8 + 4] = (uint8_t)(((uint64_t)(v) >> 32) & 0xFF); \
-+ (buf)[(i) * 8 + 5] = (uint8_t)(((uint64_t)(v) >> 40) & 0xFF); \
-+ (buf)[(i) * 8 + 6] = (uint8_t)(((uint64_t)(v) >> 48) & 0xFF); \
-+ (buf)[(i) * 8 + 7] = (uint8_t)(((uint64_t)(v) >> 56) & 0xFF); \
-+ } while (0)
-+
-+ // --- VA-form pre-dispatch ---
-+ //
-+ // VA-form has a 6-bit XO at bits 26-31 and a 5-bit VRC at bits 21-25.
-+ // decodeVMX's 11-bit XO mask conflates VRC with
-+ // XO, so a plain `switch (xo)` over 11-bit values only matches when
-+ // VRC == 0. Peel off the VA-form ops actually used by the JIT
-+ // (vmhaddshs, vmhraddshs, vmladduhm, vmsumuhm, vmsumshm, vsel, vperm)
-+ // before the main switch so any VRC value works. vsldoi (XO=44) is
-+ // VX-form with SH at bits 22-25, not VA — handled in the switch below.
-+ {
-+ uint32_t va_xo = xo & 0x3Fu;
-+ if (va_xo == 32 || va_xo == 33 || va_xo == 34 || va_xo == 38 ||
-+ va_xo == 40 || va_xo == 42 || va_xo == 43) {
-+ uint32_t vrc = (instr->instructionBits() >> 6) & 0x1F;
-+ uint8_t cv[16];
-+ getVRBytes(vrc, cv);
-+ if (va_xo == 32) {
-+ // vmhaddshs VT,VA,VB,VC : VT[i] = sat_s16(
-+ // (s32)VA.h[i] * (s32)VB.h[i] >> 15 + (s32)VC.h[i])
-+ // (no rounding term — use vmhraddshs for the rounded form).
-+ for (int i = 0; i < 8; i++) {
-+ int32_t prod = (int32_t)LANE_S16(a, i) * (int32_t)LANE_S16(b, i);
-+ int32_t sum = (prod >> 15) + (int32_t)LANE_S16(cv, i);
-+ if (sum > INT16_MAX) sum = INT16_MAX;
-+ if (sum < INT16_MIN) sum = INT16_MIN;
-+ SET_LANE_U16(r, i, (uint16_t)(int16_t)sum);
-+ }
-+ } else if (va_xo == 33) {
-+ // vmhraddshs VT,VA,VB,VC : rounded Q15 multiply-add-saturate.
-+ // VT[i] = sat_s16(((s32)VA.h[i] * (s32)VB.h[i] + 0x4000)
-+ // >> 15 + (s32)VC.h[i])
-+ // Used by wasm i16x8.q15mulr_sat_s (VC is zero).
-+ for (int i = 0; i < 8; i++) {
-+ int32_t prod = (int32_t)LANE_S16(a, i) * (int32_t)LANE_S16(b, i);
-+ int32_t sum = ((prod + 0x4000) >> 15) + (int32_t)LANE_S16(cv, i);
-+ if (sum > INT16_MAX) sum = INT16_MAX;
-+ if (sum < INT16_MIN) sum = INT16_MIN;
-+ SET_LANE_U16(r, i, (uint16_t)(int16_t)sum);
-+ }
-+ } else if (va_xo == 34) {
-+ // vmladduhm VT,VA,VB,VC : VT = low16(VA*VB + VC)
-+ for (int i = 0; i < 8; i++) {
-+ uint16_t prod = LANE_U16(a, i) * LANE_U16(b, i);
-+ SET_LANE_U16(r, i, prod + LANE_U16(cv, i));
-+ }
-+ } else if (va_xo == 40) {
-+ // vmsumshm VT,VA,VB,VC : pairwise multiply-sum of signed halfwords
-+ // into i32 lanes, modulo i32 wrap.
-+ // VT.i32[k] = VC.i32[k] + VA.i16[2k]*VB.i16[2k]
-+ // + VA.i16[2k+1]*VB.i16[2k+1]
-+ // Used by wasm i32x4.dot_i16x8_s with VC = 0, and by
-+ // i32x4.extadd_pairwise_i16x8_s with VB = splat(1) and VC = 0.
-+ for (int k = 0; k < 4; k++) {
-+ int32_t a0 = (int32_t)LANE_S16(a, 2 * k);
-+ int32_t a1 = (int32_t)LANE_S16(a, 2 * k + 1);
-+ int32_t b0 = (int32_t)LANE_S16(b, 2 * k);
-+ int32_t b1 = (int32_t)LANE_S16(b, 2 * k + 1);
-+ int32_t c = LANE_S32(cv, k);
-+ int32_t result = (int32_t)((uint32_t)c + (uint32_t)(a0 * b0) +
-+ (uint32_t)(a1 * b1));
-+ SET_LANE_U32(r, k, (uint32_t)result);
-+ }
-+ } else if (va_xo == 38) {
-+ // vmsumuhm VT,VA,VB,VC : same as vmsumshm but unsigned halfwords.
-+ // VT.u32[k] = VC.u32[k] + VA.u16[2k]*VB.u16[2k]
-+ // + VA.u16[2k+1]*VB.u16[2k+1]
-+ // Used by wasm i32x4.extadd_pairwise_i16x8_u with VB = splat(1)
-+ // and VC = 0.
-+ for (int k = 0; k < 4; k++) {
-+ uint32_t a0 = (uint32_t)LANE_U16(a, 2 * k);
-+ uint32_t a1 = (uint32_t)LANE_U16(a, 2 * k + 1);
-+ uint32_t b0 = (uint32_t)LANE_U16(b, 2 * k);
-+ uint32_t b1 = (uint32_t)LANE_U16(b, 2 * k + 1);
-+ uint32_t c = LANE_U32(cv, k);
-+ uint32_t result = c + a0 * b0 + a1 * b1;
-+ SET_LANE_U32(r, k, result);
-+ }
-+ } else if (va_xo == 42) {
-+ // vsel VT,VA,VB,VC : VT[i] = (VC[i] & VB[i]) | (~VC[i] & VA[i])
-+ for (int i = 0; i < 16; i++) {
-+ r[i] = (uint8_t)((cv[i] & b[i]) | (~cv[i] & a[i]));
-+ }
-+ } else {
-+ // vperm VT,VA,VB,VC; empirical LE:
-+ // r[LE_i] = (VC[LE_i] < 16) ? VA[LE_(15-VC[i])]
-+ // : VB[LE_(31-VC[i])]
-+ for (int i = 0; i < 16; i++) {
-+ uint8_t idx = cv[i] & 0x1F;
-+ r[i] = (idx < 16) ? a[15 - idx] : b[31 - idx];
-+ }
-+ }
-+ setVRBytes(vrt, r);
-+ goto vmx_done;
-+ }
-+ }
-+
-+ switch (xo) {
-+ // === Integer add (modulo) ===
-+ case 0: // vaddubm
-+ for (int i = 0; i < 16; i++) {
-+ SET_LANE_U8(r, i, LANE_U8(a, i) + LANE_U8(b, i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 64: // vadduhm
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U16(r, i, LANE_U16(a, i) + LANE_U16(b, i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 128: // vadduwm
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U32(r, i, LANE_U32(a, i) + LANE_U32(b, i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 192: // vaddudm
-+ for (int i = 0; i < 2; i++) {
-+ SET_LANE_U64(r, i, LANE_U64(a, i) + LANE_U64(b, i));
-+ }
-+ setVRBytes(vrt, r); break;
-+
-+ // === Integer sub (modulo) ===
-+ case 1024: // vsububm
-+ for (int i = 0; i < 16; i++) {
-+ SET_LANE_U8(r, i, LANE_U8(a, i) - LANE_U8(b, i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 1088: // vsubuhm
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U16(r, i, LANE_U16(a, i) - LANE_U16(b, i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 1152: // vsubuwm
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U32(r, i, LANE_U32(a, i) - LANE_U32(b, i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 1216: // vsubudm
-+ for (int i = 0; i < 2; i++) {
-+ SET_LANE_U64(r, i, LANE_U64(a, i) - LANE_U64(b, i));
-+ }
-+ setVRBytes(vrt, r); break;
-+
-+ // === Integer add (saturating, signed) ===
-+ case 768: // vaddsbs
-+ for (int i = 0; i < 16; i++) {
-+ int s = (int)LANE_S8(a, i) + (int)LANE_S8(b, i);
-+ if (s > INT8_MAX) s = INT8_MAX;
-+ if (s < INT8_MIN) s = INT8_MIN;
-+ SET_LANE_U8(r, i, (uint8_t)s);
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 832: // vaddshs
-+ for (int i = 0; i < 8; i++) {
-+ int s = (int)LANE_S16(a, i) + (int)LANE_S16(b, i);
-+ if (s > INT16_MAX) s = INT16_MAX;
-+ if (s < INT16_MIN) s = INT16_MIN;
-+ SET_LANE_U16(r, i, (uint16_t)s);
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 896: // vaddsws
-+ for (int i = 0; i < 4; i++) {
-+ int64_t s = (int64_t)LANE_S32(a, i) + (int64_t)LANE_S32(b, i);
-+ if (s > INT32_MAX) s = INT32_MAX;
-+ if (s < INT32_MIN) s = INT32_MIN;
-+ SET_LANE_U32(r, i, (uint32_t)s);
-+ }
-+ setVRBytes(vrt, r); break;
-+
-+ // === Integer add (saturating, unsigned) ===
-+ case 512: // vaddubs
-+ for (int i = 0; i < 16; i++) {
-+ unsigned s = (unsigned)LANE_U8(a, i) + (unsigned)LANE_U8(b, i);
-+ if (s > UINT8_MAX) s = UINT8_MAX;
-+ SET_LANE_U8(r, i, (uint8_t)s);
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 576: // vadduhs
-+ for (int i = 0; i < 8; i++) {
-+ unsigned s = (unsigned)LANE_U16(a, i) + (unsigned)LANE_U16(b, i);
-+ if (s > UINT16_MAX) s = UINT16_MAX;
-+ SET_LANE_U16(r, i, (uint16_t)s);
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 640: // vadduws
-+ for (int i = 0; i < 4; i++) {
-+ uint64_t s = (uint64_t)LANE_U32(a, i) + (uint64_t)LANE_U32(b, i);
-+ if (s > UINT32_MAX) s = UINT32_MAX;
-+ SET_LANE_U32(r, i, (uint32_t)s);
-+ }
-+ setVRBytes(vrt, r); break;
-+
-+ // === Integer sub (saturating, signed) ===
-+ case 1792: // vsubsbs
-+ for (int i = 0; i < 16; i++) {
-+ int s = (int)LANE_S8(a, i) - (int)LANE_S8(b, i);
-+ if (s > INT8_MAX) s = INT8_MAX;
-+ if (s < INT8_MIN) s = INT8_MIN;
-+ SET_LANE_U8(r, i, (uint8_t)s);
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 1856: // vsubshs
-+ for (int i = 0; i < 8; i++) {
-+ int s = (int)LANE_S16(a, i) - (int)LANE_S16(b, i);
-+ if (s > INT16_MAX) s = INT16_MAX;
-+ if (s < INT16_MIN) s = INT16_MIN;
-+ SET_LANE_U16(r, i, (uint16_t)s);
-+ }
-+ setVRBytes(vrt, r); break;
-+
-+ // === Integer sub (saturating, unsigned) ===
-+ case 1536: // vsububs
-+ for (int i = 0; i < 16; i++) {
-+ int s = (int)LANE_U8(a, i) - (int)LANE_U8(b, i);
-+ if (s < 0) s = 0;
-+ SET_LANE_U8(r, i, (uint8_t)s);
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 1600: // vsubuhs
-+ for (int i = 0; i < 8; i++) {
-+ int s = (int)LANE_U16(a, i) - (int)LANE_U16(b, i);
-+ if (s < 0) s = 0;
-+ SET_LANE_U16(r, i, (uint16_t)s);
-+ }
-+ setVRBytes(vrt, r); break;
-+
-+ // === Average unsigned (rounded: (a+b+1)>>1) ===
-+ case 1026: // vavgub
-+ for (int i = 0; i < 16; i++) {
-+ SET_LANE_U8(r, i,
-+ ((unsigned)LANE_U8(a, i) + LANE_U8(b, i) + 1) >> 1);
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 1090: // vavguh
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U16(r, i,
-+ ((unsigned)LANE_U16(a, i) + LANE_U16(b, i) + 1) >> 1);
-+ }
-+ setVRBytes(vrt, r); break;
-+
-+ // === Vector multiply per-lane (i32x4.mul) ===
-+ case 137: { // vmuluwm: per-lane i32 multiply (low 32 bits)
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U32(r, i, LANE_U32(a, i) * LANE_U32(b, i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+
-+ // === POWER10 vmulld: per-lane i64 multiply (low 64 bits) ===
-+ case 457: {
-+ for (int i = 0; i < 2; i++) {
-+ uint64_t av = 0, bv = 0;
-+ for (int j = 0; j < 8; j++) {
-+ av |= ((uint64_t)a[i * 8 + j]) << (j * 8);
-+ bv |= ((uint64_t)b[i * 8 + j]) << (j * 8);
-+ }
-+ uint64_t prod = av * bv; // low 64 bits, modulo wrap
-+ for (int j = 0; j < 8; j++) {
-+ r[i * 8 + j] = (uint8_t)(prod >> (j * 8));
-+ }
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+
-+ // === vmule/vmulo* (multiply even/odd lanes, widening) ===
-+ //
-+ // All XO values below were verified by disassembling the
-+ // PPC_vmule*/PPC_vmulo* constants from Assembler-ppc64.h with
-+ // `as -mppc64 -mlittle` + `objdump -Mpower9 -d`. The previous
-+ // version had all 12 XO labels swapped with each other's semantic
-+ // pair (so the JIT's vmulesb was decoded as vmulosb and vice
-+ // versa), causing i8x16→i16x8 extmul to produce wrong halfwords.
-+ //
-+ // PPC_vmuloub = 0x10000008 → XO=8 vmuloub (LE even-byte pairs)
-+ // PPC_vmulouh = 0x10000048 → XO=72 vmulouh
-+ // PPC_vmulouw = 0x10000088 → XO=136 vmulouw
-+ // PPC_vmulosb = 0x10000108 → XO=264 vmulosb
-+ // PPC_vmulosh = 0x10000148 → XO=328 vmulosh
-+ // PPC_vmulosw = 0x10000188 → XO=392 vmulosw
-+ // PPC_vmuleub = 0x10000208 → XO=520 vmuleub (LE odd-byte pairs)
-+ // PPC_vmuleuh = 0x10000248 → XO=584 vmuleuh
-+ // PPC_vmuleuw = 0x10000288 → XO=648 vmuleuw
-+ // PPC_vmulesb = 0x10000308 → XO=776 vmulesb
-+ // PPC_vmulesh = 0x10000348 → XO=840 vmulesh
-+ // PPC_vmulesw = 0x10000388 → XO=904 vmulesw
-+ //
-+ // Lane indexing on LE storage: "BE-even byte i" is stored at LE
-+ // byte index (15 - 2i); since our LANE_S8 uses LE byte index, the
-+ // "BE-even" = "LE-odd" mapping gives `2*i + 1` for vmule, `2*i`
-+ // for vmulo. The JIT's extmul helpers emit `vmulesb + vmulosb +
-+ // vmrglh` to pack both halves; getting the semantics swapped here
-+ // produces the right result register but with the halves in the
-+ // wrong merge order, breaking extmul.
-+ case 776: { // vmulesb: signed BE-even byte → halfword (8 results)
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U16(r, i,
-+ (int16_t)LANE_S8(a, 2 * i + 1) *
-+ (int16_t)LANE_S8(b, 2 * i + 1));
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+ case 520: { // vmuleub: unsigned BE-even byte → halfword
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U16(r, i,
-+ (uint16_t)LANE_U8(a, 2 * i + 1) *
-+ (uint16_t)LANE_U8(b, 2 * i + 1));
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+ case 840: { // vmulesh: signed BE-even halfword → word
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U32(r, i,
-+ (int32_t)LANE_S16(a, 2 * i + 1) *
-+ (int32_t)LANE_S16(b, 2 * i + 1));
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+ case 584: { // vmuleuh: unsigned BE-even halfword → word
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U32(r, i,
-+ (uint32_t)LANE_U16(a, 2 * i + 1) *
-+ (uint32_t)LANE_U16(b, 2 * i + 1));
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+ case 904: { // vmulesw: signed BE-even word → dword (POWER8)
-+ for (int i = 0; i < 2; i++) {
-+ SET_LANE_U64(r, i,
-+ (int64_t)LANE_S32(a, 2 * i + 1) *
-+ (int64_t)LANE_S32(b, 2 * i + 1));
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+ case 648: { // vmuleuw: unsigned BE-even word → dword (POWER8)
-+ for (int i = 0; i < 2; i++) {
-+ SET_LANE_U64(r, i,
-+ (uint64_t)LANE_U32(a, 2 * i + 1) *
-+ (uint64_t)LANE_U32(b, 2 * i + 1));
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+ case 264: { // vmulosb: signed BE-odd byte → halfword
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U16(r, i,
-+ (int16_t)LANE_S8(a, 2 * i) *
-+ (int16_t)LANE_S8(b, 2 * i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+ case 8: { // vmuloub: unsigned BE-odd byte
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U16(r, i,
-+ (uint16_t)LANE_U8(a, 2 * i) *
-+ (uint16_t)LANE_U8(b, 2 * i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+ case 328: { // vmulosh: signed BE-odd halfword → word
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U32(r, i,
-+ (int32_t)LANE_S16(a, 2 * i) *
-+ (int32_t)LANE_S16(b, 2 * i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+ case 72: { // vmulouh: unsigned BE-odd halfword → word
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U32(r, i,
-+ (uint32_t)LANE_U16(a, 2 * i) *
-+ (uint32_t)LANE_U16(b, 2 * i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+ case 392: { // vmulosw: signed BE-odd word
-+ for (int i = 0; i < 2; i++) {
-+ SET_LANE_U64(r, i,
-+ (int64_t)LANE_S32(a, 2 * i) *
-+ (int64_t)LANE_S32(b, 2 * i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+ case 136: { // vmulouw: unsigned BE-odd word
-+ for (int i = 0; i < 2; i++) {
-+ SET_LANE_U64(r, i,
-+ (uint64_t)LANE_U32(a, 2 * i) *
-+ (uint64_t)LANE_U32(b, 2 * i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+
-+ // === Per-lane rotate left (vrl{b,h,w,d}) ===
-+ case 4: // vrlb
-+ for (int i = 0; i < 16; i++) {
-+ uint8_t v = LANE_U8(a, i);
-+ uint32_t s = LANE_U8(b, i) & 7;
-+ SET_LANE_U8(r, i, (uint8_t)((v << s) | (v >> ((8 - s) & 7))));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 68: // vrlh
-+ for (int i = 0; i < 8; i++) {
-+ uint16_t v = LANE_U16(a, i);
-+ uint32_t s = LANE_U16(b, i) & 15;
-+ SET_LANE_U16(r, i, (uint16_t)((v << s) | (v >> ((16 - s) & 15))));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 132: // vrlw
-+ for (int i = 0; i < 4; i++) {
-+ uint32_t v = LANE_U32(a, i);
-+ uint32_t s = LANE_U32(b, i) & 31;
-+ SET_LANE_U32(r, i, (v << s) | (v >> ((32 - s) & 31)));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 196: // vrld
-+ for (int i = 0; i < 2; i++) {
-+ uint64_t v = LANE_U64(a, i);
-+ uint32_t s = LANE_U64(b, i) & 63;
-+ SET_LANE_U64(r, i, (v << s) | (v >> ((64 - s) & 63)));
-+ }
-+ setVRBytes(vrt, r); break;
-+
-+ // === Min / Max signed ===
-+ case 258: // vmaxsb
-+ for (int i = 0; i < 16; i++) {
-+ SET_LANE_U8(r, i, std::max(LANE_S8(a, i), LANE_S8(b, i)));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 322: // vmaxsh
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U16(r, i, std::max(LANE_S16(a, i), LANE_S16(b, i)));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 386: // vmaxsw
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U32(r, i, std::max(LANE_S32(a, i), LANE_S32(b, i)));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 450: // vmaxsd
-+ for (int i = 0; i < 2; i++) {
-+ SET_LANE_U64(r, i, std::max(LANE_S64(a, i), LANE_S64(b, i)));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 770: // vminsb
-+ for (int i = 0; i < 16; i++) {
-+ SET_LANE_U8(r, i, std::min(LANE_S8(a, i), LANE_S8(b, i)));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 834: // vminsh
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U16(r, i, std::min(LANE_S16(a, i), LANE_S16(b, i)));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 898: // vminsw
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U32(r, i, std::min(LANE_S32(a, i), LANE_S32(b, i)));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 962: // vminsd
-+ for (int i = 0; i < 2; i++) {
-+ SET_LANE_U64(r, i, std::min(LANE_S64(a, i), LANE_S64(b, i)));
-+ }
-+ setVRBytes(vrt, r); break;
-+
-+ // === Min / Max unsigned ===
-+ case 2: // vmaxub
-+ for (int i = 0; i < 16; i++) {
-+ SET_LANE_U8(r, i, std::max(LANE_U8(a, i), LANE_U8(b, i)));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 66: // vmaxuh
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U16(r, i, std::max(LANE_U16(a, i), LANE_U16(b, i)));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 130: // vmaxuw
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U32(r, i, std::max(LANE_U32(a, i), LANE_U32(b, i)));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 194: // vmaxud
-+ for (int i = 0; i < 2; i++) {
-+ SET_LANE_U64(r, i, std::max(LANE_U64(a, i), LANE_U64(b, i)));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 514: // vminub
-+ for (int i = 0; i < 16; i++) {
-+ SET_LANE_U8(r, i, std::min(LANE_U8(a, i), LANE_U8(b, i)));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 578: // vminuh
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U16(r, i, std::min(LANE_U16(a, i), LANE_U16(b, i)));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 642: // vminuw
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U32(r, i, std::min(LANE_U32(a, i), LANE_U32(b, i)));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 706: // vminud
-+ for (int i = 0; i < 2; i++) {
-+ SET_LANE_U64(r, i, std::min(LANE_U64(a, i), LANE_U64(b, i)));
-+ }
-+ setVRBytes(vrt, r); break;
-+
-+ // === Vector compare (eq, gt signed, gt unsigned, ne POWER9) ===
-+ //
-+ // All vcmp* ops set per-lane all-1s on true, all-0s on false. The
-+ // record form (Rc=1, XO MSB bit set; XO_rec = XO_base + 1024) must
-+ // additionally write CR6:
-+ // CR6.LT = 1 iff ALL lanes are true;
-+ // CR6.GT = 0 (always);
-+ // CR6.EQ = 1 iff NO lane is true;
-+ // CR6.SO = 0 (always).
-+ // `i8x16.all_true` etc. in wasm rely on CR6.EQ via `mfocrf cr6`; the
-+ // previous simulator implementation left CR6 untouched, so the
-+ // predicate was always wrong.
-+ //
-+ // Helper: count true lanes by looking at byte 0 of each lane (all
-+ // bytes within a "true" lane are 0xFF so byte 0 is a sound proxy).
-+ #define VCMP_DONE(lanes_, lane_bytes_) \
-+ do { \
-+ setVRBytes(vrt, r); \
-+ if (xo >= 1024) { \
-+ int numTrue_ = 0; \
-+ for (int i_ = 0; i_ < (lanes_); i_++) { \
-+ if (r[i_ * (lane_bytes_)] == 0xFF) numTrue_++; \
-+ } \
-+ uint8_t field_ = 0; \
-+ if (numTrue_ == (lanes_)) field_ |= kCRFieldLT; \
-+ if (numTrue_ == 0) field_ |= kCRFieldEQ; \
-+ setCRField(6, field_); \
-+ } \
-+ } while (0)
-+
-+ case 6: // vcmpequb (Rc=0)
-+ case 1030: // vcmpequb. (record, CR6 updated)
-+ for (int i = 0; i < 16; i++) {
-+ SET_LANE_U8(r, i, LANE_U8(a, i) == LANE_U8(b, i) ? 0xFF : 0);
-+ }
-+ VCMP_DONE(16, 1); break;
-+ case 70: // vcmpequh
-+ case 1094: // vcmpequh.
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U16(r, i, LANE_U16(a, i) == LANE_U16(b, i) ? 0xFFFF : 0);
-+ }
-+ VCMP_DONE(8, 2); break;
-+ case 134: // vcmpequw
-+ case 1158: // vcmpequw.
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U32(r, i,
-+ LANE_U32(a, i) == LANE_U32(b, i) ? 0xFFFFFFFFu : 0);
-+ }
-+ VCMP_DONE(4, 4); break;
-+ case 199: // vcmpequd
-+ case 1223: // vcmpequd.
-+ for (int i = 0; i < 2; i++) {
-+ SET_LANE_U64(r, i,
-+ LANE_U64(a, i) == LANE_U64(b, i)
-+ ? UINT64_MAX
-+ : 0);
-+ }
-+ VCMP_DONE(2, 8); break;
-+
-+ // === Compare greater-than signed ===
-+ case 774: // vcmpgtsb
-+ case 1798: // vcmpgtsb.
-+ for (int i = 0; i < 16; i++) {
-+ SET_LANE_U8(r, i, LANE_S8(a, i) > LANE_S8(b, i) ? 0xFF : 0);
-+ }
-+ VCMP_DONE(16, 1); break;
-+ case 838: // vcmpgtsh
-+ case 1862: // vcmpgtsh.
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U16(r, i, LANE_S16(a, i) > LANE_S16(b, i) ? 0xFFFF : 0);
-+ }
-+ VCMP_DONE(8, 2); break;
-+ case 902: // vcmpgtsw
-+ case 1926: // vcmpgtsw.
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U32(r, i,
-+ LANE_S32(a, i) > LANE_S32(b, i) ? 0xFFFFFFFFu : 0);
-+ }
-+ VCMP_DONE(4, 4); break;
-+ case 967: // vcmpgtsd
-+ case 1991: // vcmpgtsd.
-+ for (int i = 0; i < 2; i++) {
-+ SET_LANE_U64(r, i,
-+ LANE_S64(a, i) > LANE_S64(b, i) ? UINT64_MAX : 0);
-+ }
-+ VCMP_DONE(2, 8); break;
-+
-+ // === Compare greater-than unsigned ===
-+ case 518: // vcmpgtub
-+ case 1542: // vcmpgtub.
-+ for (int i = 0; i < 16; i++) {
-+ SET_LANE_U8(r, i, LANE_U8(a, i) > LANE_U8(b, i) ? 0xFF : 0);
-+ }
-+ VCMP_DONE(16, 1); break;
-+ case 582: // vcmpgtuh
-+ case 1606: // vcmpgtuh.
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U16(r, i, LANE_U16(a, i) > LANE_U16(b, i) ? 0xFFFF : 0);
-+ }
-+ VCMP_DONE(8, 2); break;
-+ case 646: // vcmpgtuw
-+ case 1670: // vcmpgtuw.
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U32(r, i,
-+ LANE_U32(a, i) > LANE_U32(b, i) ? 0xFFFFFFFFu : 0);
-+ }
-+ VCMP_DONE(4, 4); break;
-+ case 711: // vcmpgtud
-+ case 1735: // vcmpgtud.
-+ for (int i = 0; i < 2; i++) {
-+ SET_LANE_U64(r, i,
-+ LANE_U64(a, i) > LANE_U64(b, i) ? UINT64_MAX : 0);
-+ }
-+ VCMP_DONE(2, 8); break;
-+
-+ // === Splat element (replicate the VRB element selected by UIM into all lanes) ===
-+ // ISA defines UIM in BE element numbering. For LE storage, BE element i = LE element (N-1-i).
-+ case 524: // vspltb: VRT[*] = VRB[BE-byte-UIM]; uimm from VRA field (bits 11..15)
-+ for (int i = 0; i < 16; i++) {
-+ SET_LANE_U8(r, i, LANE_U8(b, 15 - (uimm & 0xF)));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 588: // vsplth
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U16(r, i, LANE_U16(b, 7 - (uimm & 0x7)));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 652: // vspltw
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U32(r, i, LANE_U32(b, 3 - (uimm & 0x3)));
-+ }
-+ setVRBytes(vrt, r); break;
-+
-+ // === Splat 5-bit signed immediate to all byte lanes ===
-+ case 780: { // vspltisb VRT, SIMM5
-+ int32_t simm5 = (int32_t)((instr->instructionBits() >> 16) & 0x1F);
-+ if (simm5 & 0x10) simm5 |= ~0x1F;
-+ uint8_t b = (uint8_t)(int8_t)simm5;
-+ memset(r, b, 16);
-+ setVRBytes(vrt, r); break;
-+ }
-+
-+ // === Splat 5-bit signed immediate to all halfword lanes ===
-+ case 844: { // vspltish VRT, SIMM5
-+ // SIMM5 occupies bits 11..15 of the instruction (VRA field). It
-+ // is sign-extended to 16 bits and replicated across all 8 halfword
-+ // lanes of VRT. Range: [-16, 15].
-+ int32_t simm5 = (int32_t)((instr->instructionBits() >> 16) & 0x1F);
-+ if (simm5 & 0x10) simm5 |= ~0x1F; // sign-extend bit 4
-+ int16_t hw = (int16_t)simm5;
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U16(r, i, (uint16_t)hw);
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+
-+ // === Splat 5-bit signed immediate to all word lanes ===
-+ case 908: { // vspltisw VRT, SIMM5
-+ int32_t simm5 = (int32_t)((instr->instructionBits() >> 16) & 0x1F);
-+ if (simm5 & 0x10) simm5 |= ~0x1F;
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U32(r, i, (uint32_t)simm5);
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+
-+ // === Merge (interleave) ===
-+ //
-+ // The ISA defines vmrgh* / vmrgl* in BE numbering; the
-+ // empirical LE storage behaviour is:
-+ // vmrgh* VT,VA,VB: for i in 0..N/2-1,
-+ // VT.lane_LE[2i] = VB.lane_LE[(N/2) + i]
-+ // VT.lane_LE[2i+1] = VA.lane_LE[(N/2) + i]
-+ // vmrgl* VT,VA,VB: for i in 0..N/2-1,
-+ // VT.lane_LE[2i] = VB.lane_LE[i]
-+ // VT.lane_LE[2i+1] = VA.lane_LE[i]
-+ // i.e. the VB operand goes to the even result positions (reversed
-+ // from what a naïve BE reading would suggest) and the "high" form
-+ // selects the upper-half of LE storage.
-+ //
-+ // Previous implementation had both the operand order swapped AND
-+ // the high/low halves swapped (consistent with each other, so
-+ // JIT-only-visible ops that round-tripped through vmrg* happened
-+ // to produce the right answer, but wasm-visible extmul exposed
-+ // the bug).
-+ case 12: // vmrghb
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U8(r, 2 * i, LANE_U8(b, 8 + i));
-+ SET_LANE_U8(r, 2 * i + 1, LANE_U8(a, 8 + i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 76: // vmrghh
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U16(r, 2 * i, LANE_U16(b, 4 + i));
-+ SET_LANE_U16(r, 2 * i + 1, LANE_U16(a, 4 + i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 140: // vmrghw
-+ for (int i = 0; i < 2; i++) {
-+ SET_LANE_U32(r, 2 * i, LANE_U32(b, 2 + i));
-+ SET_LANE_U32(r, 2 * i + 1, LANE_U32(a, 2 + i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 268: // vmrglb
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U8(r, 2 * i, LANE_U8(b, i));
-+ SET_LANE_U8(r, 2 * i + 1, LANE_U8(a, i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 332: // vmrglh
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U16(r, 2 * i, LANE_U16(b, i));
-+ SET_LANE_U16(r, 2 * i + 1, LANE_U16(a, i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 396: // vmrglw
-+ for (int i = 0; i < 2; i++) {
-+ SET_LANE_U32(r, 2 * i, LANE_U32(b, i));
-+ SET_LANE_U32(r, 2 * i + 1, LANE_U32(a, i));
-+ }
-+ setVRBytes(vrt, r); break;
-+
-+ // === Per-lane shift left (count from VRB, low N bits per element) ===
-+ case 260: // vslb
-+ for (int i = 0; i < 16; i++) {
-+ SET_LANE_U8(r, i, LANE_U8(a, i) << (LANE_U8(b, i) & 7));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 324: // vslh
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U16(r, i, LANE_U16(a, i) << (LANE_U16(b, i) & 15));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 388: // vslw
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U32(r, i, LANE_U32(a, i) << (LANE_U32(b, i) & 31));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 1476: // vsld
-+ for (int i = 0; i < 2; i++) {
-+ SET_LANE_U64(r, i, LANE_U64(a, i) << (LANE_U64(b, i) & 63));
-+ }
-+ setVRBytes(vrt, r); break;
-+
-+ // === Per-lane shift right unsigned ===
-+ case 516: // vsrb
-+ for (int i = 0; i < 16; i++) {
-+ SET_LANE_U8(r, i, LANE_U8(a, i) >> (LANE_U8(b, i) & 7));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 580: // vsrh
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U16(r, i, LANE_U16(a, i) >> (LANE_U16(b, i) & 15));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 644: // vsrw
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U32(r, i, LANE_U32(a, i) >> (LANE_U32(b, i) & 31));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 1732: // vsrd
-+ for (int i = 0; i < 2; i++) {
-+ SET_LANE_U64(r, i, LANE_U64(a, i) >> (LANE_U64(b, i) & 63));
-+ }
-+ setVRBytes(vrt, r); break;
-+
-+ // === Per-lane shift right algebraic (signed) ===
-+ case 772: // vsrab
-+ for (int i = 0; i < 16; i++) {
-+ SET_LANE_U8(r, i,
-+ (uint8_t)(LANE_S8(a, i) >> (LANE_U8(b, i) & 7)));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 836: // vsrah
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U16(r, i,
-+ (uint16_t)(LANE_S16(a, i) >> (LANE_U16(b, i) & 15)));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 900: // vsraw
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U32(r, i,
-+ (uint32_t)(LANE_S32(a, i) >> (LANE_U32(b, i) & 31)));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 964: // vsrad
-+ for (int i = 0; i < 2; i++) {
-+ SET_LANE_U64(r, i,
-+ (uint64_t)(LANE_S64(a, i) >> (LANE_U64(b, i) & 63)));
-+ }
-+ setVRBytes(vrt, r); break;
-+
-+ // === POWER9 per-lane integer negate (subop in VRA field) ===
-+ // PPC_vnegw = 0x10060602 → XO=0x602=1538, VRA=6
-+ // PPC_vnegd = 0x10070602 → XO=0x602=1538, VRA=7
-+ case 1538:
-+ if (vra == 6) { // vnegw
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U32(r, i, (uint32_t)(-LANE_S32(b, i)));
-+ }
-+ } else if (vra == 7) { // vnegd
-+ for (int i = 0; i < 2; i++) {
-+ SET_LANE_U64(r, i, (uint64_t)(-LANE_S64(b, i)));
-+ }
-+ } else {
-+ MOZ_CRASH_UNSAFE_PRINTF("decodeVMX XO=1538: unknown subop %u", vra);
-+ }
-+ setVRBytes(vrt, r); break;
-+
-+ // === POWER10 vextract{b,h,w,d}m (XO=1602=0x642) ===
-+ // RT (GPR) gets the wasm-spec bitmask in low 16/8/4/2 bits. UIM at
-+ // bits 11..15 (= sim `vra`) selects lane width: 8=byte, 9=halfword,
-+ // 10=word, 11=doubleword.
-+ case 1602: {
-+ uint64_t result = 0;
-+ switch (vra) {
-+ case 8: // vextractbm: 16 byte lanes
-+ for (int i = 0; i < 16; i++) {
-+ if (b[i] & 0x80) result |= (1ULL << i);
-+ }
-+ break;
-+ case 9: // vextracthm: 8 halfword lanes; MSB lives at byte 2i+1
-+ for (int i = 0; i < 8; i++) {
-+ if (b[2 * i + 1] & 0x80) result |= (1ULL << i);
-+ }
-+ break;
-+ case 10: // vextractwm: 4 word lanes; MSB at byte 4i+3
-+ for (int i = 0; i < 4; i++) {
-+ if (b[4 * i + 3] & 0x80) result |= (1ULL << i);
-+ }
-+ break;
-+ case 11: // vextractdm: 2 dword lanes; MSB at byte 8i+7
-+ for (int i = 0; i < 2; i++) {
-+ if (b[8 * i + 7] & 0x80) result |= (1ULL << i);
-+ }
-+ break;
-+ default:
-+ MOZ_CRASH_UNSAFE_PRINTF("decodeVMX XO=1602: unknown UIM %u", vra);
-+ }
-+ // vrt is the GPR target (RT field at bits 6..10).
-+ setRegister(int(vrt), int64_t(result));
-+ goto vmx_done; // Skip the trailing setVRBytes used by VR-targeting ops.
-+ }
-+
-+ // === POWER9 vinsertb (XO=781) / vinserth (XO=845) ===
-+ // Insert byte/halfword from a VR (NOT a GPR) at an immediate byte
-+ // position UIM (BE).
-+ // vinsertb: VRT.byte[UIM] (BE) ← VRB.byte[7] (BE)
-+ // vinserth: VRT.byte[UIM] (BE) ← VRB.byte[6] (BE)
-+ // VRT.byte[UIM+1] (BE) ← VRB.byte[7] (BE)
-+ // BE byte i ↔ LE byte (15-i). So VRB.byte[6] (BE) = LE byte 9 of
-+ // VRB, VRB.byte[7] (BE) = LE byte 8. (Byte-pair order matters.)
-+ case 781: // vinsertb
-+ case 845: { // vinserth
-+ getVRBytes(vrt, r); // start from current VRT
-+ if (xo == 845) {
-+ // vinserth: copy 2-byte halfword (BE bytes 6..7 of VRB).
-+ r[15 - uimm] = b[9]; // BE byte UIM ← VRB BE byte 6
-+ r[14 - uimm] = b[8]; // BE byte UIM+1 ← VRB BE byte 7
-+ } else {
-+ // vinsertb: copy a single byte (BE byte 7 of VRB).
-+ r[15 - uimm] = b[8]; // BE byte UIM ← VRB BE byte 7
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+
-+ // === POWER9 vextractub (XO=525) / vextractuh (XO=589) ===
-+ // Extract one byte/halfword from VRB at immediate BE position UIM
-+ // and place it at BE byte 7 of VRT, with all other bytes of VRT
-+ // zeroed. Companion to vinsertb/h; chooses an immediate BE position
-+ // and lands the result at the low byte of VRT (= low byte of mfvsrd).
-+ // vextractub: VRT.byte[7] (BE) ← VRB.byte[UIM] (BE), rest = 0
-+ // vextractuh: VRT.byte[6] (BE) ← VRB.byte[UIM] (BE)
-+ // VRT.byte[7] (BE) ← VRB.byte[UIM+1] (BE), rest = 0
-+ case 525: // vextractub
-+ case 589: { // vextractuh
-+ memset(r, 0, sizeof(r));
-+ if (xo == 589) {
-+ r[9] = b[15 - uimm]; // VRT BE byte 6 ← VRB BE byte UIM
-+ r[8] = b[14 - uimm]; // VRT BE byte 7 ← VRB BE byte UIM+1
-+ } else {
-+ r[8] = b[15 - uimm]; // VRT BE byte 7 ← VRB BE byte UIM
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+
-+ // === POWER10 vinsbrx (XO=783) / vinshrx (XO=847) ===
-+ // Right-indexed (LE-natural) byte/halfword insert from GPR. RA's
-+ // low 4 bits supply the byte position (mod 16); for vinshrx the
-+ // position is also masked to even (& 0xE) so the halfword is
-+ // 2-byte aligned. RB's low 8 / 16 bits are inserted; other bytes
-+ // of VRT are unchanged. RA and RB are GPRs (NOT VRs) — sim's
-+ // pre-fetched `a` and `b` from getVRBytes are unused here.
-+ case 783: // vinsbrx
-+ case 847: { // vinshrx
-+ uint64_t ra_val = U64(getRegister(int(vra)));
-+ uint64_t rb_val = U64(getRegister(int(vrb)));
-+ getVRBytes(vrt, r); // start from current VRT
-+ const bool isHalf = (xo == 847);
-+ const uint32_t pos = isHalf ? uint32_t(ra_val & 0xEULL)
-+ : uint32_t(ra_val & 0xFULL);
-+ r[pos] = (uint8_t)(rb_val & 0xFFULL);
-+ if (isHalf) {
-+ r[pos + 1] = (uint8_t)((rb_val >> 8) & 0xFFULL);
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+
-+ // === POWER10 vinsw (XO=207) / vinsd (XO=463) ===
-+ // VRT[UIM*8:UIM*8+N-1] (BE bits) ← RB low N bits, where N = 32 or 64.
-+ // RB is a GPR (the `vrb` field at sim bits 15..11). UIM is at sim
-+ // bits 20..16 (= the `uimm` / `vra` decode). Other bytes of VRT are
-+ // unchanged, so we read VRT first then patch UIM..UIM+(N/8-1).
-+ case 207: // vinsw
-+ case 463: { // vinsd
-+ uint64_t rb_val = U64(getRegister(int(vrb)));
-+ getVRBytes(vrt, r); // start from current VRT
-+ const int width = (xo == 463) ? 8 : 4; // bytes
-+ // BE byte UIM+i of VRT = LE byte (15 - UIM - i).
-+ // For vinsd, the source is all 64 bits of rb_val, stored most-significant
-+ // byte first: host bits 56..63 land in BE byte UIM (recall U64() puts the
-+ // canonical 64-bit value in a host uint64_t with bit 63 = MSB).
-+ // For vinsw, source is RB[32:63] = low 32 bits of rb_val.
-+ uint64_t src = (width == 8) ? rb_val : (rb_val & 0xFFFFFFFFULL);
-+ const int srcMsbShift = (width * 8) - 8; // 56 or 24
-+ for (int i = 0; i < width; i++) {
-+ r[15 - uimm - i] = (uint8_t)(src >> (srcMsbShift - 8 * i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+
-+ // === POWER8+ vbpermq (XO=1356=0x54C): per-byte bit permute ===
-+ // For each i in 0..15, take VRB BE-byte i (= sim b[15-i]); if its
-+ // high bit is set, perm[i]=0; else perm[i] = bit at BE position
-+ // (low 7 bits) of VRA. ISA says perm[0..15] go into VRT.dw[1] low
-+ // 16 bits, but on real LE silicon the bitmap is observable in dw[0]
-+ // low 16 bits — i.e., recoverable via mfvsrd. Match that observable
-+ // behaviour: write the bitmap into sim bytes[8..9] (where mfvsrd
-+ // reads dw[0] from), zero the rest.
-+ case 1356: {
-+ uint8_t perm[16];
-+ for (int k = 0; k < 16; k++) {
-+ uint8_t ctl = b[15 - k];
-+ if (ctl & 0x80) {
-+ perm[k] = 0;
-+ } else {
-+ int p = ctl & 0x7F;
-+ int le_idx = 15 - (p / 8);
-+ int bit_in_byte = 7 - (p % 8);
-+ perm[k] = (a[le_idx] >> bit_in_byte) & 1;
-+ }
-+ }
-+ uint8_t lo = 0, hi = 0;
-+ for (int k = 0; k < 8; k++) hi = (hi << 1) | perm[k];
-+ for (int k = 8; k < 16; k++) lo = (lo << 1) | perm[k];
-+ for (int i = 0; i < 16; i++) r[i] = 0;
-+ r[8] = lo;
-+ r[9] = hi;
-+ setVRBytes(vrt, r); break;
-+ }
-+
-+ // VA-form ops vmladduhm (XO=34), vsel (XO=42), vperm (XO=43) are
-+ // peeled off in the pre-dispatch above (see "VA-form pre-dispatch"
-+ // comment near the top of this function), since the 11-bit XO
-+ // mask conflates VRC into the case label.
-+
-+ // === Unpack high signed (BE "high" half = LE indices 8..15) ===
-+ // vupkhsb: VRT.halfword[i] = sign_extend_to_16(VRB.byte[8 + i]) in LE lane
-+ // indices: BE "high" elements 0..7 are stored at LE bytes 8..15, so vupkhsb
-+ // sign-extends LE bytes 8..15 of VRB. PPC64LE wasm calls these the
-+ // "high" lanes per PPC convention; the JIT compensates internally via
-+ // the vupklsb/vupkhsb swap documented in MacroAssembler-ppc64-inl.h.
-+ case 526: // vupkhsb (high signed byte → halfword)
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U16(r, i, (uint16_t)(int16_t)LANE_S8(b, 8 + i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 590: // vupkhsh (high signed halfword → word)
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U32(r, i, (uint32_t)(int32_t)LANE_S16(b, 4 + i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 1614: // vupkhsw (high signed word → dword) POWER8+
-+ for (int i = 0; i < 2; i++) {
-+ SET_LANE_U64(r, i, (uint64_t)(int64_t)LANE_S32(b, 2 + i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 654: // vupklsb (low signed byte → halfword) — PPC LE: takes high lanes
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U16(r, i, (uint16_t)(int16_t)LANE_S8(b, i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 718: // vupklsh (low signed halfword → word)
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U32(r, i, (uint32_t)(int32_t)LANE_S16(b, i));
-+ }
-+ setVRBytes(vrt, r); break;
-+ case 1742: // vupklsw (low signed word → dword)
-+ for (int i = 0; i < 2; i++) {
-+ SET_LANE_U64(r, i, (uint64_t)(int64_t)LANE_S32(b, i));
-+ }
-+ setVRBytes(vrt, r); break;
-+
-+ // === Pack (saturate or modulo) ===
-+ //
-+ // vpk* definitions are BE-specified:
-+ // VT.byte[0..7] = saturate(VA.halfword[0..7]), VT.byte[8..15] =
-+ // saturate(VB.halfword[0..7]) (BE-numbered throughout). On
-+ // PPC64LE register storage that inverts to: LE bytes 0-7 = VB's
-+ // saturated halfwords, LE bytes 8-15 = VA's.
-+ //
-+ // vpkshus = XO 270 (s16 → u8 sat)
-+ // vpkshss = XO 398 (s16 → s8 sat)
-+ // vpkswus = XO 334 (s32 → u16 sat)
-+ // vpkswss = XO 462 (s32 → s16 sat)
-+ // The sim previously had three of these four labels rotated
-+ // (270=vpkshss, 334=vpkshus, 398=vpkswus) so every i8x16/i16x8
-+ // narrow_* call silently used the wrong saturation kind or
-+ // lane width — vpkshss was completely absent.
-+ case 398: { // vpkshss (signed halfword → signed byte)
-+ for (int i = 0; i < 8; i++) {
-+ int v = LANE_S16(b, i);
-+ if (v > INT8_MAX) v = INT8_MAX;
-+ if (v < INT8_MIN) v = INT8_MIN;
-+ SET_LANE_U8(r, i, (uint8_t)(int8_t)v);
-+ }
-+ for (int i = 0; i < 8; i++) {
-+ int v = LANE_S16(a, i);
-+ if (v > INT8_MAX) v = INT8_MAX;
-+ if (v < INT8_MIN) v = INT8_MIN;
-+ SET_LANE_U8(r, 8 + i, (uint8_t)(int8_t)v);
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+ case 462: { // vpkswss (signed word → signed halfword)
-+ for (int i = 0; i < 4; i++) {
-+ int64_t v = LANE_S32(b, i);
-+ if (v > INT16_MAX) v = INT16_MAX;
-+ if (v < INT16_MIN) v = INT16_MIN;
-+ SET_LANE_U16(r, i, (uint16_t)(int16_t)v);
-+ }
-+ for (int i = 0; i < 4; i++) {
-+ int64_t v = LANE_S32(a, i);
-+ if (v > INT16_MAX) v = INT16_MAX;
-+ if (v < INT16_MIN) v = INT16_MIN;
-+ SET_LANE_U16(r, 4 + i, (uint16_t)(int16_t)v);
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+ case 270: { // vpkshus (signed halfword → unsigned byte, sat)
-+ for (int i = 0; i < 8; i++) {
-+ int v = LANE_S16(b, i);
-+ if (v > UINT8_MAX) v = UINT8_MAX;
-+ if (v < 0) v = 0;
-+ SET_LANE_U8(r, i, (uint8_t)v);
-+ }
-+ for (int i = 0; i < 8; i++) {
-+ int v = LANE_S16(a, i);
-+ if (v > UINT8_MAX) v = UINT8_MAX;
-+ if (v < 0) v = 0;
-+ SET_LANE_U8(r, 8 + i, (uint8_t)v);
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+ case 334: { // vpkswus (signed word → unsigned halfword, sat)
-+ for (int i = 0; i < 4; i++) {
-+ int64_t v = LANE_S32(b, i);
-+ if (v > UINT16_MAX) v = UINT16_MAX;
-+ if (v < 0) v = 0;
-+ SET_LANE_U16(r, i, (uint16_t)v);
-+ }
-+ for (int i = 0; i < 4; i++) {
-+ int64_t v = LANE_S32(a, i);
-+ if (v > UINT16_MAX) v = UINT16_MAX;
-+ if (v < 0) v = 0;
-+ SET_LANE_U16(r, 4 + i, (uint16_t)v);
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+
-+ // === POWER9 compare not-equal (vcmpne{b,h,w}) — Rc=0 and Rc=1 ===
-+ case 7: // vcmpneb
-+ case 1031: // vcmpneb.
-+ for (int i = 0; i < 16; i++) {
-+ SET_LANE_U8(r, i, LANE_U8(a, i) != LANE_U8(b, i) ? 0xFF : 0);
-+ }
-+ VCMP_DONE(16, 1); break;
-+ case 71: // vcmpneh
-+ case 1095: // vcmpneh.
-+ for (int i = 0; i < 8; i++) {
-+ SET_LANE_U16(r, i, LANE_U16(a, i) != LANE_U16(b, i) ? 0xFFFF : 0);
-+ }
-+ VCMP_DONE(8, 2); break;
-+ case 135: // vcmpnew
-+ case 1159: // vcmpnew.
-+ for (int i = 0; i < 4; i++) {
-+ SET_LANE_U32(r, i,
-+ LANE_U32(a, i) != LANE_U32(b, i) ? 0xFFFFFFFFu : 0);
-+ }
-+ VCMP_DONE(4, 4); break;
-+ #undef VCMP_DONE
-+
-+ // === Population count per byte (POWER8) ===
-+ case 1795: { // vpopcntb (XO 0x703 = 1795). VRA field unused.
-+ for (int i = 0; i < 16; i++) {
-+ SET_LANE_U8(r, i, (uint8_t)__builtin_popcount(LANE_U8(b, i)));
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+
-+ // === vsldoi: VRT = (VRA || VRB) shifted left by SH bytes (SH at bits 22..25) ===
-+ case 44: case 45: case 46: case 47: {
-+ // SH is at bits 22..25 (PPC) → LSB bits 6..9 of the instruction →
-+ // (instructionBits >> 6) & 0xF. Our XO mask already bottoms-out at
-+ // bit 0, so extract from the raw instruction.
-+ uint32_t sh = (instr->instructionBits() >> 6) & 0xF;
-+ uint8_t cat[32];
-+ memcpy(cat, a, 16);
-+ memcpy(cat + 16, b, 16);
-+ for (int i = 0; i < 16; i++) {
-+ r[i] = cat[sh + i];
-+ }
-+ setVRBytes(vrt, r); break;
-+ }
-+
-+
-+ default:
-+ MOZ_CRASH_UNSAFE_PRINTF(
-+ "decodeVMX: unimplemented XO=%u (instruction 0x%08x)", xo,
-+ instr->instructionBits());
-+ }
-+
-+vmx_done:
-+ #undef LANE_U8
-+ #undef LANE_S8
-+ #undef LANE_U16
-+ #undef LANE_S16
-+ #undef LANE_U32
-+ #undef LANE_S32
-+ #undef LANE_U64
-+ #undef LANE_S64
-+ #undef SET_LANE_U8
-+ #undef SET_LANE_U16
-+ #undef SET_LANE_U32
-+ #undef SET_LANE_U64
-+ ; // empty stmt for label
-+}
-+
-+// -----------------------------------------------------------------------------
-+// decodeVSX: Major opcode 60 (XX1-, XX2-, XX3- and XX4-form), including
-+// xxsel, mfvsrd, mtvsrd, mtvsrwz, mtvsrws, xscvdpsp, xscvdpspn, xscvspdp,
-+// xscvspdpn, xxbrd
-+
-+void Simulator::decodeVSX(SimInstruction* instr) {
-+ // VSX major opcode 60 covers XX1/XX2/XX3/XX4 forms. We dispatch XX4
-+ // (xxsel) first because its XO is only 2 bits (at ISA 26-27 = sim
-+ // bits 5-4), and the XC register field at ISA 21-25 would otherwise
-+ // produce 32 different 9-bit XO values to enumerate in the switch.
-+ // Peel off any instruction with XX4 XO=3 (xxsel). No XX2/XX3 op currently
-+ // emitted by the JIT has sim bits (5,4) == 3.
-+ if (instr->bits(5, 4) == 3) {
-+ // xxsel XT,XA,XB,XC (VA-like XX4-form).
-+ // XT[i] = (XA[i] & ~XC[i]) | (XB[i] & XC[i])
-+ // Register fields: XA/XB/XT per-byte; XC at ISA bits 21-25 (sim
-+ // bits 10-6) with CX extension at ISA bit 28 (sim bit 3).
-+ int xa = int(instr->raValue() | (instr->bit(2) << 5));
-+ int xb = int(instr->rbValue() | (instr->bit(1) << 5));
-+ int xt = int(instr->rtValue() | (instr->bit(0) << 5));
-+ int xc = int(instr->bits(10, 6) | (instr->bit(3) << 5));
-+ uint8_t ab[16], bb[16], cb[16], result[16];
-+ getVSR128(xa, ab);
-+ getVSR128(xb, bb);
-+ getVSR128(xc, cb);
-+ for (int i = 0; i < 16; i++) {
-+ result[i] = (uint8_t)((ab[i] & ~cb[i]) | (bb[i] & cb[i]));
-+ }
-+ setVSR128(xt, result);
-+ return;
-+ }
-+
-+ // The remaining forms (XX1/XX2/XX3) share a 9-bit XO at ISA bits
-+ // 21-29 (sim bits 10-2). For XX3 this is (8-bit XO << 1) | AX; for
-+ // XX2 the full 9 bits are the XO (no AX field).
-+ uint32_t xo = instr->bits(10, 2);
-+ uint32_t rt = instr->rtValue();
-+ uint32_t rb = instr->rbValue();
-+
-+ switch (xo) {
-+ // xscvdpsp / xscvdpspn / xscvspdp / xscvspdpn / xxbrd are
-+ // XX2-form: XT/XB are each 6-bit (5-bit field + TX/BX extension at
-+ // sim bits 0/1). Post-Phase-2 the JIT emits these with Simd128
-+ // targets (encoding 32-63), which require the extension bit to
-+ // select VR-space instead of FPR-space. The previous code used
-+ // only the 5-bit field, so any VR-space target silently clobbered
-+ // FPR 0..31 and the post-splat fbits in splatX4 never reached the
-+ // vector lanes.
-+ case 265: {
-+ // xscvdpsp: double→single with sNaN quieting. The ISA says
-+ // result lands at XT[0:31] (BE word 0 = LE bytes 12..15) and
-+ // XT[32:127] is "undefined". Real POWER9 silicon actually
-+ // duplicates the result into BE word 1 as well, so the bytes
-+ // at LE 8..11 hold the same single. The JIT's
-+ // replaceLaneFloat32x4 lowering depends on this: it follows
-+ // xscvdpspn with `xxinsertw …, 12`, which reads XB.word[1]
-+ // (LE bytes 8..11). Zeroing those bytes here would silently
-+ // lose the single under sim. Mirror HW.
-+ int xt = int(rt | (instr->bit(0) << 5));
-+ int xb = int(rb | ((instr->instructionBits() >> 1) & 1) << 5);
-+ uint8_t bb[16];
-+ getVSR128(xb, bb);
-+ // Source double at BE DW0 = LE bytes 8..15 of xb.
-+ uint64_t dbits = 0;
-+ for (int i = 0; i < 8; i++) dbits |= ((uint64_t)bb[8 + i]) << (i * 8);
-+ double frb;
-+ memcpy(&frb, &dbits, sizeof(frb));
-+ float result = demoteDoublePreservingNaN(frb);
-+ uint32_t fbits;
-+ memcpy(&fbits, &result, sizeof(fbits));
-+ if ((fbits & 0x7F800000u) == 0x7F800000u && (fbits & 0x007FFFFFu) != 0) {
-+ fbits |= 0x00400000u;
-+ }
-+ uint8_t out[16];
-+ memset(out, 0, 8);
-+ // BE word 1 (LE 8..11) and BE word 0 (LE 12..15) both = fbits.
-+ for (int off : {8, 12}) {
-+ out[off] = (uint8_t)(fbits);
-+ out[off + 1] = (uint8_t)(fbits >> 8);
-+ out[off + 2] = (uint8_t)(fbits >> 16);
-+ out[off + 3] = (uint8_t)(fbits >> 24);
-+ }
-+ setVSR128(xt, out);
-+ break;
-+ }
-+ case 267: {
-+ // xscvdpspn: same as xscvdpsp but non-signaling. Same HW-observed
-+ // word-1 duplication (see xscvdpsp comment above).
-+ int xt = int(rt | (instr->bit(0) << 5));
-+ int xb = int(rb | ((instr->instructionBits() >> 1) & 1) << 5);
-+ uint8_t bb[16];
-+ getVSR128(xb, bb);
-+ uint64_t dbits = 0;
-+ for (int i = 0; i < 8; i++) dbits |= ((uint64_t)bb[8 + i]) << (i * 8);
-+ double frb;
-+ memcpy(&frb, &dbits, sizeof(frb));
-+ float result = demoteDoublePreservingNaN(frb);
-+ uint32_t fbits;
-+ memcpy(&fbits, &result, sizeof(fbits));
-+ uint8_t out[16];
-+ memset(out, 0, 8);
-+ for (int off : {8, 12}) {
-+ out[off] = (uint8_t)(fbits);
-+ out[off + 1] = (uint8_t)(fbits >> 8);
-+ out[off + 2] = (uint8_t)(fbits >> 16);
-+ out[off + 3] = (uint8_t)(fbits >> 24);
-+ }
-+ setVSR128(xt, out);
-+ break;
-+ }
-+ case 393: {
-+ // xvcvdpsp: convert two doubles to two singles, replicating each
-+ // result across its dword. BE words = [s(BE_dw0), s(BE_dw0),
-+ // s(BE_dw1), s(BE_dw1)]. SIGNALING form per ISA: sNaN inputs are
-+ // quieted (high-order fraction bit set in result).
-+ int xt = int(rt | (instr->bit(0) << 5));
-+ int xb = int(rb | ((instr->instructionBits() >> 1) & 1) << 5);
-+ uint8_t bb[16], out[16];
-+ getVSR128(xb, bb);
-+ uint32_t fbits[2];
-+ // BE_dw0 = LE bytes 8..15, BE_dw1 = LE bytes 0..7.
-+ for (int dw = 0; dw < 2; dw++) {
-+ int leOff = (dw == 0) ? 8 : 0;
-+ uint64_t dbits = 0;
-+ for (int i = 0; i < 8; i++) {
-+ dbits |= ((uint64_t)bb[leOff + i]) << (i * 8);
-+ }
-+ double frb;
-+ memcpy(&frb, &dbits, sizeof(frb));
-+ float result = demoteDoublePreservingNaN(frb);
-+ memcpy(&fbits[dw], &result, sizeof(uint32_t));
-+ if ((fbits[dw] & 0x7F800000u) == 0x7F800000u &&
-+ (fbits[dw] & 0x007FFFFFu) != 0) {
-+ fbits[dw] |= 0x00400000u; // quiet sNaN result
-+ }
-+ }
-+ // LE words: [s(dw1), s(dw1), s(dw0), s(dw0)]
-+ // (LE word 0 = BE word 3 = s(dw1); LE word 3 = BE word 0 = s(dw0)).
-+ uint32_t leWords[4] = {fbits[1], fbits[1], fbits[0], fbits[0]};
-+ for (int w = 0; w < 4; w++) {
-+ out[w * 4] = (uint8_t)leWords[w];
-+ out[w * 4 + 1] = (uint8_t)(leWords[w] >> 8);
-+ out[w * 4 + 2] = (uint8_t)(leWords[w] >> 16);
-+ out[w * 4 + 3] = (uint8_t)(leWords[w] >> 24);
-+ }
-+ setVSR128(xt, out);
-+ break;
-+ }
-+ case 216: // xvcvdpsxws: double → signed word, saturating, RTZ (vector)
-+ case 200: { // xvcvdpuxws: double → unsigned word, saturating, RTZ (vector)
-+ // src1 := XB.dword_BE[0]; src2 := XB.dword_BE[1]
-+ // r1 := ConvertDPtoSat(src1); r2 := ConvertDPtoSat(src2)
-+ // XT.word_BE[0] := r1; XT.word_BE[1] := r1 (replicated)
-+ // XT.word_BE[2] := r2; XT.word_BE[3] := r2 (replicated)
-+ // Saturation: signed clamps to [INT32_MIN, INT32_MAX] with NaN→INT32_MIN;
-+ // unsigned clamps to [0, UINT32_MAX] with NaN→0 and neg→0.
-+ // BE_dw0 = LE bytes 8..15; BE_dw1 = LE bytes 0..7.
-+ bool isSigned = (xo == 216);
-+ int xt = int(rt | (instr->bit(0) << 5));
-+ int xb = int(rb | ((instr->instructionBits() >> 1) & 1) << 5);
-+ uint8_t bb[16], out[16];
-+ getVSR128(xb, bb);
-+ const int srcOffsets[2] = {8, 0}; // BE_dw0 (LE 8..15), BE_dw1 (LE 0..7)
-+ uint32_t results[2];
-+ for (int lane = 0; lane < 2; lane++) {
-+ uint64_t dbits = 0;
-+ for (int j = 0; j < 8; j++) {
-+ dbits |= ((uint64_t)bb[srcOffsets[lane] + j]) << (j * 8);
-+ }
-+ double dval;
-+ memcpy(&dval, &dbits, sizeof(dval));
-+ if (std::isnan(dval)) {
-+ results[lane] = isSigned ? 0x80000000u : 0u;
-+ } else if (isSigned) {
-+ if (dval >= 2147483647.0) {
-+ results[lane] = 0x7FFFFFFFu;
-+ } else if (dval <= -2147483648.0) {
-+ results[lane] = 0x80000000u;
-+ } else {
-+ results[lane] = (uint32_t)(int32_t)dval; // RTZ
-+ }
-+ } else { // unsigned
-+ if (dval <= 0.0) {
-+ results[lane] = 0u;
-+ } else if (dval >= 4294967295.0) {
-+ results[lane] = 0xFFFFFFFFu;
-+ } else {
-+ results[lane] = (uint32_t)dval; // RTZ
-+ }
-+ }
-+ }
-+ // Replicated layout: BE words [r1, r1, r2, r2]; in LE bytes
-+ // [r2, r2, r1, r1] (LE word 0 = BE word 3 = r2, LE word 3 = BE word 0 = r1).
-+ uint32_t leWords[4] = {results[1], results[1], results[0], results[0]};
-+ for (int w = 0; w < 4; w++) {
-+ out[w * 4] = (uint8_t)leWords[w];
-+ out[w * 4 + 1] = (uint8_t)(leWords[w] >> 8);
-+ out[w * 4 + 2] = (uint8_t)(leWords[w] >> 16);
-+ out[w * 4 + 3] = (uint8_t)(leWords[w] >> 24);
-+ }
-+ setVSR128(xt, out);
-+ break;
-+ }
-+ case 248: // xvcvsxwdp: signed word → double (vector)
-+ case 232: { // xvcvuxwdp: unsigned word → double (vector)
-+ // src1 := XB.word_BE[0]; src2 := XB.word_BE[2]
-+ // XT.dword_BE[0] := Convert(src1); XT.dword_BE[1] := Convert(src2)
-+ // BE word 0 = LE bytes 12..15; BE word 2 = LE bytes 4..7.
-+ // Output BE dword 0 = LE bytes 8..15; BE dword 1 = LE bytes 0..7.
-+ // No NaN handling needed (integer source).
-+ bool isSigned = (xo == 248);
-+ int xt = int(rt | (instr->bit(0) << 5));
-+ int xb = int(rb | ((instr->instructionBits() >> 1) & 1) << 5);
-+ uint8_t bb[16], out[16];
-+ getVSR128(xb, bb);
-+ const int srcOffsets[2] = {12, 4};
-+ const int dstOffsets[2] = {8, 0};
-+ for (int lane = 0; lane < 2; lane++) {
-+ uint32_t bits = (uint32_t)bb[srcOffsets[lane]] |
-+ ((uint32_t)bb[srcOffsets[lane] + 1] << 8) |
-+ ((uint32_t)bb[srcOffsets[lane] + 2] << 16) |
-+ ((uint32_t)bb[srcOffsets[lane] + 3] << 24);
-+ double dval = isSigned ? (double)(int32_t)bits : (double)bits;
-+ uint64_t dbits;
-+ memcpy(&dbits, &dval, sizeof(dbits));
-+ for (int i = 0; i < 8; i++) {
-+ out[dstOffsets[lane] + i] = (uint8_t)(dbits >> (i * 8));
-+ }
-+ }
-+ setVSR128(xt, out);
-+ break;
-+ }
-+ case 457: {
-+ // xvcvspdp: convert two singles to two doubles. SIGNALING form
-+ // per ISA: sNaN inputs are quieted in the result (bit 51 set).
-+ // src1 := XB.word_BE[0]; src2 := XB.word_BE[2]
-+ // XT.dword_BE[0] := ConvertSPtoDP(src1)
-+ // XT.dword_BE[1] := ConvertSPtoDP(src2)
-+ // BE word 0 = LE bytes 12..15; BE word 2 = LE bytes 4..7.
-+ // Output BE dword 0 = LE bytes 8..15; BE dword 1 = LE bytes 0..7.
-+ int xt = int(rt | (instr->bit(0) << 5));
-+ int xb = int(rb | ((instr->instructionBits() >> 1) & 1) << 5);
-+ uint8_t bb[16], out[16];
-+ getVSR128(xb, bb);
-+ // src1 from BE word 0 (LE 12..15), output dword at LE 8..15.
-+ // src2 from BE word 2 (LE 4..7), output dword at LE 0..7.
-+ const int srcOffsets[2] = {12, 4}; // LE byte offsets of word_BE[0], word_BE[2]
-+ const int dstOffsets[2] = {8, 0}; // LE byte offsets of dword_BE[0], dword_BE[1]
-+ for (int lane = 0; lane < 2; lane++) {
-+ uint32_t fbits = (uint32_t)bb[srcOffsets[lane]] |
-+ ((uint32_t)bb[srcOffsets[lane] + 1] << 8) |
-+ ((uint32_t)bb[srcOffsets[lane] + 2] << 16) |
-+ ((uint32_t)bb[srcOffsets[lane] + 3] << 24);
-+ float fval;
-+ memcpy(&fval, &fbits, sizeof(fval));
-+ double dval = promoteFloatPreservingNaN(fval);
-+ uint64_t dbits;
-+ memcpy(&dbits, &dval, sizeof(dbits));
-+ if ((dbits & 0x7FF0000000000000ULL) == 0x7FF0000000000000ULL &&
-+ (dbits & 0x000FFFFFFFFFFFFFULL) != 0) {
-+ dbits |= 0x0008000000000000ULL; // quiet sNaN result
-+ }
-+ for (int i = 0; i < 8; i++) {
-+ out[dstOffsets[lane] + i] = (uint8_t)(dbits >> (i * 8));
-+ }
-+ }
-+ setVSR128(xt, out);
-+ break;
-+ }
-+ case 329: {
-+ // xscvspdp: single→double from BE word 0 of XB. SIGNALING form;
-+ // an sNaN input yields a qNaN result with the high-order
-+ // fraction bit (quiet bit) set.
-+ int xt = int(rt | (instr->bit(0) << 5));
-+ int xb = int(rb | ((instr->instructionBits() >> 1) & 1) << 5);
-+ uint8_t bb[16];
-+ getVSR128(xb, bb);
-+ // BE word 0 = LE bytes 12..15 of xb.
-+ uint32_t fbits = (uint32_t)bb[12] |
-+ ((uint32_t)bb[13] << 8) |
-+ ((uint32_t)bb[14] << 16) |
-+ ((uint32_t)bb[15] << 24);
-+ float fval;
-+ memcpy(&fval, &fbits, sizeof(fval));
-+ double dval = promoteFloatPreservingNaN(fval);
-+ uint64_t dbits;
-+ memcpy(&dbits, &dval, sizeof(dbits));
-+ // Quiet any NaN result (signaling form): set bit 51 of mantissa.
-+ if ((dbits & 0x7FF0000000000000ULL) == 0x7FF0000000000000ULL &&
-+ (dbits & 0x000FFFFFFFFFFFFFULL) != 0) {
-+ dbits |= 0x0008000000000000ULL;
-+ }
-+ uint8_t out[16];
-+ memset(out, 0, 8);
-+ for (int i = 0; i < 8; i++) out[8 + i] = (uint8_t)(dbits >> (i * 8));
-+ setVSR128(xt, out);
-+ break;
-+ }
-+ case 331: {
-+ // xscvspdpn: non-signaling variant of xscvspdp.
-+ int xt = int(rt | (instr->bit(0) << 5));
-+ int xb = int(rb | ((instr->instructionBits() >> 1) & 1) << 5);
-+ uint8_t bb[16];
-+ getVSR128(xb, bb);
-+ uint32_t fbits = (uint32_t)bb[12] |
-+ ((uint32_t)bb[13] << 8) |
-+ ((uint32_t)bb[14] << 16) |
-+ ((uint32_t)bb[15] << 24);
-+ float fval;
-+ memcpy(&fval, &fbits, sizeof(fval));
-+ double dval = promoteFloatPreservingNaN(fval);
-+ uint64_t dbits;
-+ memcpy(&dbits, &dval, sizeof(dbits));
-+ uint8_t out[16];
-+ memset(out, 0, 8);
-+ for (int i = 0; i < 8; i++) out[8 + i] = (uint8_t)(dbits >> (i * 8));
-+ setVSR128(xt, out);
-+ break;
-+ }
-+ case 347: {
-+ // POWER9 XX2-form ops sharing XO=347; disambiguated by the 5-bit
-+ // A immediate (sim bits 20..16):
-+ // A=0 -> xsxexpdp (extract biased exponent into 11 LSBs of XT.dw0)
-+ // A=16 -> xscvhpdp (FP16 -> FP64)
-+ // A=17 -> xscvdphp (FP64 -> FP16)
-+ // Half placement: the FP16 value lives at LE bytes 8..9 of
-+ // the VSR (= BE bits 48..63 of
-+ // dword[0]), with the rest of dword[0] zeroed. This matches the
-+ // lxsihzx layout already used by the JIT.
-+ uint32_t aImm = (instr->instructionBits() >> 16) & 0x1F;
-+ int xt = int(rt | (instr->bit(0) << 5));
-+ int xb = int(rb | ((instr->instructionBits() >> 1) & 1) << 5);
-+ uint8_t bb[16], out[16];
-+ getVSR128(xb, bb);
-+ memset(out, 0, 16);
-+ if (aImm == 17) {
-+ // xscvdphp: read FP64 from BE 0..63 of XB (LE bytes 8..15),
-+ // convert to FP16, place at LE bytes 8..9 of XT.
-+ double d;
-+ memcpy(&d, bb + 8, 8);
-+ uint16_t h = js::float16(d).toRawBits();
-+ out[8] = (uint8_t)(h & 0xFF);
-+ out[9] = (uint8_t)((h >> 8) & 0xFF);
-+ } else if (aImm == 16) {
-+ // xscvhpdp: read FP16 from LE bytes 8..9 of XB, convert to FP64,
-+ // place at LE bytes 8..15 of XT.
-+ uint16_t h = (uint16_t)bb[8] | ((uint16_t)bb[9] << 8);
-+ double d = static_cast<double>(js::float16::fromRawBits(h));
-+ memcpy(out + 8, &d, 8);
-+ } else if (aImm == 0) {
-+ // xsxexpdp: read FP64 from LE bytes 8..15 of XB, extract biased
-+ // exponent (bits 1..11 of the IEEE-754 double = bits 52..62 of
-+ // the 64-bit pattern), place into XT.dw0 with rest zeroed.
-+ uint64_t bits = 0;
-+ for (int i = 0; i < 8; i++) bits |= uint64_t(bb[8 + i]) << (i * 8);
-+ uint64_t exp = (bits >> 52) & 0x7FF;
-+ for (int i = 0; i < 8; i++) out[8 + i] = (uint8_t)(exp >> (i * 8));
-+ } else {
-+ MOZ_CRASH_UNSAFE_PRINTF(
-+ "decodeVSX XO=347 with unexpected A=%u (instr 0x%08x)",
-+ aImm, instr->instructionBits());
-+ }
-+ setVSR128(xt, out);
-+ break;
-+ }
-+ case 475: {
-+ // xxbrd: byte-reverse each doubleword.
-+ int xt = int(rt | (instr->bit(0) << 5));
-+ int xb = int(rb | ((instr->instructionBits() >> 1) & 1) << 5);
-+ uint8_t bb[16], out[16];
-+ getVSR128(xb, bb);
-+ for (int i = 0; i < 8; i++) out[i] = bb[7 - i];
-+ for (int i = 0; i < 8; i++) out[8 + i] = bb[15 - i];
-+ setVSR128(xt, out);
-+ break;
-+ }
-+
-+ // === XX3-form scalar: xsmaxjdp / xsminjdp (POWER9) ===
-+ //
-+ // xs{max,min}jdp XT, XA, XB. Scalar inputs at BE bits 0..63 of
-+ // XA / XB (= LE bytes 8..15); result lands at BE 0..63 of XT
-+ // (upper bits "undefined" per ISA).
-+ //
-+ // Semantics match ECMA-262 Math.{max,min} / wasm f64.{max,min}:
-+ // - NaN: if A is NaN return A; else if B is NaN return B. sNaN
-+ // payload preserved bit-for-bit (NOT quieted).
-+ // - ±0 tie: signed-zero ordering. xsmaxjdp returns +0 for any
-+ // mix of (-0, +0); xsminjdp returns -0.
-+ // - Otherwise: standard IEEE max / min.
-+ case 288: case 289: // xsmaxjdp (XO8=144 → 9-bit 288/289)
-+ case 304: case 305: { // xsminjdp (XO8=152 → 9-bit 304/305)
-+ int xa = int(instr->raValue() | (instr->bit(2) << 5));
-+ int xb = int(instr->rbValue() | (instr->bit(1) << 5));
-+ int xt = int(instr->rtValue() | (instr->bit(0) << 5));
-+ uint8_t ab[16], bb[16], out[16];
-+ getVSR128(xa, ab);
-+ getVSR128(xb, bb);
-+ double a, b;
-+ memcpy(&a, ab + 8, 8);
-+ memcpy(&b, bb + 8, 8);
-+ bool isMax = (xo >> 1) == 144;
-+ double r;
-+ if (std::isnan(a)) {
-+ r = a;
-+ } else if (std::isnan(b)) {
-+ r = b;
-+ } else if (a == 0.0 && b == 0.0) {
-+ // Signed-zero ordering: max picks +0, min picks -0.
-+ if (isMax) {
-+ r = std::signbit(a) ? b : a;
-+ } else {
-+ r = std::signbit(a) ? a : b;
-+ }
-+ } else {
-+ r = isMax ? std::max(a, b) : std::min(a, b);
-+ }
-+ memset(out, 0, 8);
-+ memcpy(out + 8, &r, 8);
-+ setVSR128(xt, out);
-+ break;
-+ }
-+
-+ // --- VSX XX3-form: xxpermdi ---
-+ //
-+ // xxpermdi XT, XA, XB, DM:
-+ // XT.DW0 = XA.DW(DM[0])
-+ // XT.DW1 = XB.DW(DM[1])
-+ // In BE, DW0 is MSB-side, DW1 is LSB-side. On PPC64LE register
-+ // storage, DW0 = LE bytes 8-15 and DW1 = LE bytes 0-7. The sim's
-+ // previous implementation used the reversed "DW0 = LE 0-7"
-+ // convention which cancelled for self-swap round-trips but
-+ // produced wrong halves when chained with ISA-correct ops
-+ // (mtvsrd, xxspltw, mfvsrd).
-+ case 20: case 21: // xxpermdi DM=0
-+ case 84: case 85: // xxpermdi DM=1
-+ case 148: case 149: // xxpermdi DM=2 (= xxswapd when XA==XB)
-+ case 212: case 213: { // xxpermdi DM=3
-+ uint8_t dm_hi = (xo >> 7) & 1; // DM[0]
-+ uint8_t dm_lo = (xo >> 6) & 1; // DM[1]
-+ int xa = int(instr->raValue() | (instr->bit(2) << 5));
-+ int xb = int(instr->rbValue() | (instr->bit(1) << 5));
-+ int xt = int(instr->rtValue() | (instr->bit(0) << 5));
-+ uint8_t xa_bytes[16], xb_bytes[16], result[16];
-+ getVSR128(xa, xa_bytes);
-+ getVSR128(xb, xb_bytes);
-+ // DW0 in LE storage is bytes 8-15; DW1 is bytes 0-7.
-+ // XT.DW0 (result[8..15]) = XA.DW(dm_hi)
-+ // XT.DW1 (result[0..7]) = XB.DW(dm_lo)
-+ // DW(0) is at LE 8, DW(1) is at LE 0.
-+ memcpy(result + 8, xa_bytes + (dm_hi ? 0 : 8), 8);
-+ memcpy(result, xb_bytes + (dm_lo ? 0 : 8), 8);
-+ setVSR128(xt, result);
-+ break;
-+ }
-+
-+ // --- VSX logical (XX3-form, primary opcode 60) ---
-+ //
-+ // Each takes two 6-bit VSR sources XA/XB and writes 6-bit VSR
-+ // destination XT. 8-bit ISA XO at bits 21-28; our
-+ // 9-bit XO extraction (bits 10:2) includes the AX bit at position 0,
-+ // so each op appears as two consecutive values (AX=0 and AX=1).
-+ //
-+ // xxland XT,XA,XB XO=130 (9-bit: 260, 261) XT = XA & XB
-+ // xxlandc XT,XA,XB XO=138 (276, 277) XT = XA & ~XB
-+ // xxlor XT,XA,XB XO=146 (292, 293) XT = XA | XB
-+ // xxlxor XT,XA,XB XO=154 (308, 309) XT = XA ^ XB
-+ // xxlnor XT,XA,XB XO=162 (324, 325) XT = ~(XA | XB)
-+ // xxlorc XT,XA,XB XO=170 (340, 341) XT = XA | ~XB
-+ // xxlnand XT,XA,XB XO=178 (356, 357) XT = ~(XA & XB)
-+ // xxleqv XT,XA,XB XO=186 (372, 373) XT = ~(XA ^ XB)
-+ //
-+ // The encoding constants in Assembler-ppc64.h match: PPC_xxlor=0xF0000490
-+ // has bits 4,7,10 set in its base (XO=146 in the 8-bit field), which
-+ // under the simulator's 9-bit extraction gives 2*146=292 (AX=0 default).
-+ case 260: case 261: // xxland
-+ case 276: case 277: // xxlandc
-+ case 292: case 293: // xxlor
-+ case 308: case 309: // xxlxor
-+ case 324: case 325: // xxlnor
-+ case 340: case 341: // xxlorc
-+ case 356: case 357: // xxlnand
-+ case 372: case 373: // xxleqv
-+ {
-+ int xa = int(instr->raValue() | (instr->bit(2) << 5));
-+ int xb = int(instr->rbValue() | (instr->bit(1) << 5));
-+ int xt = int(instr->rtValue() | (instr->bit(0) << 5));
-+ uint8_t a_bytes[16], b_bytes[16], result[16];
-+ getVSR128(xa, a_bytes);
-+ getVSR128(xb, b_bytes);
-+ // Dispatch on the 8-bit ISA XO (ignoring AX bit at position 0).
-+ uint32_t xo8 = xo >> 1;
-+ for (int i = 0; i < 16; i++) {
-+ uint8_t a = a_bytes[i], b = b_bytes[i];
-+ switch (xo8) {
-+ case 130: result[i] = a & b; break; // xxland
-+ case 138: result[i] = a & ~b; break; // xxlandc
-+ case 146: result[i] = a | b; break; // xxlor
-+ case 154: result[i] = a ^ b; break; // xxlxor
-+ case 162: result[i] = (uint8_t)~(a | b); break; // xxlnor
-+ case 170: result[i] = a | (uint8_t)~b; break; // xxlorc
-+ case 178: result[i] = (uint8_t)~(a & b); break; // xxlnand
-+ case 186: result[i] = (uint8_t)~(a ^ b); break; // xxleqv
-+ }
-+ }
-+ setVSR128(xt, result);
-+ break;
-+ }
-+
-+ // === XX2-form: xxspltw (splat word from VRB[UIM] to all 4 lanes) ===
-+ //
-+ // xxspltw: UIM selects one of four words in BE numbering. UIM=0
-+ // → BE word 0 (MSB side of the 128 bits). On PPC64LE register
-+ // storage that maps to LE word (3 - UIM). With the input
-+ // {0x11111111, 0x22222222, 0x33333333, 0x44444444}: UIM=0
-+ // splats 0x44444444 (= LE word 3), UIM=3 splats 0x11111111
-+ // (= LE word 0). The JIT emits xxspltw UIM=1 after mtvsrd on the
-+ // POWER8 splatX4 path — mtvsrd puts the GPR's low 32 bits in BE
-+ // word 1 (= LE word 2 on HW), so xxspltw UIM=1 picks up exactly
-+ // that word and splats it to every lane.
-+ case 164: { // xxspltw
-+ int xt = int(rt | (instr->bit(0) << 5));
-+ int xb = int(rb | ((instr->instructionBits() >> 1) & 1) << 5);
-+ uint32_t uim = (instr->instructionBits() >> 16) & 0x3;
-+ uint32_t leIdx = 3 - uim; // BE word UIM → LE word (3-UIM)
-+ uint8_t bb[16], result[16];
-+ getVSR128(xb, bb);
-+ uint32_t word = (uint32_t)bb[leIdx * 4] |
-+ ((uint32_t)bb[leIdx * 4 + 1] << 8) |
-+ ((uint32_t)bb[leIdx * 4 + 2] << 16) |
-+ ((uint32_t)bb[leIdx * 4 + 3] << 24);
-+ for (int i = 0; i < 4; i++) {
-+ result[i * 4] = (uint8_t)(word & 0xFF);
-+ result[i * 4 + 1] = (uint8_t)((word >> 8) & 0xFF);
-+ result[i * 4 + 2] = (uint8_t)((word >> 16) & 0xFF);
-+ result[i * 4 + 3] = (uint8_t)((word >> 24) & 0xFF);
-+ }
-+ setVSR128(xt, result);
-+ break;
-+ }
-+
-+ // === XX2-form: xxextractuw (extract word at BE byte UIM, place at BE word 1) ===
-+ //
-+ // xxextractuw XT, XB, UIM:
-+ // Bytes [4:7] of XT receive bytes [UIM:UIM+3] of XB. Bytes [0:3]
-+ // and [8:15] of XT are set to zero.
-+ // UIM ∈ {0, 4, 8, 12} (caller responsible for alignment).
-+ // BE byte i ↔ LE byte (15-i), so the word at XB BE bytes UIM..UIM+3
-+ // sits at XB LE bytes (12-UIM)..(15-UIM), and lands at XT LE bytes
-+ // 8..11 (= XT BE word 1).
-+ case 165: { // xxextractuw
-+ int xt = int(rt | (instr->bit(0) << 5));
-+ int xb = int(rb | ((instr->instructionBits() >> 1) & 1) << 5);
-+ uint32_t uim = (instr->instructionBits() >> 16) & 0xF;
-+ uint8_t bb[16], result[16];
-+ getVSR128(xb, bb);
-+ memset(result, 0, sizeof(result));
-+ // result.LE[8..11] = XB.LE[(12-UIM)..(15-UIM)] (preserves byte order).
-+ memcpy(result + 8, bb + (12 - uim), 4);
-+ setVSR128(xt, result);
-+ break;
-+ }
-+
-+ case 180: {
-+ // xxspltib XT, IMM8 (POWER9, ISA 3.0): splat 8-bit immediate to
-+ // all 16 bytes of XT. The encoder writes `imm8 << 11`, so IMM8
-+ // occupies LE bits 11..18; TX bit at LE bit 0 selects upper VSR.
-+ uint32_t imm8 = instr->bits(18, 11);
-+ int xt = int(instr->rtValue() | (instr->bit(0) << 5));
-+ uint8_t xt_bytes[16];
-+ memset(xt_bytes, (uint8_t)imm8, 16);
-+ setVSR128(xt, xt_bytes);
-+ break;
-+ }
-+ case 181: {
-+ // xxinsertw XT, XB, UIM (POWER9, ISA 3.0): copy XB[32..63] (the
-+ // low 32 bits of XB's BE doubleword 0, which lives at LE bytes
-+ // 8-11 of XB) into XT at BE byte position UIM. UIM ∈ {0,4,8,12};
-+ // dest occupies XT LE bytes (12-UIM)..(15-UIM). Other bytes of
-+ // XT are preserved. UIM at PPC bits 11-15 = LE bits 16-20; TX/BX
-+ // at LE bits 0/1.
-+ uint32_t uim = instr->bits(20, 16);
-+ int xt = int(instr->rtValue() | (instr->bit(0) << 5));
-+ int xb = int(instr->rbValue() | (instr->bit(1) << 5));
-+ uint8_t xb_bytes[16], xt_bytes[16];
-+ getVSR128(xb, xb_bytes);
-+ getVSR128(xt, xt_bytes);
-+ memcpy(xt_bytes + (12 - uim), xb_bytes + 8, 4);
-+ setVSR128(xt, xt_bytes);
-+ break;
-+ }
-+
-+ // === XX2-form: xvabssp / xvabsdp (vector absolute value) ===
-+ case 408: case 409: case 410: case 411: { // xvabssp + AX/BX bits
-+ int xt = int(rt | (instr->bit(0) << 5));
-+ int xb = int(rb | ((instr->instructionBits() >> 1) & 1) << 5);
-+ uint8_t bb[16], result[16];
-+ getVSR128(xb, bb);
-+ for (int i = 0; i < 4; i++) {
-+ uint32_t bits = (uint32_t)bb[i * 4] |
-+ ((uint32_t)bb[i * 4 + 1] << 8) |
-+ ((uint32_t)bb[i * 4 + 2] << 16) |
-+ ((uint32_t)bb[i * 4 + 3] << 24);
-+ bits &= 0x7FFFFFFFu; // clear sign bit
-+ result[i * 4] = (uint8_t)(bits & 0xFF);
-+ result[i * 4 + 1] = (uint8_t)((bits >> 8) & 0xFF);
-+ result[i * 4 + 2] = (uint8_t)((bits >> 16) & 0xFF);
-+ result[i * 4 + 3] = (uint8_t)((bits >> 24) & 0xFF);
-+ }
-+ setVSR128(xt, result);
-+ break;
-+ }
-+ case 472: case 473: case 474: { // xvabsdp (475 used by xxbrd)
-+ int xt = int(rt | (instr->bit(0) << 5));
-+ int xb = int(rb | ((instr->instructionBits() >> 1) & 1) << 5);
-+ uint8_t bb[16], result[16];
-+ getVSR128(xb, bb);
-+ for (int i = 0; i < 2; i++) {
-+ uint64_t bits = 0;
-+ for (int k = 0; k < 8; k++) bits |= ((uint64_t)bb[i * 8 + k]) << (k * 8);
-+ bits &= 0x7FFFFFFFFFFFFFFFULL;
-+ for (int k = 0; k < 8; k++) result[i * 8 + k] = (uint8_t)((bits >> (k * 8)) & 0xFF);
-+ }
-+ setVSR128(xt, result);
-+ break;
-+ }
-+
-+ // === XX2-form unary vector float ops (single XB operand, no AX) ===
-+ //
-+ // Encoding: opcode 60, bits 6-10=XT, 11-15 reserved, 16-20=XB,
-+ // 21-29 = 9-bit XO (full field), 30=BX, 31=TX. Extraction gives us
-+ // xo = XO9 directly (no AX bit). Every op below has a unique XO9.
-+ //
-+ // xvsqrtsp XO9=139 PPC_xvsqrtsp=0xF000022C
-+ // xvsqrtdp XO9=203 PPC_xvsqrtdp=0xF000032C
-+ // xvnegsp XO9=441 PPC_xvnegsp=0xF00006E4
-+ // xvnegdp XO9=505 PPC_xvnegdp=0xF00007E4
-+ // xvrspip XO9=169 PPC_xvrspip=0xF00002A4 (round +inf = ceil)
-+ // xvrspiz XO9=153 PPC_xvrspiz=0xF0000264 (round toward 0 = trunc)
-+ // xvrspim XO9=185 PPC_xvrspim=0xF00002E4 (round -inf = floor)
-+ // xvrspic XO9=171 PPC_xvrspic=0xF00002AC (round per FPSCR)
-+ // xvrdpip XO9=233 PPC_xvrdpip=0xF00003A4
-+ // xvrdpiz XO9=217 PPC_xvrdpiz=0xF0000364
-+ // xvrdpim XO9=249 PPC_xvrdpim=0xF00003E4
-+ // xvrdpic XO9=235 PPC_xvrdpic=0xF00003AC
-+ // xvcvspsxws XO9=152 PPC_xvcvspsxws=0xF0000260 (f32 → s32, sat)
-+ // xvcvspuxws XO9=136 PPC_xvcvspuxws=0xF0000220 (f32 → u32, sat)
-+ // xvcvsxwsp XO9=184 PPC_xvcvsxwsp=0xF00002E0 (s32 → f32)
-+ // xvcvuxwsp XO9=168 PPC_xvcvuxwsp=0xF00002A0 (u32 → f32)
-+ case 139: case 203: // xvsqrtsp / xvsqrtdp
-+ case 441: case 505: // xvnegsp / xvnegdp
-+ case 169: case 233: // xvrspip / xvrdpip (ceil)
-+ case 153: case 217: // xvrspiz / xvrdpiz (trunc)
-+ case 185: case 249: // xvrspim / xvrdpim (floor)
-+ case 171: case 235: // xvrspic / xvrdpic (round-to-nearest)
-+ case 136: case 152: // xvcvspuxws / xvcvspsxws
-+ case 168: case 184: { // xvcvuxwsp / xvcvsxwsp
-+ int xt = int(rt | (instr->bit(0) << 5));
-+ int xb = int(rb | ((instr->instructionBits() >> 1) & 1) << 5);
-+ uint8_t bb[16], result[16];
-+ getVSR128(xb, bb);
-+ bool isSp = (xo == 139 || xo == 441 || xo == 169 || xo == 153 ||
-+ xo == 185 || xo == 171 || xo == 136 || xo == 152 ||
-+ xo == 168 || xo == 184);
-+ auto getF32 = [](uint8_t* buf, int i) -> float {
-+ uint32_t b = (uint32_t)buf[i * 4] |
-+ ((uint32_t)buf[i * 4 + 1] << 8) |
-+ ((uint32_t)buf[i * 4 + 2] << 16) |
-+ ((uint32_t)buf[i * 4 + 3] << 24);
-+ float f; memcpy(&f, &b, sizeof(f)); return f;
-+ };
-+ auto setF32 = [](uint8_t* buf, int i, float f) {
-+ uint32_t b; memcpy(&b, &f, sizeof(b));
-+ buf[i*4]=(uint8_t)b; buf[i*4+1]=(uint8_t)(b>>8);
-+ buf[i*4+2]=(uint8_t)(b>>16); buf[i*4+3]=(uint8_t)(b>>24);
-+ };
-+ auto getF64 = [](uint8_t* buf, int i) -> double {
-+ uint64_t b = 0;
-+ for (int k=0;k<8;k++) b |= ((uint64_t)buf[i*8+k])<<(k*8);
-+ double d; memcpy(&d, &b, sizeof(d)); return d;
-+ };
-+ auto setF64 = [](uint8_t* buf, int i, double d) {
-+ uint64_t b; memcpy(&b, &d, sizeof(b));
-+ for (int k=0;k<8;k++) buf[i*8+k]=(uint8_t)(b>>(k*8));
-+ };
-+ // Integer lane read/write (used by conversion ops).
-+ auto setU32 = [](uint8_t* buf, int i, uint32_t v) {
-+ buf[i*4]=(uint8_t)v; buf[i*4+1]=(uint8_t)(v>>8);
-+ buf[i*4+2]=(uint8_t)(v>>16); buf[i*4+3]=(uint8_t)(v>>24);
-+ };
-+ // Saturated float→int conversion per Power ISA v3.0B: input NaN maps
-+ // to 0; out-of-range saturates to the extreme of the destination type.
-+ auto fp2sxw = [](double f) -> uint32_t {
-+ if (std::isnan(f)) return 0;
-+ if (f >= (double)INT32_MAX) return (uint32_t)INT32_MAX;
-+ if (f <= (double)INT32_MIN) return (uint32_t)INT32_MIN;
-+ return (uint32_t)(int32_t)std::trunc(f);
-+ };
-+ auto fp2uxw = [](double f) -> uint32_t {
-+ if (std::isnan(f)) return 0;
-+ if (f >= (double)UINT32_MAX) return UINT32_MAX;
-+ if (f <= 0.0) return 0;
-+ return (uint32_t)std::trunc(f);
-+ };
-+
-+ if (isSp) {
-+ for (int i = 0; i < 4; i++) {
-+ float v = getF32(bb, i);
-+ float out = 0.0f;
-+ uint32_t iout = 0;
-+ bool isInt = false;
-+ switch (xo) {
-+ case 139: out = std::sqrt(v); break; // xvsqrtsp
-+ case 441: out = -v; break; // xvnegsp
-+ case 169: out = std::ceil(v); break; // xvrspip
-+ case 153: out = std::trunc(v); break; // xvrspiz
-+ case 185: out = std::floor(v); break; // xvrspim
-+ case 171: out = std::nearbyint(v); break; // xvrspic
-+ case 152: iout = fp2sxw(v); isInt = true; break; // xvcvspsxws
-+ case 136: iout = fp2uxw(v); isInt = true; break; // xvcvspuxws
-+ case 184: { // xvcvsxwsp
-+ uint32_t bits = (uint32_t)bb[i*4] |
-+ ((uint32_t)bb[i*4+1]<<8) |
-+ ((uint32_t)bb[i*4+2]<<16) |
-+ ((uint32_t)bb[i*4+3]<<24);
-+ out = (float)(int32_t)bits;
-+ break;
-+ }
-+ case 168: { // xvcvuxwsp
-+ uint32_t bits = (uint32_t)bb[i*4] |
-+ ((uint32_t)bb[i*4+1]<<8) |
-+ ((uint32_t)bb[i*4+2]<<16) |
-+ ((uint32_t)bb[i*4+3]<<24);
-+ out = (float)(uint32_t)bits;
-+ break;
-+ }
-+ }
-+ if (isInt) setU32(result, i, iout);
-+ else setF32(result, i, out);
-+ }
-+ } else {
-+ for (int i = 0; i < 2; i++) {
-+ double v = getF64(bb, i);
-+ double out = 0.0;
-+ switch (xo) {
-+ case 203: out = std::sqrt(v); break; // xvsqrtdp
-+ case 505: out = -v; break; // xvnegdp
-+ case 233: out = std::ceil(v); break; // xvrdpip
-+ case 217: out = std::trunc(v); break; // xvrdpiz
-+ case 249: out = std::floor(v); break; // xvrdpim
-+ case 235: out = std::nearbyint(v); break; // xvrdpic
-+ }
-+ setF64(result, i, out);
-+ }
-+ }
-+ setVSR128(xt, result);
-+ break;
-+ }
-+
-+ // === XX3-form vector float compare (eq, gt, ge) ===
-+ // The wasm SIMD compares emit these and use the result as a bitmask.
-+ // Per Power ISA: result is all-1s for true lanes, all-0s for false
-+ // (for the non-recording form; bit 0 of XO selects record form which
-+ // we don't model — wasm doesn't read CR6 here).
-+ // Encodings:
-+ // 0xF0000218 xvcmpeqsp (XO8=67) → XO9 = 134/135 (+AX).
-+ // 0xF0000258 xvcmpgtsp (XO8=75) → XO9 = 150/151.
-+ // 0xF0000298 xvcmpgesp (XO8=83) → XO9 = 166/167.
-+ // 0xF0000318 xvcmpeqdp (XO8=99) → XO9 = 198/199.
-+ // 0xF0000358 xvcmpgtdp (XO8=107) → XO9 = 214/215.
-+ // 0xF0000398 xvcmpgedp (XO8=115) → XO9 = 230/231.
-+ // Rc=1 record form flips ISA bit 21 (sim bit 10), yielding XO9+256
-+ // (not adjacent to the Rc=0 slot). wasm never emits the record form.
-+ case 134: case 135: // xvcmpeqsp (XO8=67)
-+ case 198: case 199: // xvcmpeqdp (XO8=99)
-+ case 150: case 151: // xvcmpgtsp (XO8=75)
-+ case 214: case 215: // xvcmpgtdp (XO8=107)
-+ case 166: case 167: // xvcmpgesp (XO8=83)
-+ case 230: case 231: { // xvcmpgedp (XO8=115)
-+ int xt = int(rt | (instr->bit(0) << 5));
-+ uint32_t ra = instr->raValue();
-+ int xa = int(ra | ((instr->instructionBits() >> 2) & 1) << 5);
-+ int xb = int(rb | ((instr->instructionBits() >> 1) & 1) << 5);
-+ uint8_t ab[16], bb[16], result[16];
-+ getVSR128(xa, ab);
-+ getVSR128(xb, bb);
-+ uint32_t op8 = xo >> 1; // canonical 8-bit XO
-+ bool isF32 = (op8 == 67 || op8 == 75 || op8 == 83);
-+ bool isEq = (op8 == 67 || op8 == 99);
-+ bool isGt = (op8 == 75 || op8 == 107);
-+ bool isGe = (op8 == 83 || op8 == 115);
-+ (void)isGe;
-+ auto cmpF32 = [&](int i) -> bool {
-+ uint32_t aBits = (uint32_t)ab[i * 4] |
-+ ((uint32_t)ab[i * 4 + 1] << 8) |
-+ ((uint32_t)ab[i * 4 + 2] << 16) |
-+ ((uint32_t)ab[i * 4 + 3] << 24);
-+ uint32_t bBits = (uint32_t)bb[i * 4] |
-+ ((uint32_t)bb[i * 4 + 1] << 8) |
-+ ((uint32_t)bb[i * 4 + 2] << 16) |
-+ ((uint32_t)bb[i * 4 + 3] << 24);
-+ float fa, fb;
-+ memcpy(&fa, &aBits, sizeof(fa));
-+ memcpy(&fb, &bBits, sizeof(fb));
-+ if (isEq) return fa == fb;
-+ if (isGt) return fa > fb;
-+ return fa >= fb;
-+ };
-+ auto cmpF64 = [&](int i) -> bool {
-+ uint64_t aBits = 0, bBits = 0;
-+ for (int k = 0; k < 8; k++) aBits |= ((uint64_t)ab[i * 8 + k]) << (k * 8);
-+ for (int k = 0; k < 8; k++) bBits |= ((uint64_t)bb[i * 8 + k]) << (k * 8);
-+ double fa, fb;
-+ memcpy(&fa, &aBits, sizeof(fa));
-+ memcpy(&fb, &bBits, sizeof(fb));
-+ if (isEq) return fa == fb;
-+ if (isGt) return fa > fb;
-+ return fa >= fb;
-+ };
-+ if (isF32) {
-+ for (int i = 0; i < 4; i++) {
-+ uint32_t mask = cmpF32(i) ? 0xFFFFFFFFu : 0;
-+ for (int k = 0; k < 4; k++) {
-+ result[i * 4 + k] = (uint8_t)((mask >> (k * 8)) & 0xFF);
-+ }
-+ }
-+ } else {
-+ for (int i = 0; i < 2; i++) {
-+ uint64_t mask = cmpF64(i) ? UINT64_MAX : 0;
-+ for (int k = 0; k < 8; k++) {
-+ result[i * 8 + k] = (uint8_t)((mask >> (k * 8)) & 0xFF);
-+ }
-+ }
-+ }
-+ setVSR128(xt, result);
-+ break;
-+ }
-+
-+ // === XX3-form vector float arithmetic ===
-+ // Encoding: bits 6-10=XT, 11-15=XA, 16-20=XB, 21-28=XO (8 bits), 29=AX,
-+ // 30=BX, 31=TX. We dispatched above using `bits(10, 2)` which is bits
-+ // 21-29 (9 bits) — that includes the AX register-extension bit, which
-+ // changes for every XA in {0..31} vs {32..63}. BX and TX (bits 30, 31)
-+ // fall outside the extraction, so only AX perturbs it: each XX3 op is
-+ // matched by two labels, (8-bit XO) << 1 with the low bit 0 or 1 (XO
-+ // occupies bits 1..8 of our 9-bit extraction). Helper macro: two labels.
-+ #define XX3_CASE_BASE(name) \
-+ case ((name) | 0): case ((name) | 1):
-+ case 128: case 129: // xvaddsp: 4 × f32 add (XO=64 → bits 1..8 = 128)
-+ case 192: case 193: // xvadddp
-+ case 144: case 145: // xvsubsp
-+ case 208: case 209: // xvsubdp
-+ case 160: case 161: // xvmulsp
-+ case 224: case 225: // xvmuldp
-+ case 176: case 177: // xvdivsp
-+ case 240: case 241: // xvdivdp
-+ case 384: case 385: // xvmaxsp
-+ case 448: case 449: // xvmaxdp
-+ case 400: case 401: // xvminsp
-+ case 464: case 465: // xvmindp
-+ {
-+ // Re-extract the canonical 8-bit XX3 XO.
-+ uint32_t xo3 = (xo >> 1);
-+ (void)xo3;
-+ int xt = int(rt | (instr->bit(0) << 5));
-+ uint32_t ra = instr->raValue();
-+ int xa = int(ra | ((instr->instructionBits() >> 2) & 1) << 5);
-+ int xb = int(rb | ((instr->instructionBits() >> 1) & 1) << 5);
-+ uint8_t ab[16], bb[16], rb_bytes[16];
-+ getVSR128(xa, ab);
-+ getVSR128(xb, bb);
-+
-+ auto getF32 = [](uint8_t* buf, int i) -> float {
-+ uint32_t bits = (uint32_t)buf[i * 4] |
-+ ((uint32_t)buf[i * 4 + 1] << 8) |
-+ ((uint32_t)buf[i * 4 + 2] << 16) |
-+ ((uint32_t)buf[i * 4 + 3] << 24);
-+ float f;
-+ memcpy(&f, &bits, sizeof(f));
-+ return f;
-+ };
-+ auto setF32 = [](uint8_t* buf, int i, float f) {
-+ uint32_t bits;
-+ memcpy(&bits, &f, sizeof(bits));
-+ buf[i * 4] = (uint8_t)(bits & 0xFF);
-+ buf[i * 4 + 1] = (uint8_t)((bits >> 8) & 0xFF);
-+ buf[i * 4 + 2] = (uint8_t)((bits >> 16) & 0xFF);
-+ buf[i * 4 + 3] = (uint8_t)((bits >> 24) & 0xFF);
-+ };
-+ auto getF64 = [](uint8_t* buf, int i) -> double {
-+ uint64_t bits = 0;
-+ for (int k = 0; k < 8; k++) bits |= ((uint64_t)buf[i * 8 + k]) << (k * 8);
-+ double d;
-+ memcpy(&d, &bits, sizeof(d));
-+ return d;
-+ };
-+ auto setF64 = [](uint8_t* buf, int i, double d) {
-+ uint64_t bits;
-+ memcpy(&bits, &d, sizeof(bits));
-+ for (int k = 0; k < 8; k++) buf[i * 8 + k] = (uint8_t)((bits >> (k * 8)) & 0xFF);
-+ };
-+
-+ // Dispatch on the canonical 8-bit XX3 XO (bits 21..28 PPC = xo>>1).
-+ switch (xo3) {
-+ case 64: for (int i = 0; i < 4; i++) setF32(rb_bytes, i, getF32(ab, i) + getF32(bb, i)); break; // xvaddsp
-+ case 96: for (int i = 0; i < 2; i++) setF64(rb_bytes, i, getF64(ab, i) + getF64(bb, i)); break; // xvadddp
-+ case 72: for (int i = 0; i < 4; i++) setF32(rb_bytes, i, getF32(ab, i) - getF32(bb, i)); break; // xvsubsp
-+ case 104: for (int i = 0; i < 2; i++) setF64(rb_bytes, i, getF64(ab, i) - getF64(bb, i)); break; // xvsubdp
-+ case 80: for (int i = 0; i < 4; i++) setF32(rb_bytes, i, getF32(ab, i) * getF32(bb, i)); break; // xvmulsp
-+ case 112: for (int i = 0; i < 2; i++) setF64(rb_bytes, i, getF64(ab, i) * getF64(bb, i)); break; // xvmuldp
-+ case 88: for (int i = 0; i < 4; i++) setF32(rb_bytes, i, getF32(ab, i) / getF32(bb, i)); break; // xvdivsp
-+ case 120: for (int i = 0; i < 2; i++) setF64(rb_bytes, i, getF64(ab, i) / getF64(bb, i)); break; // xvdivdp
-+ // xvmin{sp,dp} / xvmax{sp,dp}:
-+ // If both operands are NaN, result is the NaN from XA.
-+ // If exactly one operand is NaN, result is the NON-NaN operand.
-+ // For 0 / -0, treat -0 < +0 (signed-zero ordering): xvminsp(+0,-0)
-+ // = -0, xvmaxsp(+0,-0) = +0, in either operand order.
-+ // Otherwise, result is IEEE min/max(a, b).
-+ // This differs from IEEE 754 (which propagates NaN) and is
-+ // relied upon by wasm relaxed_min/max (bug1946618.js) and by
-+ // wasm f32x4.min(0,-0) returning -0 (simd_f32x4.wast.js).
-+ #define XV_MAX(T, a, b) [](T a_, T b_) -> T { \
-+ bool an = std::isnan(a_), bn = std::isnan(b_); \
-+ if (an && bn) return a_; \
-+ if (an) return b_; \
-+ if (bn) return a_; \
-+ if (a_ == 0.0 && b_ == 0.0) { \
-+ /* -0 is smaller than +0; max picks +0. */ \
-+ return std::signbit(a_) ? b_ : a_; \
-+ } \
-+ return std::max(a_, b_); \
-+ }(a, b)
-+ #define XV_MIN(T, a, b) [](T a_, T b_) -> T { \
-+ bool an = std::isnan(a_), bn = std::isnan(b_); \
-+ if (an && bn) return a_; \
-+ if (an) return b_; \
-+ if (bn) return a_; \
-+ if (a_ == 0.0 && b_ == 0.0) { \
-+ /* -0 is smaller than +0; min picks -0. */ \
-+ return std::signbit(a_) ? a_ : b_; \
-+ } \
-+ return std::min(a_, b_); \
-+ }(a, b)
-+ case 192: for (int i = 0; i < 4; i++) { // xvmaxsp
-+ float a = getF32(ab, i), b = getF32(bb, i);
-+ setF32(rb_bytes, i, XV_MAX(float, a, b));
-+ } break;
-+ case 224: for (int i = 0; i < 2; i++) { // xvmaxdp
-+ double a = getF64(ab, i), b = getF64(bb, i);
-+ setF64(rb_bytes, i, XV_MAX(double, a, b));
-+ } break;
-+ case 200: for (int i = 0; i < 4; i++) { // xvminsp
-+ float a = getF32(ab, i), b = getF32(bb, i);
-+ setF32(rb_bytes, i, XV_MIN(float, a, b));
-+ } break;
-+ case 232: for (int i = 0; i < 2; i++) { // xvmindp
-+ double a = getF64(ab, i), b = getF64(bb, i);
-+ setF64(rb_bytes, i, XV_MIN(double, a, b));
-+ } break;
-+ #undef XV_MAX
-+ #undef XV_MIN
-+ default:
-+ MOZ_CRASH_UNSAFE_PRINTF(
-+ "xv float dispatch missing 8-bit XO=%u (instr 0x%08x)",
-+ xo3, instr->instructionBits());
-+ }
-+ setVSR128(xt, rb_bytes);
-+ break;
-+ }
-+
-+ // === XX3-form fused multiply-add (3-source: XT is also input) ===
-+ //
-+ // xvmaddasp XT,XA,XB: XT = (XA * XB) + XT (fused madd)
-+ // xvmaddadp XT,XA,XB: same for f64
-+ // xvnmsubasp XT,XA,XB: XT = -((XA * XB) - XT) = XT - (XA * XB)
-+ // xvnmsubadp XT,XA,XB: same for f64
-+ //
-+ // Encodings (each +AX): XO8 → XO9 pairs
-+ // xvmaddasp PPC_xvmaddasp=0xF0000208 XO8=65 → XO9 130/131
-+ // xvmaddadp PPC_xvmaddadp=0xF0000308 XO8=97 → XO9 194/195
-+ // xvnmsubasp PPC_xvnmsubasp=0xF0000688 XO8=209 → XO9 418/419
-+ // xvnmsubadp PPC_xvnmsubadp=0xF0000788 XO8=241 → XO9 482/483
-+ // std::fma gives IEEE-correct single-rounding behaviour matching the
-+ // Power ISA definition of these fused forms.
-+ case 130: case 131: // xvmaddasp
-+ case 194: case 195: // xvmaddadp
-+ case 418: case 419: // xvnmsubasp
-+ case 482: case 483: { // xvnmsubadp
-+ int xt = int(rt | (instr->bit(0) << 5));
-+ uint32_t ra = instr->raValue();
-+ int xa = int(ra | ((instr->instructionBits() >> 2) & 1) << 5);
-+ int xb = int(rb | ((instr->instructionBits() >> 1) & 1) << 5);
-+ uint8_t ab[16], bb[16], tb[16];
-+ getVSR128(xa, ab);
-+ getVSR128(xb, bb);
-+ getVSR128(xt, tb); // XT is also an input (accumulator).
-+ bool isSp = (xo == 130 || xo == 131 || xo == 418 || xo == 419);
-+ bool isNmsub = (xo == 418 || xo == 419 || xo == 482 || xo == 483);
-+ auto rdF32 = [](uint8_t* buf, int i) -> float {
-+ uint32_t b = (uint32_t)buf[i * 4] |
-+ ((uint32_t)buf[i * 4 + 1] << 8) |
-+ ((uint32_t)buf[i * 4 + 2] << 16) |
-+ ((uint32_t)buf[i * 4 + 3] << 24);
-+ float f; memcpy(&f, &b, sizeof(f)); return f;
-+ };
-+ auto wrF32 = [](uint8_t* buf, int i, float f) {
-+ uint32_t b; memcpy(&b, &f, sizeof(b));
-+ buf[i*4]=(uint8_t)b; buf[i*4+1]=(uint8_t)(b>>8);
-+ buf[i*4+2]=(uint8_t)(b>>16); buf[i*4+3]=(uint8_t)(b>>24);
-+ };
-+ auto rdF64 = [](uint8_t* buf, int i) -> double {
-+ uint64_t b = 0;
-+ for (int k=0;k<8;k++) b |= ((uint64_t)buf[i*8+k])<<(k*8);
-+ double d; memcpy(&d, &b, sizeof(d)); return d;
-+ };
-+ auto wrF64 = [](uint8_t* buf, int i, double d) {
-+ uint64_t b; memcpy(&b, &d, sizeof(b));
-+ for (int k=0;k<8;k++) buf[i*8+k]=(uint8_t)(b>>(k*8));
-+ };
-+ uint8_t result[16];
-+ if (isSp) {
-+ for (int i = 0; i < 4; i++) {
-+ float a = rdF32(ab, i), b = rdF32(bb, i), t = rdF32(tb, i);
-+ // madd: t + a*b ; nmsub: -(a*b - t) = t - a*b = std::fma(a,b,-t) negated.
-+ float out = isNmsub ? -std::fma(a, b, -t)
-+ : std::fma(a, b, t);
-+ wrF32(result, i, out);
-+ }
-+ } else {
-+ for (int i = 0; i < 2; i++) {
-+ double a = rdF64(ab, i), b = rdF64(bb, i), t = rdF64(tb, i);
-+ double out = isNmsub ? -std::fma(a, b, -t)
-+ : std::fma(a, b, t);
-+ wrF64(result, i, out);
-+ }
-+ }
-+ setVSR128(xt, result);
-+ break;
-+ }
-+
-+ default:
-+ MOZ_CRASH_UNSAFE_PRINTF(
-+ "decodeVSX: unimplemented XO=%u (instruction 0x%08x)", xo,
-+ instr->instructionBits());
-+ }
-+}
-+
-+// =============================================================================
-+// Power ISA v3.1 prefixed instructions (POWER10).
-+// =============================================================================
-+//
-+// A prefixed instruction is 8 bytes: a 4-byte prefix word (primary opcode 1)
-+// followed by a 4-byte suffix word. Prefix and suffix must lie in the same
-+// 64-byte aligned block — the JIT must guarantee this when emitting; the sim
-+// asserts.
-+//
-+// Prefix word layout (BE bit numbering):
-+// [0..5] primary opcode = 1
-+// [6..7] Type (00 = 8LS, 10 = MLS — only forms we implement)
-+// [8..10] reserved (must be 0)
-+// [11] R (1 = PC-relative; RA must be 0)
-+// [12..13] reserved (must be 0)
-+// [14..31] d0 (high 18 bits of the 34-bit signed immediate)
-+//
-+// Suffix word (MLS/8LS form, GPR-target instructions like paddi/pld):
-+// [0..5] suffix primary opcode (selects the actual instruction)
-+// [6..10] RT (or RS for stores)
-+// [11..15] RA
-+// [16..31] d1 (low 16 bits of immediate)
-+//
-+// Suffix word (8LS plxv quirk): the suffix opcode field is only 5 bits
-+// wide and bit [5] holds TX, the high bit of the 6-bit XT VSR number:
-+// [0..4] plxv suffix opcode = 11001 (= 25)
-+// [5] TX
-+// [6..10] T
-+// [11..15] RA
-+// [16..31] d1
-+// Combined: XT = (TX << 5) | T. (Equivalent: full 6-bit field at [0..5]
-+// is 0b11001(TX) — values 50 or 51 in our LE bits 31..26.)
-+//
-+// Combined immediate: SI = sign_extend((d0 << 16) | d1, 34).
-+// EA when R=1: address-of-prefix + SI. (RA must be 0.)
-+// EA when R=0: (RA == 0 ? 0 : GPR[RA]) + SI.
-+//
-+// Suffix opcodes implemented here (paddi and loads listed; 8LS suffix=61 pstd is also handled):
-+// MLS (Type 2) / suffix=14 paddi
-+// MLS (Type 2) / suffix=48 plfs (load FP single, widens to double)
-+// MLS (Type 2) / suffix=50 plfd (load FP double)
-+// 8LS (Type 0) / suffix=57 pld
-+// 8LS (Type 0) / 5-bit suffix=25, bit 26 = TX plxv
-+//
-+// Verification recipe when adding more: assemble with `gcc -mcpu=power10
-+// -c` (or clang) and compare the emitted bytes against the encoder; encode
-+// in a small inline-asm program and step through under this sim.
-+
-+void Simulator::decodePrefixed(SimInstruction* prefix) {
-+ // Prefix and suffix must reside in the same 64-byte block.
-+ uint64_t prefixAddr = reinterpret_cast<uint64_t>(prefix);
-+ MOZ_ASSERT((prefixAddr & 63) <= 56,
-+ "POWER10 prefixed instruction crosses 64-byte boundary");
-+
-+ SimInstruction* suffix = reinterpret_cast<SimInstruction*>(
-+ reinterpret_cast<uint8_t*>(prefix) + SimInstruction::kInstrSize);
-+
-+ uint32_t type = prefix->bits(25, 24);
-+ uint32_t R = prefix->bit(20);
-+ uint32_t d0 = prefix->bits(17, 0); // 18 bits
-+ uint32_t suffixOp6 = suffix->bits(31, 26); // 6-bit form (paddi, pld)
-+ uint32_t suffixOp5 = suffix->bits(31, 27); // 5-bit form (plxv)
-+ uint32_t plxvTX = suffix->bit(26);
-+ uint32_t rt = suffix->rtValue();
-+ uint32_t ra = suffix->raValue();
-+ uint32_t d1 = suffix->uimm16Value();
-+
-+ // Reassemble 34-bit signed displacement.
-+ int64_t imm34 = (static_cast<int64_t>(d0) << 16) | d1;
-+ imm34 = (imm34 << 30) >> 30; // sign-extend from bit 33
-+
-+ // R=1 forms require RA=0 per the ISA.
-+ MOZ_ASSERT(!R || ra == 0,
-+ "POWER10 prefixed R=1 form requires RA=0");
-+
-+ // Type 2 = MLS, Type 0 = 8LS. Other types are reserved here.
-+ if (type == 2 && suffixOp6 == 14) {
-+ // paddi RT, RA, SI, R (MLS)
-+ int64_t base = R ? static_cast<int64_t>(prefixAddr)
-+ : (ra == 0 ? 0 : getRegister(ra));
-+ setRegister(rt, base + imm34);
-+ } else if (type == 0 && suffixOp6 == 57) {
-+ // pld RT, D(RA), R (8LS)
-+ uint64_t ea = R ? prefixAddr + static_cast<uint64_t>(imm34)
-+ : (ra == 0 ? 0 : getRegister(ra)) +
-+ static_cast<uint64_t>(imm34);
-+ if (!handleWasmSegFault(ea, 8)) {
-+ setRegister(rt, readDW(ea, prefix));
-+ }
-+ } else if (type == 2 && suffixOp6 == 50) {
-+ // plfd FRT, D(RA), R (MLS) — load 8-byte double into FPR.
-+ uint64_t ea = R ? prefixAddr + static_cast<uint64_t>(imm34)
-+ : (ra == 0 ? 0 : getRegister(ra)) +
-+ static_cast<uint64_t>(imm34);
-+ if (!handleWasmSegFault(ea, 8)) {
-+ setFpuRegisterDouble(rt, readD(ea, prefix));
-+ }
-+ } else if (type == 2 && suffixOp6 == 48) {
-+ // plfs FRT, D(RA), R (MLS) — load 4-byte single, widen NaN-preserving.
-+ uint64_t ea = R ? prefixAddr + static_cast<uint64_t>(imm34)
-+ : (ra == 0 ? 0 : getRegister(ra)) +
-+ static_cast<uint64_t>(imm34);
-+ if (!handleWasmSegFault(ea, 4)) {
-+ float val = *reinterpret_cast<float*>(ea);
-+ setFpuRegisterDouble(rt, promoteFloatPreservingNaN(val));
-+ }
-+ } else if (type == 0 && suffixOp5 == 25) {
-+ // plxv XT, D(RA), R (8LS) — XT = (TX << 5) | T, TX at suffix bit 26.
-+ int xt = static_cast<int>(rt | (plxvTX << 5));
-+ uint64_t ea = R ? prefixAddr + static_cast<uint64_t>(imm34)
-+ : (ra == 0 ? 0 : getRegister(ra)) +
-+ static_cast<uint64_t>(imm34);
-+ if (!handleWasmSegFault(ea, 16)) {
-+ uint8_t buf[16];
-+ memcpy(buf, reinterpret_cast<const void*>(ea), 16);
-+ setVSR128(xt, buf);
-+ }
-+ } else if (type == 0 && suffixOp6 == 61) {
-+ // pstd RS, D(RA), R (8LS) — store doubleword.
-+ uint64_t ea = R ? prefixAddr + static_cast<uint64_t>(imm34)
-+ : (ra == 0 ? 0 : getRegister(ra)) +
-+ static_cast<uint64_t>(imm34);
-+ if (!handleWasmSegFault(ea, 8)) {
-+ writeDW(ea, getRegister(rt), prefix);
-+ }
-+ } else if (type == 2 && suffixOp6 == 54) {
-+ // pstfd FRS, D(RA), R (MLS) — store double.
-+ uint64_t ea = R ? prefixAddr + static_cast<uint64_t>(imm34)
-+ : (ra == 0 ? 0 : getRegister(ra)) +
-+ static_cast<uint64_t>(imm34);
-+ if (!handleWasmSegFault(ea, 8)) {
-+ writeD(ea, getFpuRegisterDouble(rt), prefix);
-+ }
-+ } else if (type == 2 && suffixOp6 == 52) {
-+ // pstfs FRS, D(RA), R (MLS) — store single (narrow from double in FPR).
-+ uint64_t ea = R ? prefixAddr + static_cast<uint64_t>(imm34)
-+ : (ra == 0 ? 0 : getRegister(ra)) +
-+ static_cast<uint64_t>(imm34);
-+ if (!handleWasmSegFault(ea, 4)) {
-+ double dval = getFpuRegisterDouble(rt);
-+ *reinterpret_cast<float*>(ea) = demoteDoublePreservingNaN(dval);
-+ }
-+ } else if (type == 0 && suffixOp5 == 27) {
-+ // pstxv XS, D(RA), R (8LS) — XS = (SX << 5) | S, SX at suffix bit 26.
-+ int xs = static_cast<int>(rt | (plxvTX << 5));
-+ uint64_t ea = R ? prefixAddr + static_cast<uint64_t>(imm34)
-+ : (ra == 0 ? 0 : getRegister(ra)) +
-+ static_cast<uint64_t>(imm34);
-+ if (!handleWasmSegFault(ea, 16)) {
-+ uint8_t buf[16];
-+ getVSR128(xs, buf);
-+ memcpy(reinterpret_cast<void*>(ea), buf, 16);
-+ }
-+ } else {
-+ MOZ_CRASH_UNSAFE_PRINTF(
-+ "decodePrefixed: unimplemented type=%u "
-+ "(prefix 0x%08x, suffix 0x%08x)",
-+ type, prefix->instructionBits(), suffix->instructionBits());
-+ }
-+
-+ // Advance past the full 8-byte prefixed instruction unless a handler
-+ // already redirected the PC. The caller (instructionDecode) returns
-+ // immediately after us, so its 4-byte trailing advance is skipped.
-+ if (!pc_modified_) {
-+ set_pc(static_cast<int64_t>(prefixAddr) + 2 * SimInstruction::kInstrSize);
-+ }
-+}
-+
-+// =============================================================================
-+// Top-level instruction decoder.
-+// =============================================================================
-+
-+void Simulator::instructionDecode(SimInstruction* instr) {
-+ if (!SimulatorProcess::ICacheCheckingDisableCount) {
-+ AutoLockSimulatorCache als;
-+ SimulatorProcess::checkICacheLocked(instr);
-+ }
-+ pc_modified_ = false;
-+
-+ uint32_t instrBits = instr->instructionBits();
-+
-+ // Check for kCallRedirInstr first (PPC_stop = 0x4C0002E4).
-+ if (instrBits == kCallRedirInstr) {
-+ softwareInterrupt(instr);
-+ if (!pc_modified_) {
-+ set_pc(reinterpret_cast<int64_t>(instr) + SimInstruction::kInstrSize);
-+ }
-+ return;
-+ }
-+
-+ // Check for PPC_trap (0x7FE00008).
-+ if (instrBits == 0x7FE00008) {
-+ softwareInterrupt(instr);
-+ if (!pc_modified_) {
-+ set_pc(reinterpret_cast<int64_t>(instr) + SimInstruction::kInstrSize);
-+ }
-+ return;
-+ }
-+
-+ uint32_t opcode = instr->opcode();
-+
-+ // Power ISA v3.1 prefixed instructions: primary opcode 1 marks a
-+ // 4-byte prefix word followed by a 4-byte suffix word. decodePrefixed
-+ // advances the PC by the full 8 bytes itself, unless a handler (e.g.
-+ // the wasm segfault path) has already redirected the PC.
-+ if (opcode == 1) {
-+ decodePrefixed(instr);
-+ return;
-+ }
-+
-+ switch (opcode) {
-+ // D-form ALU
-+ case 3: // twi
-+ case 7: // mulli
-+ case 8: // subfic
-+ case 10: // cmpli
-+ case 11: // cmpi
-+ case 12: // addic
-+ case 13: // addic.
-+ case 14: // addi
-+ case 15: // addis
-+ case 24: // ori
-+ case 25: // oris
-+ case 26: // xori
-+ case 27: // xoris
-+ case 28: // andi.
-+ case 29: // andis.
-+ decodeDFormALU(instr);
-+ break;
-+
-+ // D-form loads
-+ case 32: // lwz
-+ case 33: // lwzu
-+ case 34: // lbz
-+ case 35: // lbzu
-+ case 40: // lhz
-+ case 41: // lhzu
-+ case 42: // lha
-+ case 43: // lhau
-+ case 48: // lfs
-+ case 49: // lfsu
-+ case 50: // lfd
-+ case 51: // lfdu
-+ decodeDFormLoad(instr);
-+ break;
-+
-+ // D-form stores
-+ case 36: // stw
-+ case 38: // stb
-+ case 39: // stbu
-+ case 44: // sth
-+ case 45: // sthu
-+ case 52: // stfs
-+ case 53: // stfsu
-+ case 54: // stfd
-+ case 55: // stfdu
-+ decodeDFormStore(instr);
-+ break;
-+
-+ // DS-form
-+ case 58: // ld, ldu, lwa
-+ case 62: // std, stdu
-+ decodeDSForm(instr);
-+ break;
-+
-+ // B-form conditional branch
-+ case 16:
-+ decodeBranch(instr);
-+ break;
-+
-+ // SC (system call) - unused in JIT
-+ case 17:
-+ MOZ_CRASH("Simulator: sc instruction not supported");
-+ break;
-+
-+ // I-form unconditional branch
-+ case 18:
-+ decodeBranch(instr);
-+ break;
-+
-+ // XL-form (branch to LR/CTR, CR operations)
-+ case 19:
-+ decodeBranch(instr);
-+ break;
-+
-+ // M-form / MD-form rotate/mask
-+ case 20: // rlwimi
-+ case 21: // rlwinm
-+ case 23: // rlwnm
-+ case 30: // rldicl, rldicr, rldic, rldimi, rldcl, rldcr
-+ decodeRotateMask(instr);
-+ break;
-+
-+ // VMX (AltiVec) — primary opcode 4. Vector arithmetic / compare / shift /
-+ // splat / merge / pack / unpack on VR0-VR31. The wasm SIMD lowering
-+ // emits these directly (Simd128 lives in the VR namespace).
-+ case 4:
-+ decodeVMX(instr);
-+ break;
-+
-+ // X-form / XO-form
-+ case 31:
-+ decodeXForm(instr);
-+ break;
-+
-+ // FP single (A-form)
-+ case 59:
-+ decodeFP(instr);
-+ break;
-+
-+ // VSX (XX1-form)
-+ case 60:
-+ decodeVSX(instr);
-+ break;
-+
-+ // FP double (X-form / A-form)
-+ case 63:
-+ decodeFP(instr);
-+ break;
-+
-+ default:
-+ MOZ_CRASH_UNSAFE_PRINTF(
-+ "instructionDecode: unsupported opcode %u (instruction 0x%08x)",
-+ opcode, instrBits);
-+ }
-+
-+ if (!pc_modified_) {
-+ set_pc(reinterpret_cast<int64_t>(instr) + SimInstruction::kInstrSize);
-+ }
-+}
-+
-+// =============================================================================
-+// Single-stepping / execute loop.
-+// =============================================================================
-+
-+void Simulator::enable_single_stepping(SingleStepCallback cb, void* arg) {
-+ single_stepping_ = true;
-+ single_step_callback_ = cb;
-+ single_step_callback_arg_ = arg;
-+ single_step_callback_(single_step_callback_arg_, this, (void*)get_pc());
-+}
-+
-+void Simulator::disable_single_stepping() {
-+ if (!single_stepping_) {
-+ return;
-+ }
-+ single_step_callback_(single_step_callback_arg_, this, (void*)get_pc());
-+ single_stepping_ = false;
-+ single_step_callback_ = nullptr;
-+ single_step_callback_arg_ = nullptr;
-+}
-+
-+template <bool enableStopSimAt>
-+void Simulator::execute() {
-+ if (single_stepping_ && getenv("PPC64_TRACE_SIM")) {
-+ fprintf(stderr, "[sim] enter execute pc=0x%lx lr=0x%lx fp=0x%lx sp=0x%lx\n",
-+ (long)get_pc(), (long)getLR(), (long)getRegister(fp),
-+ (long)getRegister(sp));
-+ }
-+ if (single_stepping_) {
-+ single_step_callback_(single_step_callback_arg_, this, nullptr);
-+ }
-+
-+ int64_t program_counter = get_pc();
-+
-+ while (program_counter != end_sim_pc) {
-+ if (enableStopSimAt && (icount_ == Simulator::StopSimAt)) {
-+ ppc64Debugger dbg(this);
-+ dbg.debug();
-+ } else {
-+ if (single_stepping_) {
-+ if (getenv("PPC64_TRACE_SIM")) {
-+ fprintf(stderr,
-+ "[sim] step icount=%llu pc=0x%lx instr=0x%08x lr=0x%lx fp=0x%lx sp=0x%lx\n",
-+ (unsigned long long)icount_, (long)program_counter,
-+ *(uint32_t*)program_counter, (long)getLR(),
-+ (long)getRegister(fp), (long)getRegister(sp));
-+ }
-+ single_step_callback_(single_step_callback_arg_, this,
-+ (void*)program_counter);
-+ }
-+ SimInstruction* instr =
-+ reinterpret_cast<SimInstruction*>(program_counter);
-+ instructionDecode(instr);
-+ icount_++;
-+ }
-+ program_counter = get_pc();
-+ }
-+
-+ if (single_stepping_) {
-+ single_step_callback_(single_step_callback_arg_, this, nullptr);
-+ }
-+}
-+
-+// =============================================================================
-+// callInternal / call.
-+// =============================================================================
-+
-+void Simulator::callInternal(uint8_t* entry) {
-+ // Prepare to execute the code at entry.
-+ setRegister(pc, reinterpret_cast<int64_t>(entry));
-+ // The simulation stops when returning to this call point (LR == end_sim_pc).
-+ setLR(end_sim_pc);
-+
-+ // Remember the values of callee-saved registers (r14-r31 in ELFv2).
-+ int64_t r14_val = getRegister(r14);
-+ int64_t r15_val = getRegister(r15);
-+ int64_t r16_val = getRegister(r16);
-+ int64_t r17_val = getRegister(r17);
-+ int64_t r18_val = getRegister(r18);
-+ int64_t r19_val = getRegister(r19);
-+ int64_t r20_val = getRegister(r20);
-+ int64_t r21_val = getRegister(r21);
-+ int64_t r22_val = getRegister(r22);
-+ int64_t r23_val = getRegister(r23);
-+ int64_t r24_val = getRegister(r24);
-+ int64_t r25_val = getRegister(r25);
-+ int64_t r26_val = getRegister(r26);
-+ int64_t r27_val = getRegister(r27);
-+ int64_t r28_val = getRegister(r28);
-+ int64_t r29_val = getRegister(r29);
-+ int64_t r30_val = getRegister(r30);
-+ int64_t r31_val = getRegister(r31);
-+ int64_t sp_val = getRegister(sp);
-+
-+#ifdef DEBUG
-+ // Set up callee-saved registers with a known value to detect clobbers.
-+ // DEBUG-only: in release this would silently corrupt every JS-jit-entry
-+ // stub frame, since the stub saves r14-r31 to its stack early on. Any
-+ // single-step-profiling sample taken later (or any unwind through the
-+ // stub's saved CSR area) then dereferences `icount_` as a frame
-+ // pointer and crashes — see e.g. wasm/profiling.js, ion-error-*.js,
-+ // ion-lazy-tables.js, ion-callerfp-tag.js, return-call-profiling.js,
-+ // externref-global-postbarrier.js, builtin-modules/i8vecmul.js,
-+ // asm.js/testBug1357053.js (all single-step-profiling tests). In
-+ // debug builds the value collides with the same callsites but the
-+ // MOZ_ASSERTs below catch any actual ABI violation, which is the
-+ // entire point.
-+ int64_t callee_saved_value = icount_;
-+ setRegister(r14, callee_saved_value);
-+ setRegister(r15, callee_saved_value);
-+ setRegister(r16, callee_saved_value);
-+ setRegister(r17, callee_saved_value);
-+ setRegister(r18, callee_saved_value);
-+ setRegister(r19, callee_saved_value);
-+ setRegister(r20, callee_saved_value);
-+ setRegister(r21, callee_saved_value);
-+ setRegister(r22, callee_saved_value);
-+ setRegister(r23, callee_saved_value);
-+ setRegister(r24, callee_saved_value);
-+ setRegister(r25, callee_saved_value);
-+ setRegister(r26, callee_saved_value);
-+ setRegister(r27, callee_saved_value);
-+ setRegister(r28, callee_saved_value);
-+ setRegister(r29, callee_saved_value);
-+ setRegister(r30, callee_saved_value);
-+ setRegister(r31, callee_saved_value);
-+#endif
-+
-+ // Start the simulation.
-+ if (Simulator::StopSimAt != -1) {
-+ execute<true>();
-+ } else {
-+ execute<false>();
-+ }
-+
-+#ifdef DEBUG
-+ // Check that the callee-saved registers have been preserved.
-+ MOZ_ASSERT(callee_saved_value == getRegister(r14));
-+ MOZ_ASSERT(callee_saved_value == getRegister(r15));
-+ MOZ_ASSERT(callee_saved_value == getRegister(r16));
-+ MOZ_ASSERT(callee_saved_value == getRegister(r17));
-+ MOZ_ASSERT(callee_saved_value == getRegister(r18));
-+ MOZ_ASSERT(callee_saved_value == getRegister(r19));
-+ MOZ_ASSERT(callee_saved_value == getRegister(r20));
-+ MOZ_ASSERT(callee_saved_value == getRegister(r21));
-+ MOZ_ASSERT(callee_saved_value == getRegister(r22));
-+ MOZ_ASSERT(callee_saved_value == getRegister(r23));
-+ MOZ_ASSERT(callee_saved_value == getRegister(r24));
-+ MOZ_ASSERT(callee_saved_value == getRegister(r25));
-+ MOZ_ASSERT(callee_saved_value == getRegister(r26));
-+ MOZ_ASSERT(callee_saved_value == getRegister(r27));
-+ MOZ_ASSERT(callee_saved_value == getRegister(r28));
-+ MOZ_ASSERT(callee_saved_value == getRegister(r29));
-+ MOZ_ASSERT(callee_saved_value == getRegister(r30));
-+ MOZ_ASSERT(callee_saved_value == getRegister(r31));
-+#endif
-+
-+ // Restore callee-saved registers.
-+ setRegister(r14, r14_val);
-+ setRegister(r15, r15_val);
-+ setRegister(r16, r16_val);
-+ setRegister(r17, r17_val);
-+ setRegister(r18, r18_val);
-+ setRegister(r19, r19_val);
-+ setRegister(r20, r20_val);
-+ setRegister(r21, r21_val);
-+ setRegister(r22, r22_val);
-+ setRegister(r23, r23_val);
-+ setRegister(r24, r24_val);
-+ setRegister(r25, r25_val);
-+ setRegister(r26, r26_val);
-+ setRegister(r27, r27_val);
-+ setRegister(r28, r28_val);
-+ setRegister(r29, r29_val);
-+ setRegister(r30, r30_val);
-+ setRegister(r31, r31_val);
-+ setRegister(sp, sp_val);
-+}
-+
-+int64_t Simulator::call(uint8_t* entry, int argument_count, ...) {
-+ va_list parameters;
-+ va_start(parameters, argument_count);
-+
-+ int64_t original_stack = getRegister(sp);
-+ // Compute position of stack on entry to generated code.
-+ int64_t entry_stack = original_stack;
-+ if (argument_count > kCArgSlotCount) {
-+ entry_stack = entry_stack - argument_count * sizeof(int64_t);
-+ } else {
-+ entry_stack = entry_stack - kCArgsSlotsSize;
-+ }
-+
-+ entry_stack &= ~U64(ABIStackAlignment - 1);
-+
-+ intptr_t* stack_argument = reinterpret_cast<intptr_t*>(entry_stack);
-+
-+ // PPC64 ELFv2: first 8 integer args go in r3-r10.
-+ for (int i = 0; i < argument_count; i++) {
-+ js::jit::Register argReg;
-+ if (GetIntArgReg(i, &argReg)) {
-+ setRegister(argReg.code(), va_arg(parameters, int64_t));
-+ } else {
-+ stack_argument[i] = va_arg(parameters, int64_t);
-+ }
-+ }
-+
-+ va_end(parameters);
-+ setRegister(sp, entry_stack);
-+
-+ callInternal(entry);
-+
-+ MOZ_ASSERT(entry_stack == getRegister(sp));
-+ setRegister(sp, original_stack);
-+
-+ int64_t result = getRegister(r3);
-+ return result;
-+}
-+
-+uintptr_t Simulator::pushAddress(uintptr_t address) {
-+ int64_t new_sp = getRegister(sp) - sizeof(uintptr_t);
-+ uintptr_t* stack_slot = reinterpret_cast<uintptr_t*>(new_sp);
-+ *stack_slot = address;
-+ setRegister(sp, new_sp);
-+ return new_sp;
-+}
-+
-+uintptr_t Simulator::popAddress() {
-+ int64_t current_sp = getRegister(sp);
-+ uintptr_t* stack_slot = reinterpret_cast<uintptr_t*>(current_sp);
-+ uintptr_t address = *stack_slot;
-+ setRegister(sp, current_sp + sizeof(uintptr_t));
-+ return address;
-+}
-+
-+} // namespace jit
-+} // namespace js
-+
-+js::jit::Simulator* JSContext::simulator() const { return simulator_; }
-diff --git a/js/src/jit/ppc64/Simulator-ppc64.h b/js/src/jit/ppc64/Simulator-ppc64.h
-new file mode 100644
-index 000000000000..c7a3f3767d61
---- /dev/null
-+++ b/js/src/jit/ppc64/Simulator-ppc64.h
-@@ -0,0 +1,556 @@
-+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
-+ * vim: set ts=8 sts=2 et sw=2 tw=80:
-+ * This Source Code Form is subject to the terms of the Mozilla Public
-+ * License, v. 2.0. If a copy of the MPL was not distributed with this
-+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-+
-+#ifndef jit_ppc64_Simulator_ppc64_h
-+#define jit_ppc64_Simulator_ppc64_h
-+
-+#ifdef JS_SIMULATOR_PPC64
-+
-+# include "mozilla/Atomics.h"
-+
-+# include "jit/IonTypes.h"
-+# include "js/ProfilingFrameIterator.h"
-+# include "threading/Thread.h"
-+# include "vm/MutexIDs.h"
-+# include "wasm/WasmSignalHandlers.h"
-+
-+namespace js {
-+namespace jit {
-+
-+class JitActivation;
-+class Simulator;
-+class Redirection;
-+class CachePage;
-+class AutoLockSimulator;
-+
-+typedef void (*SingleStepCallback)(void* arg, Simulator* sim, void* pc);
-+
-+const intptr_t kPointerAlignment = 8;
-+const intptr_t kPointerAlignmentMask = kPointerAlignment - 1;
-+const intptr_t kDoubleAlignment = 8;
-+const intptr_t kDoubleAlignmentMask = kDoubleAlignment - 1;
-+
-+const int kNumGPRegisters = 32;
-+const int kPCRegister = 32;
-+const int kNumFPURegisters = 32;
-+const int kNumVRRegisters = 32; // VR0-VR31 (Altivec/VMX; = VSR32-63 in VSX)
-+
-+// PPC64 Condition Register: 8 fields of 4 bits each.
-+// Each field as a 4-bit value: 0x8=LT, 0x4=GT, 0x2=EQ, 0x1=SO, i.e. LT is the
-+// field's most significant bit; field 0 is the top nibble of our uint32_t.
-+const int kNumCRFields = 8;
-+
-+// CR field bit positions (within a 4-bit field).
-+const uint8_t kCRFieldLT = 0x8;
-+const uint8_t kCRFieldGT = 0x4;
-+const uint8_t kCRFieldEQ = 0x2;
-+const uint8_t kCRFieldSO = 0x1;
-+
-+// XER register bit positions.
-+const int kXERSOBit = 31;
-+const int kXEROVBit = 30;
-+const int kXERCABit = 29;
-+const int kXEROV32Bit = 19;
-+const int kXERCA32Bit = 18;
-+
-+// FPSCR rounding mode bits (bits 62:63, stored in low bits of our uint64_t).
-+const uint64_t kFPSCRRNMask = 0x3;
-+
-+// FPU rounding modes matching PPC64 FPSCR RN field.
-+enum FPURoundingMode {
-+ RN = 0, // Round to Nearest (ties to even)
-+ RZ = 1, // Round toward Zero
-+ RP = 2, // Round toward +Infinity
-+ RM = 3, // Round toward -Infinity
-+};
-+
-+// FPU invalid result constants.
-+const uint32_t kFPUInvalidResult = static_cast<uint32_t>(1 << 31) - 1;
-+const int32_t kFPUInvalidResultNegative = static_cast<int32_t>(1u << 31);
-+const uint64_t kFPU64InvalidResult =
-+ static_cast<uint64_t>(static_cast<uint64_t>(1) << 63) - 1;
-+const int64_t kFPU64InvalidResultNegative =
-+ static_cast<int64_t>(static_cast<uint64_t>(1) << 63);
-+
-+// Breakpoint/stop code ranges.
-+const uint32_t kMaxWatchpointCode = 31;
-+const uint32_t kMaxStopCode = 127;
-+const uint32_t kWasmTrapCode = 6;
-+
-+// Redirection instruction: PPC_stop (0x4C0002E4).
-+// Distinct from PPC_trap (0x7FE00008) used for wasm traps.
-+const uint32_t kCallRedirInstr = 0x4C0002E4;
-+
-+typedef uint32_t Instr;
-+class SimInstruction;
-+
-+class Simulator {
-+ friend class ppc64Debugger;
-+
-+ public:
-+ enum Register {
-+ no_reg = -1,
-+ r0 = 0,
-+ r1,
-+ r2,
-+ r3,
-+ r4,
-+ r5,
-+ r6,
-+ r7,
-+ r8,
-+ r9,
-+ r10,
-+ r11,
-+ r12,
-+ r13,
-+ r14,
-+ r15,
-+ r16,
-+ r17,
-+ r18,
-+ r19,
-+ r20,
-+ r21,
-+ r22,
-+ r23,
-+ r24,
-+ r25,
-+ r26,
-+ r27,
-+ r28,
-+ r29,
-+ r30,
-+ r31,
-+ pc,
-+ kNumSimuRegisters,
-+ // Aliases
-+ sp = r1,
-+ fp = r31,
-+ };
-+
-+ enum FPURegister {
-+ f0 = 0,
-+ f1,
-+ f2,
-+ f3,
-+ f4,
-+ f5,
-+ f6,
-+ f7,
-+ f8,
-+ f9,
-+ f10,
-+ f11,
-+ f12,
-+ f13,
-+ f14,
-+ f15,
-+ f16,
-+ f17,
-+ f18,
-+ f19,
-+ f20,
-+ f21,
-+ f22,
-+ f23,
-+ f24,
-+ f25,
-+ f26,
-+ f27,
-+ f28,
-+ f29,
-+ f30,
-+ f31,
-+ kNumFPURegisters
-+ };
-+
-+ static Simulator* Create();
-+ static void Destroy(Simulator* simulator);
-+
-+ Simulator();
-+ ~Simulator();
-+
-+ static Simulator* Current();
-+
-+ static inline uintptr_t StackLimit() {
-+ return Simulator::Current()->stackLimit();
-+ }
-+
-+ uintptr_t* addressOfStackLimit();
-+
-+ // GPR accessors.
-+ void setRegister(int reg, int64_t value);
-+ int64_t getRegister(int reg) const;
-+
-+ // FPR accessors.
-+ void setFpuRegister(int fpureg, int64_t value);
-+ void setFpuRegisterWord(int fpureg, int32_t value);
-+ void setFpuRegisterFloat(int fpureg, float value);
-+ void setFpuRegisterDouble(int fpureg, double value);
-+ int64_t getFpuRegister(int fpureg) const;
-+ int32_t getFpuRegisterWord(int fpureg) const;
-+ int32_t getFpuRegisterSignedWord(int fpureg) const;
-+ float getFpuRegisterFloat(int fpureg) const;
-+ double getFpuRegisterDouble(int fpureg) const;
-+
-+ // VR accessors (Altivec/VMX registers VR0-VR31). The bytes array is the
-+ // ground truth: bytes[0] is the most-significant-byte on PPC64 big-endian
-+ // numbering, i.e., VSR[MSB..LSB] mapped as bytes[0..15]. Callers that want
-+ // typed views (lane 0 etc.) should extract from the bytes array according
-+ // to the ISA's lane numbering for that instruction.
-+ void setVRBytes(int vreg, const uint8_t bytes[16]);
-+ void getVRBytes(int vreg, uint8_t bytes[16]) const;
-+
-+ // VSR (Vector-Scalar Register) accessors: unified 64-register namespace
-+ // where VSR 0-31 aliases FPR 0-31 (DW0 is the FPR value, DW1 is
-+ // architecturally undefined — we model it as zero on read, ignored on
-+ // write) and VSR 32-63 aliases VR 0-31. Used by VSX instructions
-+ // (xxpermdi, xxlor, xxlxor, mtvsrd, mfvsrd, ...).
-+ void getVSR128(int vsr, uint8_t bytes[16]) const;
-+ void setVSR128(int vsr, const uint8_t bytes[16]);
-+
-+ // SPR accessors.
-+ int64_t getLR() const { return LR_; }
-+ void setLR(int64_t value) { LR_ = value; }
-+ int64_t getCTR() const { return CTR_; }
-+ void setCTR(int64_t value) { CTR_ = value; }
-+ uint32_t getCR() const { return CR_; }
-+ void setCR(uint32_t value) { CR_ = value; }
-+ uint64_t getXER() const { return XER_; }
-+ void setXER(uint64_t value) { XER_ = value; }
-+ uint64_t getFPSCR() const { return FPSCR_; }
-+ void setFPSCR(uint64_t value) { FPSCR_ = value; }
-+
-+ // CR field accessors: field 0 is the most significant nibble (bits 31:28).
-+ uint8_t getCRField(int field) const {
-+ return (CR_ >> (4 * (7 - field))) & 0xF;
-+ }
-+ void setCRField(int field, uint8_t val) {
-+ uint32_t shift = 4 * (7 - field);
-+ CR_ = (CR_ & ~(0xFu << shift)) | ((val & 0xFu) << shift);
-+ }
-+
-+ // XER bit accessors.
-+ bool getXERSO() const { return (XER_ >> kXERSOBit) & 1; }
-+ void setXERSO(bool v) {
-+ XER_ = (XER_ & ~(1ull << kXERSOBit)) | ((uint64_t)v << kXERSOBit);
-+ }
-+ bool getXEROV() const { return (XER_ >> kXEROVBit) & 1; }
-+ void setXEROV(bool v) {
-+ XER_ = (XER_ & ~(1ull << kXEROVBit)) | ((uint64_t)v << kXEROVBit);
-+ // Mirror to OV32. Real POWER9 silicon sets OV32 == OV for both 32-bit
-+ // and 64-bit overflow ops: mulldo(2, 2^62) produces OV=OV32=1;
-+ // mulldo(2^30, 4) produces OV=OV32=0. The JIT's
-+ // POWER9 Overflow path is `mulldo + mcrxrx + bc Overflow`, where
-+ // mcrxrx places OV32 in the GT slot and the Overflow condition tests
-+ // GT — so OV32 must be live or no-overflow is reported even when
-+ // OV=1. Without this mirror, BigInt fast-path mul silently wraps.
-+ XER_ = (XER_ & ~(1ull << kXEROV32Bit)) | ((uint64_t)v << kXEROV32Bit);
-+ if (v) setXERSO(true);
-+ }
-+ bool getXERCA() const { return (XER_ >> kXERCABit) & 1; }
-+ void setXERCA(bool v) {
-+ XER_ = (XER_ & ~(1ull << kXERCABit)) | ((uint64_t)v << kXERCABit);
-+ }
-+
-+ // PC accessors.
-+ void set_pc(int64_t value);
-+ int64_t get_pc() const;
-+
-+ template <typename T>
-+ T get_pc_as() const {
-+ return reinterpret_cast<T>(get_pc());
-+ }
-+
-+ void enable_single_stepping(SingleStepCallback cb, void* arg);
-+ void disable_single_stepping();
-+
-+ uintptr_t stackLimit() const;
-+ bool overRecursed(uintptr_t newsp = 0) const;
-+ bool overRecursedWithExtra(uint32_t extra) const;
-+
-+ template <bool enableStopSimAt>
-+ void execute();
-+
-+ int64_t call(uint8_t* entry, int argument_count, ...);
-+
-+ uintptr_t pushAddress(uintptr_t address);
-+ uintptr_t popAddress();
-+
-+ void setLastDebuggerInput(char* input);
-+ char* lastDebuggerInput() { return lastDebuggerInput_; }
-+
-+ bool has_bad_pc() const;
-+
-+ // Update CR field 0 from a 64-bit result.
-+ void updateCR0(int64_t result) {
-+ uint8_t field = kCRFieldSO * getXERSO();
-+ if (result < 0)
-+ field |= kCRFieldLT;
-+ else if (result > 0)
-+ field |= kCRFieldGT;
-+ else
-+ field |= kCRFieldEQ;
-+ setCRField(0, field);
-+ }
-+
-+ // Update CR field 0 from a 32-bit result (sign-extended comparison).
-+ void updateCR0_32(int32_t result) {
-+ uint8_t field = kCRFieldSO * getXERSO();
-+ if (result < 0)
-+ field |= kCRFieldLT;
-+ else if (result > 0)
-+ field |= kCRFieldGT;
-+ else
-+ field |= kCRFieldEQ;
-+ setCRField(0, field);
-+ }
-+
-+ // Compare and set an arbitrary CR field.
-+ void setCRFieldCmp(int field, int64_t lhs, int64_t rhs) {
-+ uint8_t val = kCRFieldSO * getXERSO();
-+ if (lhs < rhs)
-+ val |= kCRFieldLT;
-+ else if (lhs > rhs)
-+ val |= kCRFieldGT;
-+ else
-+ val |= kCRFieldEQ;
-+ setCRField(field, val);
-+ }
-+
-+ void setCRFieldCmpU(int field, uint64_t lhs, uint64_t rhs) {
-+ uint8_t val = kCRFieldSO * getXERSO();
-+ if (lhs < rhs)
-+ val |= kCRFieldLT;
-+ else if (lhs > rhs)
-+ val |= kCRFieldGT;
-+ else
-+ val |= kCRFieldEQ;
-+ setCRField(field, val);
-+ }
-+
-+ private:
-+ enum SpecialValues {
-+ // PPC64 masks the low 2 bits of branch targets, so these must be
-+ // 4-byte aligned to survive the & ~3 mask in blr/bcctr.
-+ bad_ra = -4,
-+ end_sim_pc = -8,
-+ Unpredictable = 0xbadbeaf
-+ };
-+
-+ bool init();
-+
-+ void format(SimInstruction* instr, const char* format);
-+
-+ // Memory access.
-+ inline uint8_t readBU(uint64_t addr);
-+ inline int8_t readB(uint64_t addr);
-+ inline void writeB(uint64_t addr, uint8_t value);
-+ inline void writeB(uint64_t addr, int8_t value);
-+
-+ inline uint16_t readHU(uint64_t addr, SimInstruction* instr);
-+ inline int16_t readH(uint64_t addr, SimInstruction* instr);
-+ inline void writeH(uint64_t addr, uint16_t value, SimInstruction* instr);
-+ inline void writeH(uint64_t addr, int16_t value, SimInstruction* instr);
-+
-+ inline uint32_t readWU(uint64_t addr, SimInstruction* instr);
-+ inline int32_t readW(uint64_t addr, SimInstruction* instr);
-+ inline void writeW(uint64_t addr, uint32_t value, SimInstruction* instr);
-+ inline void writeW(uint64_t addr, int32_t value, SimInstruction* instr);
-+
-+ inline int64_t readDW(uint64_t addr, SimInstruction* instr);
-+ inline void writeDW(uint64_t addr, int64_t value, SimInstruction* instr);
-+
-+ inline double readD(uint64_t addr, SimInstruction* instr);
-+ inline void writeD(uint64_t addr, double value, SimInstruction* instr);
-+
-+ inline uint8_t loadLinkedB(uint64_t addr, SimInstruction* instr);
-+ inline int storeConditionalB(uint64_t addr, uint8_t value,
-+ SimInstruction* instr);
-+ inline uint16_t loadLinkedH(uint64_t addr, SimInstruction* instr);
-+ inline int storeConditionalH(uint64_t addr, uint16_t value,
-+ SimInstruction* instr);
-+ inline int32_t loadLinkedW(uint64_t addr, SimInstruction* instr);
-+ inline int storeConditionalW(uint64_t addr, int32_t value,
-+ SimInstruction* instr);
-+ inline int64_t loadLinkedD(uint64_t addr, SimInstruction* instr);
-+ inline int storeConditionalD(uint64_t addr, int64_t value,
-+ SimInstruction* instr);
-+
-+ // Instruction decoders.
-+ void decodeDFormALU(SimInstruction* instr);
-+ void decodeDFormLoad(SimInstruction* instr);
-+ void decodeDFormStore(SimInstruction* instr);
-+ void decodeDSForm(SimInstruction* instr);
-+ void decodeXForm(SimInstruction* instr);
-+ void decodeRotateMask(SimInstruction* instr);
-+ void decodeBranch(SimInstruction* instr);
-+ void decodeFP(SimInstruction* instr);
-+ void decodeVSX(SimInstruction* instr);
-+ void decodeVMX(SimInstruction* instr);
-+ // Power ISA v3.1 prefixed instructions. `prefix` points at the
-+ // 4-byte prefix word; the suffix is read from `prefix + 4`.
-+ void decodePrefixed(SimInstruction* prefix);
-+
-+ void softwareInterrupt(SimInstruction* instr);
-+
-+ // Stop/breakpoint helpers.
-+ bool isWatchpoint(uint32_t code);
-+ void printWatchpoint(uint32_t code);
-+ void handleStop(uint32_t code, SimInstruction* instr);
-+ bool isStopInstruction(SimInstruction* instr);
-+ bool isEnabledStop(uint32_t code);
-+ void enableStop(uint32_t code);
-+ void disableStop(uint32_t code);
-+ void increaseStopCounter(uint32_t code);
-+ void printStopInfo(uint32_t code);
-+
-+ JS::ProfilingFrameIterator::RegisterState registerState();
-+
-+ bool MOZ_ALWAYS_INLINE handleWasmSegFault(uint64_t addr, unsigned numBytes) {
-+ if (MOZ_LIKELY(!js::wasm::CodeExists)) {
-+ return false;
-+ }
-+ uint8_t* newPC;
-+ if (!js::wasm::MemoryAccessTraps(registerState(), (uint8_t*)addr, numBytes,
-+ &newPC)) {
-+ return false;
-+ }
-+ LLBit_ = false;
-+ set_pc(int64_t(newPC));
-+ return true;
-+ }
-+
-+ void instructionDecode(SimInstruction* instr);
-+
-+ public:
-+ static int64_t StopSimAt;
-+
-+ static void* RedirectNativeFunction(void* nativeFunction,
-+ ABIFunctionType type);
-+
-+ private:
-+ void setCallResultDouble(double result);
-+ void setCallResultFloat(float result);
-+ void setCallResult(int64_t res);
-+# ifdef XP_DARWIN
-+ void setCallResult(intptr_t res);
-+# endif
-+ void setCallResult(__int128 res);
-+
-+ void callInternal(uint8_t* entry);
-+
-+ // Architecture state.
-+ int64_t registers_[kNumSimuRegisters];
-+ int64_t FPUregisters_[kNumFPURegisters];
-+ // VR namespace (Altivec/VMX registers VR0-VR31 == VSR32-63). Stored as
-+ // 16 raw bytes per register to preserve exact architectural byte order
-+ // independent of host endianness. Accessed through setVRBytes/getVRBytes
-+ // declared above; the bytes array is the ground truth.
-+ uint8_t VRregisters_[kNumVRRegisters][16];
-+
-+ // PPC64 Special Purpose Registers.
-+ int64_t LR_;
-+ int64_t CTR_;
-+ uint32_t CR_;
-+ uint64_t XER_;
-+ uint64_t FPSCR_;
-+
-+ // Atomics.
-+ bool LLBit_;
-+ uintptr_t LLAddr_;
-+ int64_t lastLLValue_;
-+
-+ // Simulator support.
-+ char* stack_;
-+ uintptr_t stackLimit_;
-+ bool pc_modified_;
-+ int64_t icount_;
-+ int64_t break_count_;
-+
-+ char* lastDebuggerInput_;
-+
-+ SimInstruction* break_pc_;
-+ Instr break_instr_;
-+
-+ bool single_stepping_;
-+ SingleStepCallback single_step_callback_;
-+ void* single_step_callback_arg_;
-+
-+ static const uint32_t kNumOfWatchedStops = 256;
-+ static const uint32_t kStopDisabledBit = 1U << 31;
-+
-+ struct StopCountAndDesc {
-+ uint32_t count_;
-+ char* desc_;
-+ };
-+ StopCountAndDesc watchedStops_[kNumOfWatchedStops];
-+};
-+
-+// Process-wide simulator state.
-+class SimulatorProcess {
-+ friend class Redirection;
-+ friend class AutoLockSimulatorCache;
-+
-+ private:
-+ struct ICacheHasher {
-+ typedef void* Key;
-+ typedef void* Lookup;
-+ static HashNumber hash(const Lookup& l);
-+ static bool match(const Key& k, const Lookup& l);
-+ };
-+
-+ public:
-+ typedef HashMap<void*, CachePage*, ICacheHasher, SystemAllocPolicy> ICacheMap;
-+
-+ static mozilla::Atomic<size_t, mozilla::ReleaseAcquire>
-+ ICacheCheckingDisableCount;
-+ static void FlushICache(void* start, size_t size);
-+ static void checkICacheLocked(SimInstruction* instr);
-+
-+ static bool initialize() {
-+ singleton_ = js_new<SimulatorProcess>();
-+ return singleton_;
-+ }
-+ static void destroy() {
-+ js_delete(singleton_);
-+ singleton_ = nullptr;
-+ }
-+
-+ SimulatorProcess();
-+ ~SimulatorProcess();
-+
-+ private:
-+ static SimulatorProcess* singleton_;
-+
-+ Mutex cacheLock_;
-+ Redirection* redirection_;
-+ ICacheMap icache_;
-+
-+ public:
-+ static ICacheMap& icache() {
-+ singleton_->cacheLock_.assertOwnedByCurrentThread();
-+ return singleton_->icache_;
-+ }
-+
-+ static Redirection* redirection() {
-+ singleton_->cacheLock_.assertOwnedByCurrentThread();
-+ return singleton_->redirection_;
-+ }
-+
-+ static void setRedirection(js::jit::Redirection* redirection) {
-+ singleton_->cacheLock_.assertOwnedByCurrentThread();
-+ singleton_->redirection_ = redirection;
-+ }
-+};
-+
-+} // namespace jit
-+} // namespace js
-+
-+#endif /* JS_SIMULATOR_PPC64 */
-+
-+#endif /* jit_ppc64_Simulator_ppc64_h */
-diff --git a/js/src/jit/ppc64/Trampoline-ppc64.cpp b/js/src/jit/ppc64/Trampoline-ppc64.cpp
-new file mode 100644
-index 000000000000..515a931c86b0
---- /dev/null
-+++ b/js/src/jit/ppc64/Trampoline-ppc64.cpp
-@@ -0,0 +1,648 @@
-+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
-+ * vim: set ts=8 sts=2 et sw=2 tw=80:
-+ * This Source Code Form is subject to the terms of the Mozilla Public
-+ * License, v. 2.0. If a copy of the MPL was not distributed with this
-+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-+
-+#include "jit/Bailouts.h"
-+#include "jit/BaselineFrame.h"
-+#include "jit/CalleeToken.h"
-+#include "jit/JitFrames.h"
-+#include "jit/JitRuntime.h"
-+#include "jit/PerfSpewer.h"
-+#include "jit/ppc64/SharedICHelpers-ppc64.h"
-+#include "jit/VMFunctions.h"
-+#include "vm/JitActivation.h"
-+#include "vm/JSContext.h"
-+
-+#include "jit/MacroAssembler-inl.h"
-+
-+using namespace js;
-+using namespace js::jit;
-+
-+// Float (Single+Double) and all GPRs. Simd128 excluded — Ion compiles JS
-+// (no v128 type), so SIMD regs are never live at bailout / invalidator /
-+// preBarrier entry. Including them would force the bailout frame's
-+// FPUArray to hold v128 slots that Ion never writes.
-+static const LiveRegisterSet AllRegs = LiveRegisterSet(
-+ GeneralRegisterSet(Registers::AllMask),
-+ FloatRegisterSet(FloatRegisters::AllSingleMask |
-+ FloatRegisters::AllDoubleMask));
-+
-+static_assert(sizeof(uintptr_t) == sizeof(uint64_t), "Not 64-bit clean.");
-+
-+// PPC64 ELFv2 callee-saved: GPRs r14-r31, FPRs f14-f31, VRs VR20-VR31, LR.
-+// We also save reg_vp (r10 / IntArgReg7) so we can use it after the JIT call.
-+//
-+// Layout is alignas(16) so that after `reserveStack(sizeof(EnterJITRegs))`
-+// the SP-relative offset of every VR slot is 16-byte aligned, satisfying
-+// the 16-byte alignment requirement of stxvd2x / stvx (stvx is technically
-+// alignment-tolerant, but we'd rather align by construction). Padding at
-+// the end keeps sizeof a multiple of 16 so SP stays quadword-aligned per
-+// the ELFv2 stack-pointer rule.
-+struct alignas(16) EnterJITRegs {
-+ // VR20-VR31 first so their SP-relative offsets are 0, 16, 32, ... — all
-+ // 16-byte aligned regardless of what follows.
-+ uint8_t vr20[16];
-+ uint8_t vr21[16];
-+ uint8_t vr22[16];
-+ uint8_t vr23[16];
-+ uint8_t vr24[16];
-+ uint8_t vr25[16];
-+ uint8_t vr26[16];
-+ uint8_t vr27[16];
-+ uint8_t vr28[16];
-+ uint8_t vr29[16];
-+ uint8_t vr30[16];
-+ uint8_t vr31[16];
-+
-+ double f31;
-+ double f30;
-+ double f29;
-+ double f28;
-+ double f27;
-+ double f26;
-+ double f25;
-+ double f24;
-+ double f23;
-+ double f22;
-+ double f21;
-+ double f20;
-+ double f19;
-+ double f18;
-+ double f17;
-+ double f16;
-+ double f15;
-+ double f14;
-+
-+ uint64_t r31; // FramePointer
-+ uint64_t r30;
-+ uint64_t r29;
-+ uint64_t r28;
-+ uint64_t r27;
-+ uint64_t r26;
-+ uint64_t r25;
-+ uint64_t r24;
-+ uint64_t r23;
-+ uint64_t r22;
-+ uint64_t r21;
-+ uint64_t r20;
-+ uint64_t r19;
-+ uint64_t r18;
-+ uint64_t r17;
-+ uint64_t r16;
-+ uint64_t r15;
-+ uint64_t r14;
-+ uint64_t r2; // TOC pointer
-+ uint64_t lr;
-+ // Save reg_vp (r10) on stack so we can use it after the JIT call returns.
-+ uint64_t r10;
-+};
-+// alignas(16) on the struct ensures sizeof is a multiple of 16, which keeps
-+// SP quadword-aligned after `reserveStack(sizeof(EnterJITRegs))`. The
-+// existing fields total 312 bytes; with the 192 bytes of VR slots we are
-+// at 504, which alignas(16) bumps to 512.
-+static_assert((sizeof(EnterJITRegs) % 16) == 0,
-+ "EnterJITRegs must be 16-byte aligned to keep SP aligned");
-+
-+static void GenerateReturn(MacroAssembler& masm) {
-+ MOZ_ASSERT(masm.framePushed() == sizeof(EnterJITRegs));
-+
-+ // Restore non-volatile GPRs.
-+ masm.as_ld(r14, StackPointer, offsetof(EnterJITRegs, r14));
-+ masm.as_ld(r15, StackPointer, offsetof(EnterJITRegs, r15));
-+ masm.as_ld(r16, StackPointer, offsetof(EnterJITRegs, r16));
-+ masm.as_ld(r17, StackPointer, offsetof(EnterJITRegs, r17));
-+ masm.as_ld(r18, StackPointer, offsetof(EnterJITRegs, r18));
-+ masm.as_ld(r19, StackPointer, offsetof(EnterJITRegs, r19));
-+ masm.as_ld(r20, StackPointer, offsetof(EnterJITRegs, r20));
-+ masm.as_ld(r21, StackPointer, offsetof(EnterJITRegs, r21));
-+ masm.as_ld(r22, StackPointer, offsetof(EnterJITRegs, r22));
-+ masm.as_ld(r23, StackPointer, offsetof(EnterJITRegs, r23));
-+ masm.as_ld(r24, StackPointer, offsetof(EnterJITRegs, r24));
-+ masm.as_ld(r25, StackPointer, offsetof(EnterJITRegs, r25));
-+ masm.as_ld(r26, StackPointer, offsetof(EnterJITRegs, r26));
-+ masm.as_ld(r27, StackPointer, offsetof(EnterJITRegs, r27));
-+ masm.as_ld(r28, StackPointer, offsetof(EnterJITRegs, r28));
-+ masm.as_ld(r29, StackPointer, offsetof(EnterJITRegs, r29));
-+ masm.as_ld(r30, StackPointer, offsetof(EnterJITRegs, r30));
-+ masm.as_ld(r31, StackPointer, offsetof(EnterJITRegs, r31));
-+ masm.as_ld(r2, StackPointer, offsetof(EnterJITRegs, r2));
-+
-+ // Restore LR.
-+ masm.as_ld(r0, StackPointer, offsetof(EnterJITRegs, lr));
-+ masm.xs_mtlr(r0);
-+
-+ // Restore non-volatile FPRs.
-+ masm.as_lfd(f14, StackPointer, offsetof(EnterJITRegs, f14));
-+ masm.as_lfd(f15, StackPointer, offsetof(EnterJITRegs, f15));
-+ masm.as_lfd(f16, StackPointer, offsetof(EnterJITRegs, f16));
-+ masm.as_lfd(f17, StackPointer, offsetof(EnterJITRegs, f17));
-+ masm.as_lfd(f18, StackPointer, offsetof(EnterJITRegs, f18));
-+ masm.as_lfd(f19, StackPointer, offsetof(EnterJITRegs, f19));
-+ masm.as_lfd(f20, StackPointer, offsetof(EnterJITRegs, f20));
-+ masm.as_lfd(f21, StackPointer, offsetof(EnterJITRegs, f21));
-+ masm.as_lfd(f22, StackPointer, offsetof(EnterJITRegs, f22));
-+ masm.as_lfd(f23, StackPointer, offsetof(EnterJITRegs, f23));
-+ masm.as_lfd(f24, StackPointer, offsetof(EnterJITRegs, f24));
-+ masm.as_lfd(f25, StackPointer, offsetof(EnterJITRegs, f25));
-+ masm.as_lfd(f26, StackPointer, offsetof(EnterJITRegs, f26));
-+ masm.as_lfd(f27, StackPointer, offsetof(EnterJITRegs, f27));
-+ masm.as_lfd(f28, StackPointer, offsetof(EnterJITRegs, f28));
-+ masm.as_lfd(f29, StackPointer, offsetof(EnterJITRegs, f29));
-+ masm.as_lfd(f30, StackPointer, offsetof(EnterJITRegs, f30));
-+ masm.as_lfd(f31, StackPointer, offsetof(EnterJITRegs, f31));
-+
-+ // Restore callee-saved VR20-VR31 (ELFv2). lvx uses indexed addressing
-+ // (RA + RB), and r0's value is used here as RB (RA = StackPointer is
-+ // non-zero, so its value is added). r0 is non-allocatable.
-+#define RESTORE_VR(N) \
-+ masm.xs_li(r0, offsetof(EnterJITRegs, vr##N)); \
-+ masm.as_lvx(N, StackPointer, r0)
-+ RESTORE_VR(20); RESTORE_VR(21); RESTORE_VR(22); RESTORE_VR(23);
-+ RESTORE_VR(24); RESTORE_VR(25); RESTORE_VR(26); RESTORE_VR(27);
-+ RESTORE_VR(28); RESTORE_VR(29); RESTORE_VR(30); RESTORE_VR(31);
-+#undef RESTORE_VR
-+
-+ masm.freeStack(sizeof(EnterJITRegs));
-+
-+ masm.as_blr();
-+}
-+
-+static void GeneratePrologue(MacroAssembler& masm) {
-+ // Save LR first (PPC64 LR is SPR, not GPR).
-+ masm.xs_mflr(r0);
-+
-+ // ELFv2 prologue convention: save LR at caller's frame [SP+16] BEFORE
-+ // decrementing SP. External unwinders (gdb, perf, libunwind) walk the
-+ // stack by reading LR-save slots at [SP+16] of every frame; without
-+ // this write they'd find junk at our caller's slot. Costs 1 extra
-+ // instruction; we still keep the in-frame save below for clean
-+ // restore symmetry.
-+ masm.as_std(r0, StackPointer, 16);
-+
-+ masm.reserveStack(sizeof(EnterJITRegs));
-+
-+ // Save LR (also kept in our own frame for the clean restore in
-+ // GenerateReturn — see comment there).
-+ masm.as_std(r0, StackPointer, offsetof(EnterJITRegs, lr));
-+
-+ // Save non-volatile GPRs.
-+ masm.as_std(r2, StackPointer, offsetof(EnterJITRegs, r2));
-+ masm.as_std(r14, StackPointer, offsetof(EnterJITRegs, r14));
-+ masm.as_std(r15, StackPointer, offsetof(EnterJITRegs, r15));
-+ masm.as_std(r16, StackPointer, offsetof(EnterJITRegs, r16));
-+ masm.as_std(r17, StackPointer, offsetof(EnterJITRegs, r17));
-+ masm.as_std(r18, StackPointer, offsetof(EnterJITRegs, r18));
-+ masm.as_std(r19, StackPointer, offsetof(EnterJITRegs, r19));
-+ masm.as_std(r20, StackPointer, offsetof(EnterJITRegs, r20));
-+ masm.as_std(r21, StackPointer, offsetof(EnterJITRegs, r21));
-+ masm.as_std(r22, StackPointer, offsetof(EnterJITRegs, r22));
-+ masm.as_std(r23, StackPointer, offsetof(EnterJITRegs, r23));
-+ masm.as_std(r24, StackPointer, offsetof(EnterJITRegs, r24));
-+ masm.as_std(r25, StackPointer, offsetof(EnterJITRegs, r25));
-+ masm.as_std(r26, StackPointer, offsetof(EnterJITRegs, r26));
-+ masm.as_std(r27, StackPointer, offsetof(EnterJITRegs, r27));
-+ masm.as_std(r28, StackPointer, offsetof(EnterJITRegs, r28));
-+ masm.as_std(r29, StackPointer, offsetof(EnterJITRegs, r29));
-+ masm.as_std(r30, StackPointer, offsetof(EnterJITRegs, r30));
-+ masm.as_std(r31, StackPointer, offsetof(EnterJITRegs, r31));
-+
-+ // Save reg_vp (r10) so we can retrieve it after the JIT call.
-+ masm.as_std(r10, StackPointer, offsetof(EnterJITRegs, r10));
-+
-+ // Save non-volatile FPRs.
-+ masm.as_stfd(f14, StackPointer, offsetof(EnterJITRegs, f14));
-+ masm.as_stfd(f15, StackPointer, offsetof(EnterJITRegs, f15));
-+ masm.as_stfd(f16, StackPointer, offsetof(EnterJITRegs, f16));
-+ masm.as_stfd(f17, StackPointer, offsetof(EnterJITRegs, f17));
-+ masm.as_stfd(f18, StackPointer, offsetof(EnterJITRegs, f18));
-+ masm.as_stfd(f19, StackPointer, offsetof(EnterJITRegs, f19));
-+ masm.as_stfd(f20, StackPointer, offsetof(EnterJITRegs, f20));
-+ masm.as_stfd(f21, StackPointer, offsetof(EnterJITRegs, f21));
-+ masm.as_stfd(f22, StackPointer, offsetof(EnterJITRegs, f22));
-+ masm.as_stfd(f23, StackPointer, offsetof(EnterJITRegs, f23));
-+ masm.as_stfd(f24, StackPointer, offsetof(EnterJITRegs, f24));
-+ masm.as_stfd(f25, StackPointer, offsetof(EnterJITRegs, f25));
-+ masm.as_stfd(f26, StackPointer, offsetof(EnterJITRegs, f26));
-+ masm.as_stfd(f27, StackPointer, offsetof(EnterJITRegs, f27));
-+ masm.as_stfd(f28, StackPointer, offsetof(EnterJITRegs, f28));
-+ masm.as_stfd(f29, StackPointer, offsetof(EnterJITRegs, f29));
-+ masm.as_stfd(f30, StackPointer, offsetof(EnterJITRegs, f30));
-+ masm.as_stfd(f31, StackPointer, offsetof(EnterJITRegs, f31));
-+
-+ // Save callee-saved VR20-VR31 (ELFv2). The JIT freely uses VMX registers
-+ // via EmitVmxBinary etc.; without this save the C caller's VR20-VR31
-+ // contents would be trashed on return. stvx uses indexed addressing —
-+ // r0 holds the offset (non-allocatable in JIT regalloc; safe to use as
-+ // a free temp here).
-+#define SAVE_VR(N) \
-+ masm.xs_li(r0, offsetof(EnterJITRegs, vr##N)); \
-+ masm.as_stvx(N, StackPointer, r0)
-+ SAVE_VR(20); SAVE_VR(21); SAVE_VR(22); SAVE_VR(23);
-+ SAVE_VR(24); SAVE_VR(25); SAVE_VR(26); SAVE_VR(27);
-+ SAVE_VR(28); SAVE_VR(29); SAVE_VR(30); SAVE_VR(31);
-+#undef SAVE_VR
-+}
-+
-+void JitRuntime::generateEnterJIT(JSContext* cx, MacroAssembler& masm) {
-+ AutoCreatedBy acb(masm, "JitRuntime::generateEnterJIT");
-+
-+ enterJITOffset_ = startTrampolineCode(masm);
-+
-+ // EnterJitCode signature: (void* code, unsigned argc, Value* argv,
-+ // InterpreterFrame* fp, CalleeToken calleeToken,
-+ // JSObject* envChain, size_t numStackValues,
-+ // Value* vp)
-+ const Register reg_code = IntArgReg0; // r3
-+ const Register reg_argc = IntArgReg1; // r4
-+ const Register reg_argv = IntArgReg2; // r5
-+ const mozilla::DebugOnly<Register> reg_frame = IntArgReg3; // r6
-+ const Register reg_token = IntArgReg4; // r7
-+ const Register reg_chain = IntArgReg5; // r8
-+ const Register reg_values = IntArgReg6; // r9
-+ const Register reg_vp = IntArgReg7; // r10
-+
-+ MOZ_ASSERT(OsrFrameReg == reg_frame);
-+
-+ GeneratePrologue(masm);
-+
-+ // Save stack pointer as baseline frame.
-+ masm.movePtr(StackPointer, FramePointer);
-+
-+ // Use non-volatile scratch registers for generateEnterJitShared.
-+ // r14, r15, r17 are non-volatile and not special-purpose in JIT.
-+ generateEnterJitShared(masm, reg_argc, reg_argv, reg_token, r14, r15, r17);
-+
-+ // Push the descriptor.
-+ masm.unboxInt32(Address(reg_vp, 0), r14);
-+ masm.pushFrameDescriptorForJitCall(FrameType::CppToJSJit, r14, r14);
-+
-+ CodeLabel returnLabel;
-+ Label oomReturnLabel;
-+ {
-+ // Handle Interpreter -> Baseline OSR.
-+ AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All());
-+ MOZ_ASSERT(!regs.has(FramePointer));
-+ regs.take(OsrFrameReg);
-+ regs.take(reg_code);
-+ MOZ_ASSERT(!regs.has(ReturnReg), "ReturnReg matches reg_code");
-+
-+ Label notOsr;
-+ masm.branchTestPtr(Assembler::Zero, OsrFrameReg, OsrFrameReg, &notOsr);
-+
-+ Register numStackValues = reg_values;
-+ regs.take(numStackValues);
-+ Register scratch = regs.takeAny();
-+
-+ // Push return address.
-+ masm.subPtr(Imm32(sizeof(uintptr_t)), StackPointer);
-+ masm.mov(&returnLabel, scratch);
-+ masm.storePtr(scratch, Address(StackPointer, 0));
-+
-+ // Push previous frame pointer.
-+ masm.subPtr(Imm32(sizeof(uintptr_t)), StackPointer);
-+ masm.storePtr(FramePointer, Address(StackPointer, 0));
-+
-+ // Reserve frame.
-+ Register framePtr = FramePointer;
-+ masm.movePtr(StackPointer, framePtr);
-+ masm.subPtr(Imm32(BaselineFrame::Size()), StackPointer);
-+
-+ Register framePtrScratch = regs.takeAny();
-+ masm.movePtr(StackPointer, framePtrScratch);
-+
-+ // Reserve space for locals and stack values.
-+ masm.x_sldi(scratch, numStackValues, 3);
-+ masm.subPtr(scratch, StackPointer);
-+
-+ // Enter exit frame.
-+ masm.reserveStack(3 * sizeof(uintptr_t));
-+ masm.storePtr(ImmWord(MakeFrameDescriptor(FrameType::BaselineJS)),
-+ Address(StackPointer, 2 * sizeof(uintptr_t)));
-+ masm.storePtr(ImmPtr(nullptr), Address(StackPointer, sizeof(uintptr_t)));
-+ masm.storePtr(FramePointer, Address(StackPointer, 0));
-+
-+ // No GC things to mark, push a bare token.
-+ masm.loadJSContext(scratch);
-+ masm.enterFakeExitFrame(scratch, scratch, ExitFrameType::Bare);
-+
-+ masm.reserveStack(2 * sizeof(uintptr_t));
-+ masm.storePtr(framePtr, Address(StackPointer, sizeof(uintptr_t)));
-+ masm.storePtr(reg_code, Address(StackPointer, 0));
-+
-+ using Fn = void (*)(BaselineFrame* frame, InterpreterFrame* interpFrame,
-+ uint32_t numStackValues);
-+ masm.setupUnalignedABICall(scratch);
-+ masm.passABIArg(framePtrScratch);
-+ masm.passABIArg(OsrFrameReg);
-+ masm.passABIArg(numStackValues);
-+ masm.callWithABI<Fn, jit::InitBaselineFrameForOsr>(
-+ ABIType::General, CheckUnsafeCallWithABI::DontCheckHasExitFrame);
-+
-+ regs.add(OsrFrameReg);
-+ Register jitcode = regs.takeAny();
-+ masm.loadPtr(Address(StackPointer, 0), jitcode);
-+ masm.loadPtr(Address(StackPointer, sizeof(uintptr_t)), framePtr);
-+ masm.freeStack(2 * sizeof(uintptr_t));
-+
-+ masm.freeStack(ExitFrameLayout::SizeWithFooter());
-+
-+ // If OSR-ing, then emit instrumentation for setting lastProfilerFrame
-+ // if profiler instrumentation is enabled.
-+ {
-+ Label skipProfilingInstrumentation;
-+ AbsoluteAddress addressOfEnabled(
-+ cx->runtime()->geckoProfiler().addressOfEnabled());
-+ masm.branch32(Assembler::Equal, addressOfEnabled, Imm32(0),
-+ &skipProfilingInstrumentation);
-+ masm.profilerEnterFrame(framePtr, scratch);
-+ masm.bind(&skipProfilingInstrumentation);
-+ }
-+
-+ masm.jump(jitcode);
-+
-+ masm.bind(&notOsr);
-+ // Load the scope chain in R1.
-+ MOZ_ASSERT(R1.scratchReg() != reg_code);
-+ masm.movePtr(reg_chain, R1.scratchReg());
-+ }
-+
-+ // The call will push the return address and frame pointer on the stack, thus
-+ // we check that the stack would be aligned once the call is complete.
-+ masm.assertStackAlignment(JitStackAlignment, 2 * sizeof(uintptr_t));
-+
-+ // Call the function with pushing return address to stack.
-+ masm.callJitNoProfiler(reg_code);
-+
-+ {
-+ // Interpreter -> Baseline OSR will return here.
-+ masm.bind(&returnLabel);
-+ masm.addCodeLabel(returnLabel);
-+ masm.bind(&oomReturnLabel);
-+ }
-+
-+ // Discard arguments and padding. Set sp to the address of the EnterJITRegs
-+ // on the stack.
-+ masm.movePtr(FramePointer, StackPointer);
-+
-+ // Store the returned value into the vp.
-+ masm.as_ld(reg_vp, StackPointer, offsetof(EnterJITRegs, r10));
-+ masm.storeValue(JSReturnOperand, Address(reg_vp, 0));
-+
-+ // Restore non-volatile registers and return.
-+ GenerateReturn(masm);
-+}
-+
-+// static
-+mozilla::Maybe<::JS::ProfilingFrameIterator::RegisterState>
-+JitRuntime::getCppEntryRegisters(JitFrameLayout* frameStackAddress) {
-+ return mozilla::Nothing{};
-+}
-+
-+void JitRuntime::generateInvalidator(MacroAssembler& masm, Label* bailoutTail) {
-+ AutoCreatedBy acb(masm, "JitRuntime::generateInvalidator");
-+
-+ invalidatorOffset_ = startTrampolineCode(masm);
-+
-+ masm.checkStackAlignment();
-+
-+ // Push all registers so we can access them from [base + code].
-+ masm.PushRegsInMask(AllRegs);
-+
-+ // Pass pointer to InvalidationBailoutStack structure.
-+ masm.movePtr(StackPointer, IntArgReg0);
-+
-+ // Reserve place for BailoutInfo pointer. Two words to ensure alignment for
-+ // setupAlignedABICall.
-+ masm.subPtr(Imm32(2 * sizeof(uintptr_t)), StackPointer);
-+ masm.movePtr(StackPointer, IntArgReg1);
-+
-+ using Fn = bool (*)(InvalidationBailoutStack* sp, BaselineBailoutInfo** info);
-+ masm.setupAlignedABICall();
-+ masm.passABIArg(IntArgReg0);
-+ masm.passABIArg(IntArgReg1);
-+ masm.callWithABI<Fn, InvalidationBailout>(
-+ ABIType::General, CheckUnsafeCallWithABI::DontCheckOther);
-+
-+ masm.pop(IntArgReg2);
-+
-+ // Pop the machine state and the dead frame.
-+ masm.moveToStackPtr(FramePointer);
-+
-+ // Jump to shared bailout tail. The BailoutInfo pointer has to be in
-+ // IntArgReg2 (r5).
-+ masm.jump(bailoutTail);
-+}
-+
-+// When bailout is done via out of line code (lazy bailout).
-+// Frame size is stored in LR (look at
-+// CodeGeneratorPPC64::generateOutOfLineCode()) and thunk code should save it
-+// on stack.
-+static void PushBailoutFrame(MacroAssembler& masm, Register spArg) {
-+ // Push the frameSize_ stored in LR.
-+ // See: CodeGeneratorPPC64::generateOutOfLineCode()
-+ masm.pushReturnAddress();
-+
-+ // Push registers such that we can access them from [base + code].
-+ masm.PushRegsInMask(AllRegs);
-+
-+ // Put pointer to BailoutStack as first argument to the Bailout().
-+ masm.movePtr(StackPointer, spArg);
-+}
-+
-+static void GenerateBailoutThunk(MacroAssembler& masm, Label* bailoutTail) {
-+ PushBailoutFrame(masm, IntArgReg0);
-+
-+ // Make space for Bailout's bailoutInfo outparam.
-+ masm.reserveStack(sizeof(void*));
-+ masm.movePtr(StackPointer, IntArgReg1);
-+
-+ // Call the bailout function.
-+ using Fn = bool (*)(BailoutStack* sp, BaselineBailoutInfo** info);
-+ masm.setupUnalignedABICall(IntArgReg2);
-+ masm.passABIArg(IntArgReg0);
-+ masm.passABIArg(IntArgReg1);
-+ masm.callWithABI<Fn, Bailout>(ABIType::General,
-+ CheckUnsafeCallWithABI::DontCheckOther);
-+
-+ // Get the bailoutInfo outparam.
-+ masm.pop(IntArgReg2);
-+
-+ // Remove both the bailout frame and the topmost Ion frame's stack.
-+ masm.moveToStackPtr(FramePointer);
-+
-+ // Jump to shared bailout tail. The BailoutInfo pointer has to be in
-+ // IntArgReg2 (r5).
-+ masm.jump(bailoutTail);
-+}
-+
-+void JitRuntime::generateBailoutHandler(MacroAssembler& masm,
-+ Label* bailoutTail) {
-+ AutoCreatedBy acb(masm, "JitRuntime::generateBailoutHandler");
-+
-+ bailoutHandlerOffset_ = startTrampolineCode(masm);
-+
-+ GenerateBailoutThunk(masm, bailoutTail);
-+}
-+
-+bool JitRuntime::generateVMWrapper(JSContext* cx, MacroAssembler& masm,
-+ VMFunctionId id, const VMFunctionData& f,
-+ DynFn nativeFun, uint32_t* wrapperOffset) {
-+ AutoCreatedBy acb(masm, "JitRuntime::generateVMWrapper");
-+
-+ *wrapperOffset = startTrampolineCode(masm);
-+
-+ // Avoid conflicts with argument registers while discarding the result after
-+ // the function call.
-+ AllocatableGeneralRegisterSet regs(Register::Codes::WrapperMask);
-+
-+ static_assert(
-+ (Register::Codes::VolatileMask & ~Register::Codes::WrapperMask) == 0,
-+ "Wrapper register set should be a superset of Volatile register set.");
-+
-+ // The context is the first argument; r3 is the first argument register.
-+ Register cxreg = IntArgReg0;
-+ regs.take(cxreg);
-+
-+ // On link-register platforms, it is the responsibility of the VM *callee* to
-+ // push the return address, while the caller must ensure that the address
-+ // is stored in LR on entry. This allows the VM wrapper to work with both
-+ // direct calls and tail calls.
-+ masm.pushReturnAddress();
-+
-+ // Push the frame pointer to finish the exit frame, then link it up.
-+ masm.Push(FramePointer);
-+ masm.moveStackPtrTo(FramePointer);
-+ masm.loadJSContext(cxreg);
-+ masm.enterExitFrame(cxreg, regs.getAny(), id);
-+
-+ // Reserve space for the outparameter.
-+ masm.reserveVMFunctionOutParamSpace(f);
-+
-+ masm.setupUnalignedABICallDontSaveRestoreSP();
-+ masm.passABIArg(cxreg);
-+
-+ size_t argDisp = ExitFrameLayout::Size();
-+
-+ // Copy any arguments.
-+ for (uint32_t explicitArg = 0; explicitArg < f.explicitArgs; explicitArg++) {
-+ switch (f.argProperties(explicitArg)) {
-+ case VMFunctionData::WordByValue:
-+ if (f.argPassedInFloatReg(explicitArg)) {
-+ masm.passABIArg(MoveOperand(FramePointer, argDisp), ABIType::Float64);
-+ } else {
-+ masm.passABIArg(MoveOperand(FramePointer, argDisp), ABIType::General);
-+ }
-+ argDisp += sizeof(void*);
-+ break;
-+ case VMFunctionData::WordByRef:
-+ masm.passABIArg(MoveOperand(FramePointer, argDisp,
-+ MoveOperand::Kind::EffectiveAddress),
-+ ABIType::General);
-+ argDisp += sizeof(void*);
-+ break;
-+ case VMFunctionData::DoubleByValue:
-+ case VMFunctionData::DoubleByRef:
-+ MOZ_CRASH("NYI: PPC64 callVM should not be used with 128bits values.");
-+ break;
-+ }
-+ }
-+
-+ // Copy the implicit outparam, if any.
-+ const int32_t outParamOffset =
-+ -int32_t(ExitFooterFrame::Size()) - f.sizeOfOutParamStackSlot();
-+ if (f.outParam != Type_Void) {
-+ masm.passABIArg(MoveOperand(FramePointer, outParamOffset,
-+ MoveOperand::Kind::EffectiveAddress),
-+ ABIType::General);
-+ }
-+
-+ masm.callWithABI(nativeFun, ABIType::General,
-+ CheckUnsafeCallWithABI::DontCheckHasExitFrame);
-+
-+ // Test for failure.
-+ switch (f.failType()) {
-+ case Type_Cell:
-+ masm.branchTestPtr(Assembler::Zero, IntArgReg0, IntArgReg0,
-+ masm.failureLabel());
-+ break;
-+ case Type_Bool:
-+ masm.branchIfFalseBool(IntArgReg0, masm.failureLabel());
-+ break;
-+ case Type_Void:
-+ break;
-+ default:
-+ MOZ_CRASH("unknown failure kind");
-+ }
-+
-+ // Load the outparam.
-+ masm.loadVMFunctionOutParam(f, Address(FramePointer, outParamOffset));
-+
-+ // Pop frame and restore frame pointer.
-+ masm.moveToStackPtr(FramePointer);
-+ masm.pop(FramePointer);
-+
-+ // Return. Subtract sizeof(void*) for the frame pointer.
-+ masm.retn(Imm32(sizeof(ExitFrameLayout) - sizeof(void*) +
-+ f.explicitStackSlots() * sizeof(void*) +
-+ f.extraValuesToPop * sizeof(Value)));
-+
-+ return true;
-+}
-+
-+uint32_t JitRuntime::generatePreBarrier(JSContext* cx, MacroAssembler& masm,
-+ MIRType type) {
-+ AutoCreatedBy acb(masm, "JitRuntime::generatePreBarrier");
-+
-+ uint32_t offset = startTrampolineCode(masm);
-+
-+ MOZ_ASSERT(PreBarrierReg == IntArgReg1); // r4
-+ Register temp1 = IntArgReg0; // r3
-+ Register temp2 = IntArgReg2; // r5
-+ Register temp3 = IntArgReg3; // r6
-+ masm.push(temp1);
-+ masm.push(temp2);
-+ masm.push(temp3);
-+
-+ Label noBarrier;
-+ masm.emitPreBarrierFastPath(type, temp1, temp2, temp3, &noBarrier);
-+
-+ // Call into C++ to mark this GC thing.
-+ masm.pop(temp3);
-+ masm.pop(temp2);
-+ masm.pop(temp1);
-+
-+ LiveRegisterSet save;
-+ save.set() = RegisterSet(GeneralRegisterSet(Registers::VolatileMask),
-+ FloatRegisterSet(FloatRegisters::VolatileMask));
-+ // On PPC64, save LR since we'll be making a call.
-+ masm.pushReturnAddress();
-+ masm.PushRegsInMask(save);
-+
-+ masm.movePtr(ImmPtr(cx->runtime()), IntArgReg0);
-+
-+ masm.setupUnalignedABICall(IntArgReg2);
-+ masm.passABIArg(IntArgReg0);
-+ masm.passABIArg(IntArgReg1);
-+ masm.callWithABI(JitPreWriteBarrier(type));
-+
-+ masm.PopRegsInMask(save);
-+ masm.ret();
-+
-+ masm.bind(&noBarrier);
-+ masm.pop(temp3);
-+ masm.pop(temp2);
-+ masm.pop(temp1);
-+ masm.abiret();
-+
-+ return offset;
-+}
-+
-+void JitRuntime::generateBailoutTailStub(MacroAssembler& masm,
-+ Label* bailoutTail) {
-+ AutoCreatedBy acb(masm, "JitRuntime::generateBailoutTailStub");
-+
-+ masm.bind(bailoutTail);
-+ masm.generateBailoutTail(IntArgReg1, IntArgReg2);
-+}
-diff --git a/js/src/jit/shared/Assembler-shared.h b/js/src/jit/shared/Assembler-shared.h
-index d5fed2fabe31..490a9f5391e0 100644
---- a/js/src/jit/shared/Assembler-shared.h
-+++ b/js/src/jit/shared/Assembler-shared.h
-@@ -30,14 +30,15 @@
-
- #if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) || \
- defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \
-- defined(JS_CODEGEN_WASM32) || defined(JS_CODEGEN_RISCV64)
-+ defined(JS_CODEGEN_WASM32) || defined(JS_CODEGEN_RISCV64) || \
-+ defined(JS_CODEGEN_PPC64)
- // Push return addresses callee-side.
- # define JS_USE_LINK_REGISTER
- #endif
-
- #if defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_ARM64) || \
- defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64) || \
-- defined(JS_CODEGEN_ARM)
-+ defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_PPC64)
- // JS_CODELABEL_LINKMODE gives labels additional metadata
- // describing how Bind() should patch them.
- # define JS_CODELABEL_LINKMODE
-diff --git a/js/src/jit/shared/AtomicOperations-feeling-lucky-gcc.h b/js/src/jit/shared/AtomicOperations-feeling-lucky-gcc.h
-index a6909e560bef..d886cba2c7e6 100644
---- a/js/src/jit/shared/AtomicOperations-feeling-lucky-gcc.h
-+++ b/js/src/jit/shared/AtomicOperations-feeling-lucky-gcc.h
-@@ -46,7 +46,8 @@
- // code in this file.
-
- #if defined(JS_SIMULATOR_ARM64) || defined(JS_SIMULATOR_ARM) || \
-- defined(JS_SIMULATOR_MIPS64) || defined(JS_SIMULATOR_LOONG64)
-+ defined(JS_SIMULATOR_MIPS64) || defined(JS_SIMULATOR_LOONG64) || \
-+ defined(JS_SIMULATOR_PPC64)
- // On some x86 (32-bit) systems this will not work because the compiler does not
- // open-code 64-bit atomics. If so, try linking with -latomic. If that doesn't
- // work, you're mostly on your own.
-diff --git a/js/src/jit/shared/CodeGenerator-shared.cpp b/js/src/jit/shared/CodeGenerator-shared.cpp
-index ada87f1f11a2..14468356cf31 100644
---- a/js/src/jit/shared/CodeGenerator-shared.cpp
-+++ b/js/src/jit/shared/CodeGenerator-shared.cpp
-@@ -86,8 +86,8 @@ CodeGeneratorShared::CodeGeneratorShared(MIRGenerator* gen, LIRGraph* graph,
-
- #ifdef ENABLE_WASM_SIMD
- # if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) || \
-- defined(JS_CODEGEN_ARM64)
-- // On X64/x86 and ARM64, we don't need alignment for Wasm SIMD at this time.
-+ defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_PPC64)
-+ // On X64/x86, ARM64, and PPC64, we don't need alignment for Wasm SIMD at this time.
- # else
- # error \
- "we may need padding so that local slots are SIMD-aligned and the stack must be kept SIMD-aligned too."
-@@ -1075,7 +1075,7 @@ Label* CodeGeneratorShared::getJumpLabelForBranch(MBasicBlock* block) {
- // This function is not used for MIPS64/LOONG64/RISCV64. They have
- // branchToBlock.
- #if !defined(JS_CODEGEN_MIPS64) && !defined(JS_CODEGEN_LOONG64) && \
-- !defined(JS_CODEGEN_RISCV64)
-+ !defined(JS_CODEGEN_RISCV64) && !defined(JS_CODEGEN_PPC64)
- void CodeGeneratorShared::jumpToBlock(MBasicBlock* mir,
- Assembler::Condition cond) {
- // Skip past trivial blocks.
-diff --git a/js/src/jit/shared/Lowering-shared-inl.h b/js/src/jit/shared/Lowering-shared-inl.h
-index bdcc1da7d41a..b62f8f681df1 100644
---- a/js/src/jit/shared/Lowering-shared-inl.h
-+++ b/js/src/jit/shared/Lowering-shared-inl.h
-@@ -527,7 +527,7 @@ LAllocation LIRGeneratorShared::useRegisterOrNonDoubleConstant(
-
- #if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) || \
- defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_MIPS64) || \
-- defined(JS_CODEGEN_RISCV64)
-+ defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- LAllocation LIRGeneratorShared::useAnyOrConstant(MDefinition* mir) {
- return useRegisterOrConstant(mir);
- }
-diff --git a/js/src/js-config.mozbuild b/js/src/js-config.mozbuild
-index 22becaf4ecfb..ff5294825e9d 100644
---- a/js/src/js-config.mozbuild
-+++ b/js/src/js-config.mozbuild
-@@ -8,6 +8,7 @@ if (
- CONFIG["JS_CODEGEN_X64"]
- or CONFIG["JS_CODEGEN_ARM64"]
- or CONFIG["JS_CODEGEN_RISCV64"]
-+ or CONFIG["JS_CODEGEN_PPC64"]
- ):
- DEFINES["WASM_SUPPORTS_HUGE_MEMORY"] = True
-
-diff --git a/js/src/jsapi-tests/testJitABIcalls.cpp b/js/src/jsapi-tests/testJitABIcalls.cpp
-index b5c03a47dd83..887ad9e3d959 100644
---- a/js/src/jsapi-tests/testJitABIcalls.cpp
-+++ b/js/src/jsapi-tests/testJitABIcalls.cpp
-@@ -718,6 +718,9 @@ class JitABICall final : public jsapitest::RuntimeTest,
- #elif defined(JS_CODEGEN_RISCV64)
- Register base = t0;
- regs.take(base);
-+#elif defined(JS_CODEGEN_PPC64)
-+ Register base = r11;
-+ regs.take(base);
- #else
- # error "Unknown architecture!"
- #endif
-diff --git a/js/src/jsapi-tests/testWasmReturnCalls.cpp b/js/src/jsapi-tests/testWasmReturnCalls.cpp
-index 4728f2404ae8..a07ddb2f214e 100644
---- a/js/src/jsapi-tests/testWasmReturnCalls.cpp
-+++ b/js/src/jsapi-tests/testWasmReturnCalls.cpp
-@@ -32,7 +32,10 @@ BEGIN_TEST(testWasmCheckSlowCallMarkerHit) {
-
- masm.bind(&check);
- # ifdef JS_USE_LINK_REGISTER
--# if !defined(JS_CODEGEN_LOONG64) && !defined(JS_CODEGEN_MIPS64) && \
-+# if defined(JS_CODEGEN_PPC64)
-+ static constexpr Register ra = ABINonArgReg3;
-+ masm.xs_mflr(ra);
-+# elif !defined(JS_CODEGEN_LOONG64) && !defined(JS_CODEGEN_MIPS64) && \
- !defined(JS_CODEGEN_RISCV64)
- static constexpr Register ra = lr;
- # endif
-@@ -70,7 +73,10 @@ BEGIN_TEST(testWasmCheckSlowCallMarkerMiss) {
-
- masm.bind(&check);
- # ifdef JS_USE_LINK_REGISTER
--# if !defined(JS_CODEGEN_LOONG64) && !defined(JS_CODEGEN_MIPS64) && \
-+# if defined(JS_CODEGEN_PPC64)
-+ static constexpr Register ra = ABINonArgReg3;
-+ masm.xs_mflr(ra);
-+# elif !defined(JS_CODEGEN_LOONG64) && !defined(JS_CODEGEN_MIPS64) && \
- !defined(JS_CODEGEN_RISCV64)
- static constexpr Register ra = lr;
- # endif
-diff --git a/js/src/jsapi-tests/testsJit.cpp b/js/src/jsapi-tests/testsJit.cpp
-index a2dfe5d0196c..7f3dcca895d2 100644
---- a/js/src/jsapi-tests/testsJit.cpp
-+++ b/js/src/jsapi-tests/testsJit.cpp
-@@ -25,6 +25,14 @@ void PrepareJit(js::jit::MacroAssembler& masm) {
- #if defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \
- defined(JS_CODEGEN_RISCV64)
- save.add(js::jit::ra);
-+#elif defined(JS_CODEGEN_PPC64)
-+ // LR on PPC64 isn't a GPR; save it to the stack manually.
-+ {
-+ UseScratchRegisterScope temps(masm);
-+ Register scratch = temps.Acquire();
-+ masm.xs_mflr(scratch);
-+ masm.as_stdu(scratch, StackPointer, -8);
-+ }
- #elif defined(JS_USE_LINK_REGISTER)
- save.add(js::jit::lr);
- #endif
-@@ -44,6 +52,8 @@ bool ExecuteJit(JSContext* cx, js::jit::MacroAssembler& masm) {
- #if defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \
- defined(JS_CODEGEN_RISCV64)
- restore.add(js::jit::ra);
-+#elif defined(JS_CODEGEN_PPC64)
-+ // LR will be restored manually after PopRegsInMask.
- #elif defined(JS_USE_LINK_REGISTER)
- restore.add(js::jit::lr);
- #endif
-@@ -55,6 +65,16 @@ bool ExecuteJit(JSContext* cx, js::jit::MacroAssembler& masm) {
-
- // Reset stack pointer.
- masm.SetStackPointer64(PseudoStackPointer64);
-+#elif defined(JS_CODEGEN_PPC64)
-+ // Restore LR from the stack and return.
-+ {
-+ UseScratchRegisterScope temps(masm);
-+ Register scratch = temps.Acquire();
-+ masm.as_ld(scratch, StackPointer, 0);
-+ masm.xs_mtlr(scratch);
-+ masm.as_addi(StackPointer, StackPointer, 8);
-+ }
-+ masm.as_blr();
- #else
- // Exit the JIT-ed code using the ABI return style.
- masm.abiret();
-diff --git a/js/src/shell/js.cpp b/js/src/shell/js.cpp
-index 45bc0796b964..20eb1231bb7f 100644
---- a/js/src/shell/js.cpp
-+++ b/js/src/shell/js.cpp
-@@ -7895,6 +7895,13 @@ static void SingleStepCallback(void* arg, jit::Simulator* sim, void* pc) {
- state.fp = (void*)sim->getRegister(jit::Simulator::fp);
- // see WasmTailCallFPScratchReg and CollapseWasmFrameFast
- state.tempFP = (void*)sim->getRegister(jit::Simulator::t3);
-+# elif defined(JS_SIMULATOR_PPC64)
-+ state.sp = (void*)sim->getRegister(jit::Simulator::sp);
-+ state.lr = (void*)sim->getLR();
-+ state.fp = (void*)sim->getRegister(jit::Simulator::fp);
-+ // WasmTailCallFPScratchReg = ABINonArgReg3 = r22 holds the unwind FP
-+ // during the wasm tail-call collapse window (RestoreFpRa unwind info).
-+ state.tempFP = (void*)sim->getRegister(jit::Simulator::r22);
- # else
- # error "NYI: Single-step profiling support"
- # endif
-@@ -13144,6 +13151,15 @@ bool InitOptionParser(OptionParser& op) {
- "Stop the RISC-V simulator after the given "
- "NUMBER of instructions.",
- -1) ||
-+#endif
-+#ifdef JS_SIMULATOR_PPC64
-+ !op.addBoolOption('\0', "ppc64-sim-icache-checks",
-+ "Enable icache flush checks in the PPC64 "
-+ "simulator.") ||
-+ !op.addIntOption('\0', "ppc64-sim-stop-at", "NUMBER",
-+ "Stop the PPC64 simulator after the given "
-+ "NUMBER of instructions.",
-+ -1) ||
- #endif
- !op.addIntOption('\0', "nursery-size", "SIZE-MB",
- "Set the maximum nursery size in MB",
-@@ -14235,6 +14251,15 @@ bool SetContextJITOptions(JSContext* cx, const OptionParser& op) {
- if (stopAt >= 0) {
- jit::Simulator::StopSimAt = stopAt;
- }
-+#elif defined(JS_SIMULATOR_PPC64)
-+ if (op.getBoolOption("ppc64-sim-icache-checks")) {
-+ jit::SimulatorProcess::ICacheCheckingDisableCount = 0;
-+ }
-+
-+ int32_t stopAt = op.getIntOption("ppc64-sim-stop-at");
-+ if (stopAt >= 0) {
-+ jit::Simulator::StopSimAt = stopAt;
-+ }
- #endif
-
- #ifdef DEBUG
-diff --git a/js/src/shell/jsshell.h b/js/src/shell/jsshell.h
-index e8d47ba6888c..57e2b15f3cdd 100644
---- a/js/src/shell/jsshell.h
-+++ b/js/src/shell/jsshell.h
-@@ -22,7 +22,8 @@
-
- // Some platform hooks must be implemented for single-step profiling.
- #if defined(JS_SIMULATOR_ARM) || defined(JS_SIMULATOR_MIPS64) || \
-- defined(JS_SIMULATOR_ARM64) || defined(JS_SIMULATOR_LOONG64)
-+ defined(JS_SIMULATOR_ARM64) || defined(JS_SIMULATOR_LOONG64) || \
-+ defined(JS_SIMULATOR_RISCV64) || defined(JS_SIMULATOR_PPC64)
- # define SINGLESTEP_PROFILING
- #endif
-
-diff --git a/js/src/tests/shell/os.js b/js/src/tests/shell/os.js
-index 929982756548..f3d2396b17eb 100644
---- a/js/src/tests/shell/os.js
-+++ b/js/src/tests/shell/os.js
-@@ -20,7 +20,13 @@ var info = os.waitpid(kidpid, true);
- assertEq(info.hasOwnProperty("pid"), false);
- assertEq(info.hasOwnProperty("exitStatus"), false);
-
--os.kill(kidpid);
-+// Use SIGKILL (9) instead of the default SIGINT: under heavy parallel test
-+// load, SIGINT delivery can race with the child's signal-handler setup and
-+// the kernel's reaping path, leading to waitpid below blocking until the
-+// `sleep 60` exits normally. SIGKILL is uncatchable and forces immediate
-+// termination, so the assertion below ("killed process should not have
-+// exitStatus") is reliable.
-+os.kill(kidpid, 9);
-
- info = os.waitpid(kidpid);
- assertEq(info.hasOwnProperty("pid"), true, "waiting on dead process should return pid");
-diff --git a/js/src/util/Poison.h b/js/src/util/Poison.h
-index 721ecff6149d..de7981aa6f60 100644
---- a/js/src/util/Poison.h
-+++ b/js/src/util/Poison.h
-@@ -92,6 +92,8 @@ const uint8_t JS_SCOPE_DATA_TRAILING_NAMES_PATTERN = 0xCC;
- #elif defined(JS_CODEGEN_RISCV64)
- # define JS_SWEPT_CODE_PATTERN \
- 0x29 // illegal sb instruction, crashes in user mode.
-+#elif defined(JS_CODEGEN_PPC64)
-+# define JS_SWEPT_CODE_PATTERN 0x00 // illegal instruction (all zeros)
- #else
- # error "JS_SWEPT_CODE_PATTERN not defined for this platform"
- #endif
-diff --git a/js/src/wasm/WasmAnyRef.h b/js/src/wasm/WasmAnyRef.h
-index f81d4c6171b6..7200e9ab0e23 100644
---- a/js/src/wasm/WasmAnyRef.h
-+++ b/js/src/wasm/WasmAnyRef.h
-@@ -209,7 +209,7 @@ class AnyRef {
- // Truncate the value to the 31-bit value size.
- uintptr_t wideValue = uintptr_t(value & 0x7FFFFFFF);
- #elif defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_MIPS64) || \
-- defined(JS_CODEGEN_RISCV64)
-+ defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- // Sign extend the value to the native pointer size.
- uintptr_t wideValue = uintptr_t(int64_t((uint64_t(value) << 33)) >> 33);
- #elif !defined(JS_64BIT)
-@@ -234,6 +234,11 @@ class AnyRef {
- # ifdef DEBUG
- # if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM64)
- MOZ_ASSERT(value <= UINT32_MAX);
-+# elif defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_MIPS64) || \
-+ defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
-+ // On sign-extending platforms, a canonical i32 must be the sign
-+ // extension of its low 32 bits.
-+ MOZ_ASSERT(value == uintptr_t(int64_t(int32_t(value))));
- # endif
- # endif
- }
-diff --git a/js/src/wasm/WasmBCDefs.h b/js/src/wasm/WasmBCDefs.h
-index b44e91e28693..66a8c9afe8c6 100644
---- a/js/src/wasm/WasmBCDefs.h
-+++ b/js/src/wasm/WasmBCDefs.h
-@@ -44,6 +44,9 @@
- #if defined(JS_CODEGEN_RISCV64)
- # include "jit/riscv64/Assembler-riscv64.h"
- #endif
-+#if defined(JS_CODEGEN_PPC64)
-+# include "jit/ppc64/Assembler-ppc64.h"
-+#endif
- #include "js/ScalarType.h"
- #include "util/Memory.h"
- #include "wasm/WasmCodegenTypes.h"
-@@ -151,6 +154,10 @@ enum class RhsDestOp { True = true };
- # define RABALDR_PIN_INSTANCE
- #endif
-
-+#ifdef JS_CODEGEN_PPC64
-+# define RABALDR_PIN_INSTANCE
-+#endif
-+
- // Max number of pushes onto the value stack for any opcode or emitter that
- // does not push a variable, unbounded amount (anything with multiple
- // results). This includes also intermediate pushes such as values pushed as
-diff --git a/js/src/wasm/WasmBCMemory.cpp b/js/src/wasm/WasmBCMemory.cpp
-index 835512b09b8c..9137b09f4684 100644
---- a/js/src/wasm/WasmBCMemory.cpp
-+++ b/js/src/wasm/WasmBCMemory.cpp
-@@ -372,7 +372,7 @@ void BaseCompiler::boundsCheckBelow4GBAccess(uint32_t memoryIndex,
- // Make sure the ptr could be used as an index register.
- static inline void ToValidIndex(MacroAssembler& masm, RegI32 ptr) {
- #if defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \
-- defined(JS_CODEGEN_RISCV64)
-+ defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- // When ptr is used as an index, it will be added to a 64-bit register.
- // So we should explicitly promote ptr to 64-bit. Since now ptr holds a
- // unsigned 32-bit value, we zero-extend it to 64-bit here.
-@@ -645,6 +645,13 @@ void BaseCompiler::executeLoad(MemoryAccessDesc* access, AccessCheck* check,
- } else {
- masm.wasmLoad(*access, memoryBase, ptr, ptr, dest.any());
- }
-+#elif defined(JS_CODEGEN_PPC64)
-+ MOZ_ASSERT(temp.isInvalid());
-+ if (dest.tag == AnyReg::I64) {
-+ masm.wasmLoadI64(*access, memoryBase, ptr, ptr, dest.i64());
-+ } else {
-+ masm.wasmLoad(*access, memoryBase, ptr, ptr, dest.any());
-+ }
- #else
- MOZ_CRASH("BaseCompiler platform hook: load");
- #endif
-@@ -675,10 +682,11 @@ void BaseCompiler::load(MemoryAccessDesc* access, AccessCheck* check,
- // generated is the same for the 64-bit and the 32-bit case.
- return executeLoad(access, check, instance, memoryBase, RegI32(ptr.reg), dest,
- maybeFromI64(temp));
--#elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64)
-- // On mips64 and loongarch64, the 'prepareMemoryAccess' function will make
-- // sure that ptr holds a valid 64-bit index value. Thus the code generated in
-- // 'executeLoad' is the same for the 64-bit and the 32-bit case.
-+#elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \
-+ defined(JS_CODEGEN_PPC64)
-+ // On mips64, loongarch64, and ppc64, the 'prepareMemoryAccess' function will
-+ // make sure that ptr holds a valid 64-bit index value. Thus the code
-+ // generated in 'executeLoad' is the same for the 64-bit and the 32-bit case.
- return executeLoad(access, check, instance, memoryBase, RegI32(ptr.reg), dest,
- maybeFromI64(temp));
- #elif defined(JS_CODEGEN_RISCV64)
-@@ -788,6 +796,13 @@ void BaseCompiler::executeStore(MemoryAccessDesc* access, AccessCheck* check,
- } else {
- masm.wasmStore(*access, src.any(), memoryBase, ptr, ptr);
- }
-+#elif defined(JS_CODEGEN_PPC64)
-+ MOZ_ASSERT(temp.isInvalid());
-+ if (access->type() == Scalar::Int64) {
-+ masm.wasmStoreI64(*access, src.i64(), memoryBase, ptr, ptr);
-+ } else {
-+ masm.wasmStore(*access, src.any(), memoryBase, ptr, ptr);
-+ }
- #else
- MOZ_CRASH("BaseCompiler platform hook: store");
- #endif
-@@ -812,7 +827,7 @@ void BaseCompiler::store(MemoryAccessDesc* access, AccessCheck* check,
- maybeFromI64(temp));
- #elif defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM64) || \
- defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \
-- defined(JS_CODEGEN_RISCV64)
-+ defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- return executeStore(access, check, instance, memoryBase, RegI32(ptr.reg), src,
- maybeFromI64(temp));
- #else
-@@ -1295,7 +1310,8 @@ static void Deallocate(BaseCompiler* bc, RegI32 rv, const Temps& temps) {
- bc->freeI32(temps.t0);
- }
-
--#elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64)
-+#elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \
-+ defined(JS_CODEGEN_PPC64)
-
- struct Temps {
- RegI32 t0, t1, t2;
-@@ -1504,7 +1520,7 @@ static void Deallocate(BaseCompiler* bc, AtomicOp op, RegI64 rv, RegI64 temp) {
- }
-
- #elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS64) || \
-- defined(JS_CODEGEN_LOONG64)
-+ defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_PPC64)
-
- static void PopAndAllocate(BaseCompiler* bc, AtomicOp op, RegI64* rd,
- RegI64* rv, RegI64* temp) {
-@@ -1678,7 +1694,8 @@ static void Deallocate(BaseCompiler* bc, RegI32 rv, const Temps&) {
- bc->freeI32(rv);
- }
-
--#elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64)
-+#elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \
-+ defined(JS_CODEGEN_PPC64)
-
- struct Temps {
- RegI32 t0, t1, t2;
-@@ -1844,7 +1861,7 @@ static void Deallocate(BaseCompiler* bc, RegI64 rd, RegI64 rv) {
- }
-
- #elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS64) || \
-- defined(JS_CODEGEN_LOONG64)
-+ defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_PPC64)
-
- static void PopAndAllocate(BaseCompiler* bc, RegI64* rd, RegI64* rv) {
- *rv = bc->popI64();
-@@ -2017,7 +2034,8 @@ static void Deallocate(BaseCompiler* bc, RegI32 rexpect, RegI32 rnew,
- bc->freeI32(rexpect);
- }
-
--#elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64)
-+#elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \
-+ defined(JS_CODEGEN_PPC64)
-
- struct Temps {
- RegI32 t0, t1, t2;
-@@ -2287,7 +2305,7 @@ static void Deallocate(BaseCompiler* bc, RegI64 rexpect, RegI64 rnew) {
- }
-
- #elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS64) || \
-- defined(JS_CODEGEN_LOONG64)
-+ defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_PPC64)
-
- template <typename RegAddressType>
- static void PopAndAllocate(BaseCompiler* bc, RegI64* rexpect, RegI64* rnew,
-@@ -2885,6 +2903,11 @@ void BaseCompiler::loadExtend(MemoryAccessDesc* access, Scalar::Type viewType) {
- RegI64 rs = popI64();
- RegV128 rd = needV128();
- masm.moveGPR64ToDouble(rs, rd);
-+#ifdef JS_CODEGEN_PPC64
-+ // mtvsrd places value in BE dw0 (= LE dw1). widenLow* operates on LE dw0.
-+ // Swap dwords to move loaded data to the correct half.
-+ masm.as_xxpermdi(rd, rd, rd, 2);
-+#endif
- switch (viewType) {
- case Scalar::Int8:
- masm.widenLowInt8x16(rd, rd);
-diff --git a/js/src/wasm/WasmBCRegDefs.h b/js/src/wasm/WasmBCRegDefs.h
-index bb84f0863de2..fd37bd464f39 100644
---- a/js/src/wasm/WasmBCRegDefs.h
-+++ b/js/src/wasm/WasmBCRegDefs.h
-@@ -118,6 +118,13 @@ static constexpr Register RabaldrScratchI32 = CallTempReg2;
- static constexpr Register RabaldrScratchI32 = CallTempReg2;
- #endif
-
-+#ifdef JS_CODEGEN_PPC64
-+# define RABALDR_SCRATCH_I32
-+// Use r25 (callee-saved, non-arg, not used by any wasm infrastructure)
-+// instead of CallTempReg2 (r10) which is IntArgReg7.
-+static constexpr Register RabaldrScratchI32 = r25;
-+#endif
-+
- #ifdef RABALDR_SCRATCH_F32_ALIASES_F64
- # if !defined(RABALDR_SCRATCH_F32) || !defined(RABALDR_SCRATCH_F64)
- # error "Bad configuration"
-@@ -386,8 +393,9 @@ struct SpecificRegs {
-
- SpecificRegs() : abiReturnRegI64(ReturnReg64) {}
- };
--#elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS64) || \
-- defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64)
-+#elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS64) || \
-+ defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64) || \
-+ defined(JS_CODEGEN_PPC64)
- struct SpecificRegs {
- // Required by gcc.
- SpecificRegs() {}
-diff --git a/js/src/wasm/WasmBaselineCompile.cpp b/js/src/wasm/WasmBaselineCompile.cpp
-index 2af7ad7f583b..c57180dd362b 100644
---- a/js/src/wasm/WasmBaselineCompile.cpp
-+++ b/js/src/wasm/WasmBaselineCompile.cpp
-@@ -376,11 +376,15 @@ void BaseCompiler::tableSwitch(Label* theTable, RegI32 switchValue,
- masm.ma_ldr(DTRAddr(scratch, DtrRegImmShift(switchValue, LSL, 2)), pc, Offset,
- Assembler::Always);
- #elif defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \
-- defined(JS_CODEGEN_RISCV64)
-+ defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- ScratchI32 scratch(*this);
- CodeLabel tableCl;
-
-+# if defined(JS_CODEGEN_PPC64)
-+ masm.mov(&tableCl, scratch);
-+# else
- masm.ma_li(scratch, &tableCl);
-+# endif
-
- tableCl.target()->bind(theTable->offset());
- masm.addCodeLabel(tableCl);
-@@ -898,7 +902,7 @@ void BaseCompiler::insertBreakablePoint(CallSiteKind kind) {
- masm.append(CallSiteDesc(iter_.lastOpcodeOffset(), kind),
- CodeOffset(masm.currentOffset()));
- #elif defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_MIPS64) || \
-- defined(JS_CODEGEN_RISCV64)
-+ defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- ScratchPtr scratch(*this);
- Label L;
- masm.loadPtr(Address(InstanceReg, Instance::offsetOfDebugStub()), scratch);
-@@ -972,7 +976,7 @@ void BaseCompiler::insertPerFunctionDebugStub() {
- masm.ma_bx(lr, Assembler::Zero);
- }
- #elif defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_MIPS64) || \
-- defined(JS_CODEGEN_RISCV64)
-+ defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- {
- ScratchPtr scratch(*this);
-
-@@ -1403,7 +1407,7 @@ void BaseCompiler::popStackResults(ABIResultIter& iter, StackHeight stackBase) {
- switch (v.kind()) {
- case Stk::ConstI32:
- #if defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \
-- defined(JS_CODEGEN_RISCV64)
-+ defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- fr.storeImmediatePtrToStack(v.i32val_, resultHeight, temp);
- #else
- fr.storeImmediatePtrToStack(uint32_t(v.i32val_), resultHeight, temp);
-@@ -1723,6 +1727,13 @@ void BaseCompiler::passArg(ValType type, const Stk& arg, FunctionCall* call) {
- argLoc.offsetFromArgBase()));
- } else {
- loadI32(arg, RegI32(argLoc.gpr()));
-+#ifdef JS_CODEGEN_PPC64
-+ // addi can sign-extend, which yields wrong values when the C++
-+ // callee expects a uint32_t. Clear the upper 32 bits.
-+ if (call->abiKind == ABIKind::System) {
-+ masm.as_rldicl(argLoc.gpr(), argLoc.gpr(), 0, 32);
-+ }
-+#endif
- }
- break;
- }
-@@ -2372,9 +2383,10 @@ void BaseCompiler::finishTryNote(size_t tryNoteIndex) {
- RegI32 BaseCompiler::needRotate64Temp() {
- #if defined(JS_CODEGEN_X86)
- return needI32();
--#elif defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM) || \
-- defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS64) || \
-- defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64)
-+#elif defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM) || \
-+ defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS64) || \
-+ defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64) || \
-+ defined(JS_CODEGEN_PPC64)
- return RegI32::Invalid();
- #else
- MOZ_CRASH("BaseCompiler platform hook: needRotate64Temp");
-@@ -2433,6 +2445,8 @@ void BaseCompiler::popAndAllocateForMulI64(RegI64* r0, RegI64* r1,
- pop2xI64(r0, r1);
- #elif defined(JS_CODEGEN_RISCV64)
- pop2xI64(r0, r1);
-+#elif defined(JS_CODEGEN_PPC64)
-+ pop2xI64(r0, r1);
- #else
- MOZ_CRASH("BaseCompiler porting interface: popAndAllocateForMulI64");
- #endif
-@@ -2866,6 +2880,9 @@ static RegI32 PopcntTemp(BaseCompiler& bc) {
- defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \
- defined(JS_CODEGEN_RISCV64)
- return bc.needI32();
-+#elif defined(JS_CODEGEN_PPC64)
-+ // PPC64 has native popcntd/popcntw; no temp register needed.
-+ return RegI32::Invalid();
- #else
- MOZ_CRASH("BaseCompiler platform hook: PopcntTemp");
- #endif
-@@ -9362,6 +9379,11 @@ static void MulI64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd,
- RegV128 temp1, RegV128 temp2) {
- masm.mulInt64x2(rsd, rs, rsd, temp1, temp2);
- }
-+# elif defined(JS_CODEGEN_PPC64)
-+static void MulI64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd,
-+ RegV128 temp1, RegV128 temp2) {
-+ masm.mulInt64x2(rsd, rs, rsd, temp1, temp2);
-+}
- # endif
-
- static void MulF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
-@@ -9376,7 +9398,8 @@ static void DivF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
- masm.divFloat64x2(rsd, rs, rsd);
- }
-
--# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
-+# if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) || \
-+ defined(JS_CODEGEN_PPC64)
- static void MinF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd,
- RegV128 temp1, RegV128 temp2) {
- masm.minFloat32x4(rsd, rs, rsd, temp1, temp2);
-@@ -9397,6 +9420,22 @@ static void MaxF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd,
- masm.maxFloat64x2(rsd, rs, rsd, temp1, temp2);
- }
-
-+# if defined(JS_CODEGEN_PPC64)
-+// PPC64: use non-RhsDestOp convention (first=rhs, second=lhsDest),
-+// matching the pseudoMin/Max function signature.
-+static void PMinF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
-+ masm.pseudoMinFloat32x4(rs, rsd);
-+}
-+static void PMinF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
-+ masm.pseudoMinFloat64x2(rs, rsd);
-+}
-+static void PMaxF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
-+ masm.pseudoMaxFloat32x4(rs, rsd);
-+}
-+static void PMaxF64x2(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
-+ masm.pseudoMaxFloat64x2(rs, rsd);
-+}
-+# else
- static void PMinF32x4(MacroAssembler& masm, RegV128 rsd, RegV128 rs,
- RhsDestOp) {
- masm.pseudoMinFloat32x4(rsd, rs);
-@@ -9416,6 +9455,7 @@ static void PMaxF64x2(MacroAssembler& masm, RegV128 rsd, RegV128 rs,
- RhsDestOp) {
- masm.pseudoMaxFloat64x2(rsd, rs);
- }
-+# endif
- # elif defined(JS_CODEGEN_ARM64)
- static void MinF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
- masm.minFloat32x4(rs, rsd);
-@@ -9806,6 +9846,68 @@ static void ShiftRightI64x2(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
- masm.rightShiftInt64x2(rsd, temp, rsd);
- }
-
-+static void ShiftRightUI64x2(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
-+ RegI32 temp) {
-+ ShiftOpMask(masm, SimdOp::I64x2ShrU, rs, temp);
-+ masm.unsignedRightShiftInt64x2(rsd, temp, rsd);
-+}
-+# elif defined(JS_CODEGEN_PPC64)
-+// PPC64: same as ARM64 pattern (shift amount in GPR, result in vector reg)
-+static void ShiftLeftI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
-+ RegI32 temp) {
-+ ShiftOpMask(masm, SimdOp::I8x16Shl, rs, temp);
-+ masm.leftShiftInt8x16(rsd, temp, rsd);
-+}
-+static void ShiftLeftI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
-+ RegI32 temp) {
-+ ShiftOpMask(masm, SimdOp::I16x8Shl, rs, temp);
-+ masm.leftShiftInt16x8(rsd, temp, rsd);
-+}
-+static void ShiftLeftI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
-+ RegI32 temp) {
-+ ShiftOpMask(masm, SimdOp::I32x4Shl, rs, temp);
-+ masm.leftShiftInt32x4(rsd, temp, rsd);
-+}
-+static void ShiftLeftI64x2(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
-+ RegI32 temp) {
-+ ShiftOpMask(masm, SimdOp::I64x2Shl, rs, temp);
-+ masm.leftShiftInt64x2(rsd, temp, rsd);
-+}
-+static void ShiftRightI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
-+ RegI32 temp) {
-+ ShiftOpMask(masm, SimdOp::I8x16ShrS, rs, temp);
-+ masm.rightShiftInt8x16(rsd, temp, rsd);
-+}
-+static void ShiftRightUI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
-+ RegI32 temp) {
-+ ShiftOpMask(masm, SimdOp::I8x16ShrU, rs, temp);
-+ masm.unsignedRightShiftInt8x16(rsd, temp, rsd);
-+}
-+static void ShiftRightI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
-+ RegI32 temp) {
-+ ShiftOpMask(masm, SimdOp::I16x8ShrS, rs, temp);
-+ masm.rightShiftInt16x8(rsd, temp, rsd);
-+}
-+static void ShiftRightUI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
-+ RegI32 temp) {
-+ ShiftOpMask(masm, SimdOp::I16x8ShrU, rs, temp);
-+ masm.unsignedRightShiftInt16x8(rsd, temp, rsd);
-+}
-+static void ShiftRightI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
-+ RegI32 temp) {
-+ ShiftOpMask(masm, SimdOp::I32x4ShrS, rs, temp);
-+ masm.rightShiftInt32x4(rsd, temp, rsd);
-+}
-+static void ShiftRightUI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
-+ RegI32 temp) {
-+ ShiftOpMask(masm, SimdOp::I32x4ShrU, rs, temp);
-+ masm.unsignedRightShiftInt32x4(rsd, temp, rsd);
-+}
-+static void ShiftRightI64x2(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
-+ RegI32 temp) {
-+ ShiftOpMask(masm, SimdOp::I64x2ShrS, rs, temp);
-+ masm.rightShiftInt64x2(rsd, temp, rsd);
-+}
- static void ShiftRightUI64x2(MacroAssembler& masm, RegI32 rs, RegV128 rsd,
- RegI32 temp) {
- ShiftOpMask(masm, SimdOp::I64x2ShrU, rs, temp);
-@@ -10107,6 +10209,23 @@ static void BitmaskI32x4(MacroAssembler& masm, RegV128 rs, RegI32 rd,
- masm.bitmaskInt32x4(rs, rd, temp);
- }
-
-+static void BitmaskI64x2(MacroAssembler& masm, RegV128 rs, RegI32 rd,
-+ RegV128 temp) {
-+ masm.bitmaskInt64x2(rs, rd, temp);
-+}
-+# elif defined(JS_CODEGEN_PPC64)
-+static void BitmaskI8x16(MacroAssembler& masm, RegV128 rs, RegI32 rd,
-+ RegV128 temp) {
-+ masm.bitmaskInt8x16(rs, rd, temp);
-+}
-+static void BitmaskI16x8(MacroAssembler& masm, RegV128 rs, RegI32 rd,
-+ RegV128 temp) {
-+ masm.bitmaskInt16x8(rs, rd, temp);
-+}
-+static void BitmaskI32x4(MacroAssembler& masm, RegV128 rs, RegI32 rd,
-+ RegV128 temp) {
-+ masm.bitmaskInt32x4(rs, rd, temp);
-+}
- static void BitmaskI64x2(MacroAssembler& masm, RegV128 rs, RegI32 rd,
- RegV128 temp) {
- masm.bitmaskInt64x2(rs, rd, temp);
-@@ -10182,6 +10301,13 @@ static void BitselectV128(MacroAssembler& masm, RegV128 rhs, RegV128 control,
- masm.bitwiseSelectSimd128(lhsDest, rhs, temp);
- masm.moveSimd128(temp, lhsDest);
- }
-+# elif defined(JS_CODEGEN_PPC64)
-+static void BitselectV128(MacroAssembler& masm, RegV128 rhs, RegV128 control,
-+ RegV128 lhsDest, RegV128 temp) {
-+ masm.moveSimd128(control, temp);
-+ masm.bitwiseSelectSimd128(lhsDest, rhs, temp);
-+ masm.moveSimd128(temp, lhsDest);
-+}
- # endif
-
- # ifdef ENABLE_WASM_RELAXED_SIMD
-@@ -10257,7 +10383,7 @@ void BaseCompiler::emitDotI8x16I7x16AddS() {
- RegV128 rsd = popV128();
- RegV128 rs0, rs1;
- pop2xV128(&rs0, &rs1);
--# if defined(JS_CODEGEN_ARM64)
-+# if defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_PPC64)
- RegV128 temp = needV128();
- masm.dotInt8x16Int7x16ThenAdd(rs0, rs1, rsd, temp);
- freeV128(temp);
-@@ -10463,7 +10589,7 @@ bool BaseCompiler::emitVectorLaneSelect() {
- freeV128(lhs);
- freeV128(mask);
- pushV128(rhsDest);
--# elif defined(JS_CODEGEN_ARM64)
-+# elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_PPC64)
- RegV128 maskDest = popV128();
- RegV128 rhs = popV128();
- RegV128 lhs = popV128();
-@@ -12628,7 +12754,7 @@ bool js::wasm::BaselinePlatformSupport() {
- #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) || \
- defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) || \
- defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \
-- defined(JS_CODEGEN_RISCV64)
-+ defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- return true;
- #else
- return false;
-diff --git a/js/src/wasm/WasmCodegenConstants.h b/js/src/wasm/WasmCodegenConstants.h
-index 9c10d307ae6f..e25332b5464e 100644
---- a/js/src/wasm/WasmCodegenConstants.h
-+++ b/js/src/wasm/WasmCodegenConstants.h
-@@ -43,7 +43,8 @@ static const unsigned InterpFailInstanceReg = 0xbad;
- // The following thresholds were derived from a microbenchmark. If we begin to
- // ship this optimization for more platforms, we will need to extend this list.
-
--#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM64)
-+#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM64) || \
-+ defined(JS_CODEGEN_PPC64)
- static const uint32_t MaxInlineMemoryCopyLength = 64;
- static const uint32_t MaxInlineMemoryFillLength = 64;
- #elif defined(JS_CODEGEN_X86)
-diff --git a/js/src/wasm/WasmCodegenTypes.cpp b/js/src/wasm/WasmCodegenTypes.cpp
-index 8b9f32639ea3..e906c4afecc3 100644
---- a/js/src/wasm/WasmCodegenTypes.cpp
-+++ b/js/src/wasm/WasmCodegenTypes.cpp
-@@ -144,14 +144,15 @@ void TrapSitesForKind::checkInvariants(const uint8_t* codeBase) const {
- last = pcOffset;
- }
-
--# if (defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) || \
-- defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_ARM) || \
-- defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_MIPS64))
-+# if (defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) || \
-+ defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_ARM) || \
-+ defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_MIPS64) || \
-+ defined(JS_CODEGEN_PPC64))
- // Check that each trapsite is associated with a plausible instruction. The
- // required instruction kind depends on the trapsite kind.
- //
-- // NOTE: currently enabled on x86_{32,64}, arm{32,64}, loongson64 and mips64.
-- // Ideally it should be extended to riscv64 too.
-+ // NOTE: currently enabled on x86_{32,64}, arm{32,64}, loongson64, mips64,
-+ // and ppc64. Ideally it should be extended to riscv64 too.
- //
- for (uint32_t i = 0; i < length(); i++) {
- uint32_t pcOffset = pcOffsets_[i];
-diff --git a/js/src/wasm/WasmCompile.cpp b/js/src/wasm/WasmCompile.cpp
-index 051c60ebaa55..89447aa668ff 100644
---- a/js/src/wasm/WasmCompile.cpp
-+++ b/js/src/wasm/WasmCompile.cpp
-@@ -71,8 +71,9 @@ uint32_t wasm::ObservedCPUFeatures() {
- ARM64 = 0x6,
- LOONG64 = 0x7,
- RISCV64 = 0x8,
-+ PPC64 = 0x9,
-
-- LAST = RISCV64,
-+ LAST = PPC64,
- ARCH_BITS = 4
- };
-
-@@ -101,6 +102,9 @@ uint32_t wasm::ObservedCPUFeatures() {
- #elif defined(JS_CODEGEN_RISCV64)
- MOZ_ASSERT(jit::GetRISCV64Flags() <= (UINT32_MAX >> ARCH_BITS));
- return RISCV64 | (jit::GetRISCV64Flags() << ARCH_BITS);
-+#elif defined(JS_CODEGEN_PPC64)
-+ MOZ_ASSERT(jit::GetPPC64Flags() <= (UINT32_MAX >> ARCH_BITS));
-+ return PPC64 | (jit::GetPPC64Flags() << ARCH_BITS);
- #elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32)
- return 0;
- #else
-diff --git a/js/src/wasm/WasmFrameIter.cpp b/js/src/wasm/WasmFrameIter.cpp
-index b3b264bc625a..b540acf9a05d 100644
---- a/js/src/wasm/WasmFrameIter.cpp
-+++ b/js/src/wasm/WasmFrameIter.cpp
-@@ -622,6 +622,19 @@ static const unsigned PushedFP = 16;
- static const unsigned SetFP = 20;
- static const unsigned PoppedFP = 4;
- static const unsigned PoppedFPJitEntry = 8;
-+#elif defined(JS_CODEGEN_PPC64)
-+// pushReturnAddress = mflr(4) + stdu(4) = 8 bytes.
-+// push(FP) = stdu(4) = 4 bytes (PPC64 stdu is a single DS-form instruction).
-+// moveStackPtrTo = mr(4) = 4 bytes.
-+static const unsigned PushedRetAddr = 8;
-+static const unsigned PushedFP = 12;
-+static const unsigned SetFP = 16;
-+// Callable + jit-entry epilogues between poppedFP and *ret are:
-+// mtlr r0; addi sp, sp, 16 (two 4-byte instructions — 8 bytes).
-+// mtlr must come before addi so LR holds the caller's RA throughout the
-+// post-poppedFP window (single-step profiling fires every instruction).
-+static const unsigned PoppedFP = 8;
-+static const unsigned PoppedFPJitEntry = 8;
- #elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32)
- // Synthetic values to satisfy asserts and avoid compiler warnings.
- static const unsigned PushedRetAddr = 0;
-@@ -710,6 +723,17 @@ static void GenerateCallablePrologue(MacroAssembler& masm, uint32_t* entry) {
- masm.moveStackPtrTo(FramePointer);
- MOZ_ASSERT_IF(!masm.oom(), SetFP == masm.currentOffset() - *entry);
- }
-+#elif defined(JS_CODEGEN_PPC64)
-+ {
-+ *entry = masm.currentOffset();
-+
-+ masm.pushReturnAddress();
-+ MOZ_ASSERT_IF(!masm.oom(), PushedRetAddr == masm.currentOffset() - *entry);
-+ masm.push(FramePointer);
-+ MOZ_ASSERT_IF(!masm.oom(), PushedFP == masm.currentOffset() - *entry);
-+ masm.moveStackPtrTo(FramePointer);
-+ MOZ_ASSERT_IF(!masm.oom(), SetFP == masm.currentOffset() - *entry);
-+ }
- #elif defined(JS_CODEGEN_ARM64)
- {
- // We do not use the PseudoStackPointer. However, we may be called in a
-@@ -803,6 +827,38 @@ static void GenerateCallableEpilogue(MacroAssembler& masm, unsigned framePushed,
- masm.jalr(zero, ra, 0);
- masm.nop();
- }
-+#elif defined(JS_CODEGEN_PPC64)
-+ // Load RA and FP from the Frame while it's still on the stack.
-+ // Using r0 (js::jit::r0) for RA is safe: it's volatile, used as
-+ // RT (not base), and we're in an epilogue where it's not live.
-+ masm.loadPtr(Address(StackPointer, Frame::returnAddressOffset()),
-+ js::jit::r0);
-+ masm.loadPtr(Address(StackPointer, Frame::callerFPOffset()), FramePointer);
-+
-+ // Fence the pool BEFORE capturing poppedFP. PoppedFP is a static 8
-+ // (mtlr + addi); enterNoPool itself can emit insertNopFill() and a
-+ // preemptive finishPool() at its top edge, so any leading insertions
-+ // must land before poppedFP — not between poppedFP and *ret. A pool
-+ // flush inside the 2-insn window would otherwise extend *ret - poppedFP
-+ // and trip the post-condition assertion below. P9 routes FP constants
-+ // through the pool so flushes are more frequent than on P8 (the
-+ // assertion was historically silent on P8 but reproducible on P9 dbgopt).
-+ masm.enterNoPool(2);
-+ poppedFP = masm.currentOffset();
-+
-+ // Move RA into LR BEFORE popping the Frame. If the order were addi/mtlr,
-+ // single-step profiling firing at the mtlr instruction would see: sp
-+ // already moved (so saved RA at sp[8] is gone), addi already executed,
-+ // and LR still holding the address right after the function's last `bl`
-+ // (i.e. inside this function, not the caller's RA). With mtlr first,
-+ // the entire post-poppedFP window has LR == caller's RA available
-+ // either via sp[8] (pre-addi) or registers.lr (post-mtlr).
-+ masm.xs_mtlr(js::jit::r0);
-+ masm.addToStackPtr(Imm32(sizeof(Frame)));
-+ *ret = masm.currentOffset();
-+ masm.leaveNoPool();
-+ masm.as_blr();
-+
- #elif defined(JS_CODEGEN_ARM64)
-
- // See comment at equivalent place in |GenerateCallablePrologue| above.
-@@ -1483,6 +1539,9 @@ void wasm::GenerateJitEntryPrologue(MacroAssembler& masm,
- BlockTrampolinePoolScope block_trampoline_pool(&masm, 10);
- offsets->begin = masm.currentOffset();
- masm.push(ra);
-+#elif defined(JS_CODEGEN_PPC64)
-+ offsets->begin = masm.currentOffset();
-+ masm.pushReturnAddress();
- #elif defined(JS_CODEGEN_ARM64)
- {
- AutoForbidPoolsAndNops afp(&masm,
-@@ -1536,6 +1595,20 @@ void wasm::GenerateJitEntryEpilogue(MacroAssembler& masm,
- masm.Ret(ARMRegister(lr, 64));
- masm.setFramePushed(0);
- }
-+#elif defined(JS_CODEGEN_PPC64)
-+ // Load RA and FP from the frame while it's still on the stack, then
-+ // restore LR, pop the frame, and return. mtlr must precede addi so LR
-+ // holds the caller's RA across the whole post-poppedFP window (see
-+ // GenerateCallableEpilogue for the matching rationale).
-+ masm.loadPtr(Address(StackPointer, Frame::returnAddressOffset()),
-+ js::jit::r0);
-+ masm.loadPtr(Address(StackPointer, Frame::callerFPOffset()), FramePointer);
-+ poppedFP = masm.currentOffset();
-+
-+ masm.xs_mtlr(js::jit::r0);
-+ masm.addToStackPtr(Imm32(sizeof(Frame)));
-+ offsets->ret = masm.currentOffset();
-+ masm.as_blr();
- #else
- // Forbid pools for the same reason as described in GenerateCallablePrologue.
- # if defined(JS_CODEGEN_ARM)
-@@ -1905,6 +1978,22 @@ bool js::wasm::StartUnwinding(const RegisterState& registers,
- fixedFP = fp;
- AssertMatchesCallSite(fixedPC, fixedFP);
- } else
-+#elif defined(JS_CODEGEN_PPC64)
-+ if (codeRange->isThunk()) {
-+ // The FarJumpIsland sequence temporarily scrambles the link register.
-+ fixedPC = pc;
-+ fixedFP = fp;
-+ *unwoundCaller = false;
-+ AssertMatchesCallSite(
-+ Frame::fromUntaggedWasmExitFP(fp)->returnAddress(),
-+ Frame::fromUntaggedWasmExitFP(fp)->rawCaller());
-+ } else if (offsetFromEntry < PushedFP) {
-+ // On PPC64 the return address is in LR (registers.lr) until
-+ // pushReturnAddress() saves it to the stack.
-+ fixedPC = (uint8_t*)registers.lr;
-+ fixedFP = fp;
-+ AssertMatchesCallSite(fixedPC, fixedFP);
-+ } else
- #elif defined(JS_CODEGEN_ARM64)
- if (offsetFromEntry < SetFP || codeRange->isThunk()) {
- // On ARM64 we rely on register state instead of state saved on
-@@ -1956,6 +2045,35 @@ bool js::wasm::StartUnwinding(const RegisterState& registers,
- fixedPC = Frame::fromUntaggedWasmExitFP(sp)->returnAddress();
- fixedFP = fp;
- AssertMatchesCallSite(fixedPC, fixedFP);
-+#elif defined(JS_CODEGEN_PPC64)
-+ } else if (offsetInCode >= codeRange->ret() - PoppedFP &&
-+ offsetInCode < codeRange->ret()) {
-+ // PPC64 epilogue (RA loaded into r0, FP restored, SP not yet
-+ // adjusted; RA may or may not have been moved to LR yet):
-+ // ld r0, 8(sp) ; restore caller's RA into r0
-+ // ld FP, 0(sp) ; restore caller's FP
-+ // <-- poppedFP -->
-+ // mtlr r0 ; LR := caller's RA
-+ // addi sp, sp, 16 ; pop the Frame
-+ // <-- ret -->
-+ // blr
-+ // In the [poppedFP, ret) window the addi has not run, so *sp
-+ // is still the saved Frame and sp[8] is the caller's RA.
-+ // (registers.lr would also be correct after mtlr executes, but
-+ // sp[8] is valid throughout this window — including before mtlr —
-+ // so we read it consistently.)
-+ MOZ_ASSERT(*sp == fp);
-+ fixedPC = Frame::fromUntaggedWasmExitFP(sp)->returnAddress();
-+ fixedFP = fp;
-+ AssertMatchesCallSite(fixedPC, fixedFP);
-+ } else if (offsetInCode == codeRange->ret()) {
-+ // PPC64 epilogue, at the blr: addi has run, so SP is the
-+ // caller's and *sp is unrelated memory. mtlr ran earlier in
-+ // the [poppedFP, ret) window, so LR holds the caller's RA.
-+ // fp holds the restored caller's FP.
-+ fixedPC = (uint8_t*)registers.lr;
-+ fixedFP = fp;
-+ AssertMatchesCallSite(fixedPC, fixedFP);
- #elif defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_LOONG64)
- // The stack pointer does not move until all values have
- // been restored so several cases can be coalesced here.
-diff --git a/js/src/wasm/WasmGC.cpp b/js/src/wasm/WasmGC.cpp
-index e59cd4f5aba0..21cd01fd1c5e 100644
---- a/js/src/wasm/WasmGC.cpp
-+++ b/js/src/wasm/WasmGC.cpp
-@@ -444,6 +444,14 @@ bool wasm::IsPlausibleStackMapKey(const uint8_t* nextPC) {
- insn[-1] == 0x00000013 /* addi zero, zero, 0 */) || // jal; nop
- (insn[-1] == 0x00100073 &&
- (insn[-2] & kITypeMask) == RO_CSRRWI))); // wasm trap
-+# elif defined(JS_CODEGEN_PPC64)
-+ const uint32_t* insn = reinterpret_cast<const uint32_t*>(nextPC);
-+ MOZ_ASSERT((uintptr_t(insn) & 3) == 0);
-+ // xs_trap() = tw 31,r0,r0 (PPC_trap); bctrl = PPC_bctr|1; bl = I-form
-+ // opcode 18 (PPC_b) with LK=1, AA=0, checked via 0xFC000003 mask.
-+ return insn[-1] == uint32_t(PPC_trap) ||
-+ insn[-1] == (uint32_t(PPC_bctr) | 1u) ||
-+ (insn[-1] & 0xFC000003u) == (uint32_t(PPC_b) | 1u);
- # else
- MOZ_CRASH("IsValidStackMapKey: requires implementation on this platform");
- # endif
-diff --git a/js/src/wasm/WasmGenerator.cpp b/js/src/wasm/WasmGenerator.cpp
-index 2dafac73e96a..07ffe150fc57 100644
---- a/js/src/wasm/WasmGenerator.cpp
-+++ b/js/src/wasm/WasmGenerator.cpp
-@@ -930,7 +930,23 @@ bool ModuleGenerator::finishCodeBlock(CodeBlockResult* result) {
- callSiteTargets_.clear();
- callFarJumps_.clear();
-
-- // None of the linking or far-jump operations should emit masm metadata.
-+ // None of the linking or far-jump operations should emit masm metadata,
-+ // except on PPC64 where patchFarJump uses addLongJump to create CodeLabels
-+ // for absolute-address far jumps. Drain those into linkData_ here.
-+#ifdef JS_CODEGEN_PPC64
-+ for (const jit::CodeLabel& codeLabel : masm_->codeLabels()) {
-+ LinkData::InternalLink link;
-+ link.patchAtOffset = codeLabel.patchAt().offset();
-+ link.targetOffset = codeLabel.target().offset();
-+# ifdef JS_CODELABEL_LINKMODE
-+ link.mode = codeLabel.linkMode();
-+# endif
-+ if (!linkData_->internalLinks.append(link)) {
-+ return false;
-+ }
-+ }
-+ masm_->codeLabels().clear();
-+#endif
-
- MOZ_ASSERT(masm_->inliningContext().empty());
- MOZ_ASSERT(masm_->callSites().empty());
-diff --git a/js/src/wasm/WasmIonCompile.cpp b/js/src/wasm/WasmIonCompile.cpp
-index 9c79b9cf0704..0d0e661770af 100644
---- a/js/src/wasm/WasmIonCompile.cpp
-+++ b/js/src/wasm/WasmIonCompile.cpp
-@@ -11602,7 +11602,7 @@ bool js::wasm::IonPlatformSupport() {
- #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) || \
- defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS64) || \
- defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_LOONG64) || \
-- defined(JS_CODEGEN_RISCV64)
-+ defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- return true;
- #else
- return false;
-diff --git a/js/src/wasm/WasmMemory.cpp b/js/src/wasm/WasmMemory.cpp
-index 0e3e6d3509ad..feee9f6ea1c9 100644
---- a/js/src/wasm/WasmMemory.cpp
-+++ b/js/src/wasm/WasmMemory.cpp
-@@ -288,9 +288,9 @@ static_assert(MaxMemoryAccessSize <= HugeUnalignedGuardPage,
- static_assert(HugeOffsetGuardLimit < UINT32_MAX,
- "checking for overflow against OffsetGuardLimit is enough.");
-
--// We have only tested huge memory on x64, arm64 and riscv64.
-+// We have only tested huge memory on x64, arm64, riscv64 and ppc64.
- # if !(defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM64) || \
-- defined(JS_CODEGEN_RISCV64))
-+ defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64))
- # error "Not an expected configuration"
- # endif
-
-diff --git a/js/src/wasm/WasmSignalHandlers.cpp b/js/src/wasm/WasmSignalHandlers.cpp
-index cc8bc2755745..84d3c4ec164d 100644
---- a/js/src/wasm/WasmSignalHandlers.cpp
-+++ b/js/src/wasm/WasmSignalHandlers.cpp
-@@ -111,7 +111,9 @@ using namespace js::wasm;
- # if defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || \
- defined(__PPC64LE__)
- # define R01_sig(p) ((p)->sc_frame.fixreg[1])
-+# define R31_sig(p) ((p)->sc_frame.fixreg[31])
- # define R32_sig(p) ((p)->sc_frame.srr0)
-+# define R36_sig(p) ((p)->sc_frame.lr)
- # endif
- # elif defined(__linux__) || defined(__sun)
- # if defined(__linux__)
-@@ -157,7 +159,9 @@ using namespace js::wasm;
- # if defined(__linux__) && (defined(__ppc64__) || defined(__PPC64__) || \
- defined(__ppc64le__) || defined(__PPC64LE__))
- # define R01_sig(p) ((p)->uc_mcontext.gp_regs[1])
-+# define R31_sig(p) ((p)->uc_mcontext.gp_regs[31])
- # define R32_sig(p) ((p)->uc_mcontext.gp_regs[32])
-+# define R36_sig(p) ((p)->uc_mcontext.gp_regs[36])
- # endif
- # if defined(__linux__) && defined(__loongarch__)
- # define EPC_sig(p) ((p)->uc_mcontext.__pc)
-@@ -200,7 +204,9 @@ using namespace js::wasm;
- # if defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || \
- defined(__PPC64LE__)
- # define R01_sig(p) ((p)->uc_mcontext.__gregs[_REG_R1])
-+# define R31_sig(p) ((p)->uc_mcontext.__gregs[_REG_R31])
- # define R32_sig(p) ((p)->uc_mcontext.__gregs[_REG_PC])
-+# define R36_sig(p) ((p)->uc_mcontext.__gregs[_REG_LR])
- # endif
- # elif defined(__DragonFly__) || defined(__FreeBSD__) || \
- defined(__FreeBSD_kernel__)
-@@ -234,7 +240,9 @@ using namespace js::wasm;
- # if defined(__FreeBSD__) && (defined(__ppc64__) || defined(__PPC64__) || \
- defined(__ppc64le__) || defined(__PPC64LE__))
- # define R01_sig(p) ((p)->uc_mcontext.mc_gpr[1])
-+# define R31_sig(p) ((p)->uc_mcontext.mc_gpr[31])
- # define R32_sig(p) ((p)->uc_mcontext.mc_srr0)
-+# define R36_sig(p) ((p)->uc_mcontext.mc_lr)
- # endif
- # elif defined(XP_DARWIN)
- # define EIP_sig(p) ((p)->thread.uts.ts32.__eip)
-@@ -412,7 +420,8 @@ struct macos_aarch64_context {
- defined(__PPC64LE__)
- # define PC_sig(p) R32_sig(p)
- # define SP_sig(p) R01_sig(p)
--# define FP_sig(p) R01_sig(p)
-+# define FP_sig(p) R31_sig(p)
-+# define LR_sig(p) R36_sig(p)
- # elif defined(__loongarch__)
- # define PC_sig(p) EPC_sig(p)
- # define FP_sig(p) RFP_sig(p)
-@@ -458,7 +467,8 @@ static uint8_t* ContextToSP(CONTEXT* context) {
- }
-
- # if defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
-- defined(__loongarch__) || defined(__riscv)
-+ defined(__loongarch__) || defined(__riscv) || \
-+ defined(__ppc64__) || defined(__PPC64__)
- static uint8_t* ContextToLR(CONTEXT* context) {
- # ifdef LR_sig
- return mozilla::BitwiseCast<uint8_t*>(LR_sig(context));
-@@ -475,7 +485,8 @@ static JS::ProfilingFrameIterator::RegisterState ToRegisterState(
- state.pc = ContextToPC(context);
- state.sp = ContextToSP(context);
- # if defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
-- defined(__loongarch__) || defined(__riscv)
-+ defined(__loongarch__) || defined(__riscv) || \
-+ defined(__ppc64__) || defined(__PPC64__)
- state.lr = ContextToLR(context);
- # else
- state.lr = (void*)UINTPTR_MAX;
-@@ -776,6 +787,9 @@ static void MachExceptionHandlerThread() {
-
- # if defined(__mips__) || defined(__loongarch__)
- static const uint32_t kWasmTrapSignal = SIGFPE;
-+# elif defined(__ppc64__) || defined(__PPC64__) || \
-+ defined(__ppc64le__) || defined(__PPC64LE__)
-+static const uint32_t kWasmTrapSignal = SIGTRAP;
- # else
- static const uint32_t kWasmTrapSignal = SIGILL;
- # endif
-diff --git a/js/src/wasm/WasmStacks.cpp b/js/src/wasm/WasmStacks.cpp
-index 71497353c5c1..6514d8b0e2e4 100644
---- a/js/src/wasm/WasmStacks.cpp
-+++ b/js/src/wasm/WasmStacks.cpp
-@@ -426,6 +426,30 @@ static constexpr size_t ContStackMaxJitStackSize = 10 * 1024 * 1024;
- // or stack snapshots utilities.
- static constexpr size_t ContStackRedZoneSize = 0x8000;
-
-+// Effective red-zone size used when laying out a continuation stack.
-+//
-+// The jit stack (and therefore the bottom guard page) must start on a page
-+// boundary; otherwise gc::ProtectPages trips MOZ_RELEASE_ASSERT(length %
-+// pageSize == 0). The red zone sits between the top guard page and the jit
-+// stack, so its size has to be a page multiple to keep that start aligned.
-+//
-+// Rounding the red zone up to a page is correct on every platform and would
-+// also cover any configuration whose page size exceeds ContStackRedZoneSize
-+// (32K) -- e.g. a 64K-page AArch64 kernel -- but ContStackRedZoneSize is
-+// already a multiple of the 4K/16K pages used on the tier-1 platforms, so the
-+// round-up is a no-op there today. We deliberately gate it to PPC64 (64K
-+// pages, where the round-up is load-bearing) so this patch cannot alter
-+// continuation stack layout on any tier-1 platform. Drop the gate if the
-+// general case is ever wanted.
-+static inline size_t ContStackEffectiveRedZoneSize(
-+ [[maybe_unused]] size_t pageSize) {
-+#ifdef JS_CODEGEN_PPC64
-+ return RoundUp(ContStackRedZoneSize, pageSize);
-+#else
-+ return ContStackRedZoneSize;
-+#endif
-+}
-+
- // Number of guard pages at the top and bottom of each continuation stack slot.
- static constexpr size_t ContStackTopGuardPages = 1;
- static constexpr size_t ContStackBottomGuardPages = 1;
-@@ -444,8 +468,8 @@ void ContStackSize::compute() {
- ContStackMinJitStackSize, ContStackMaxJitStackSize),
- pageSize);
- headerSize = RoundUp(sizeof(ContStack), pageSize);
-- totalSize = topGuardSize + ContStackRedZoneSize + jitStackSize +
-- bottomGuardSize + headerSize;
-+ totalSize = topGuardSize + ContStackEffectiveRedZoneSize(pageSize) +
-+ jitStackSize + bottomGuardSize + headerSize;
-
- // Assert we can't overflow when multiplying our size by capacity. Assume
- // 32-bit integers to be conservative.
-@@ -467,7 +491,8 @@ void ContStack::init(ContStackArena* arena, uintptr_t allocationBase,
- uintptr_t topGuardPagePhysicalStart = allocationBase;
- uintptr_t topGuardPagePhysicalEnd = allocationBase + topGuardPageSize;
- uintptr_t redZonePhysicalStart = topGuardPagePhysicalEnd;
-- uintptr_t jitStackPhysicalStart = redZonePhysicalStart + ContStackRedZoneSize;
-+ uintptr_t jitStackPhysicalStart =
-+ redZonePhysicalStart + ContStackEffectiveRedZoneSize(pageSize);
- uintptr_t jitStackPhysicalEnd = jitStackPhysicalStart + jitStackSize;
- uintptr_t bottomGuardPagePhysicalStart = jitStackPhysicalEnd;
- uintptr_t headerPhysicalStart =
-diff --git a/js/src/wasm/WasmStubs.cpp b/js/src/wasm/WasmStubs.cpp
-index 8a98e201a452..8497814fcd37 100644
---- a/js/src/wasm/WasmStubs.cpp
-+++ b/js/src/wasm/WasmStubs.cpp
-@@ -646,8 +646,9 @@ static bool GenerateInterpEntry(MacroAssembler& masm, const FuncExport& fe,
-
- // Save the return address if it wasn't already saved by the call insn.
- #ifdef JS_USE_LINK_REGISTER
--# if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS64) || \
-- defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64)
-+# if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS64) || \
-+ defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64) || \
-+ defined(JS_CODEGEN_PPC64)
- masm.pushReturnAddress();
- # elif defined(JS_CODEGEN_ARM64)
- // WasmPush updates framePushed() unlike pushReturnAddress(), but that's
-@@ -2123,9 +2124,10 @@ static bool GenerateImportInterpExit(MacroAssembler& masm, const FuncImport& fi,
- // The native ABI preserves the instance, heap and global registers since they
- // are non-volatile.
- MOZ_ASSERT(NonVolatileRegs.has(InstanceReg));
--#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM) || \
-- defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS64) || \
-- defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64)
-+#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM) || \
-+ defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS64) || \
-+ defined(JS_CODEGEN_LOONG64) || defined(JS_CODEGEN_RISCV64) || \
-+ defined(JS_CODEGEN_PPC64)
- MOZ_ASSERT(NonVolatileRegs.has(HeapReg));
- #endif
-
-@@ -2571,6 +2573,15 @@ bool wasm::GenerateBuiltinThunk(MacroAssembler& masm, ABIFunctionType abiType,
- Register::FromCode(regId + 1));
- }
- }
-+#endif
-+#ifdef JS_CODEGEN_PPC64
-+ // PPC64 32-bit operations do not zero-extend to 64 bits (unlike
-+ // x86-64/ARM64/LA64). The ELFv2 ABI requires callers to zero/sign-extend
-+ // narrow args. Wasm i32 values may have garbage upper bits in 64-bit
-+ // registers, so zero-extend them before calling C++ builtins.
-+ if (selfArgs.mirType() == MIRType::Int32) {
-+ masm.move32ZeroExtendToPtr(selfArgs->gpr(), selfArgs->gpr());
-+ }
- #endif
- continue;
- }
-@@ -2659,6 +2670,28 @@ static const LiveRegisterSet RegsToPreserve(
- # ifdef ENABLE_WASM_SIMD
- # error "high lanes of SIMD registers need to be saved too."
- # endif
-+#elif defined(JS_CODEGEN_PPC64)
-+// Exclude r0 (ScratchRegister, not allocatable, special addressing semantics),
-+// r1 (SP), r2 (TOC pointer, reserved), and r13 (TLS pointer, reserved).
-+static const LiveRegisterSet RegsToPreserve(
-+ GeneralRegisterSet(Registers::AllMask & ~((uint32_t(1) << Registers::r0) |
-+ (uint32_t(1) << Registers::r1) |
-+ (uint32_t(1) << Registers::r2) |
-+ (uint32_t(1) << Registers::r13))),
-+# ifdef ENABLE_WASM_SIMD
-+ // Unlike ARM64, where the vector registers alias the doubles, PPC64
-+ // doubles live in the FPRs (VSR0-31) while wasm v128 values live in the
-+ // VRs (VSR32-63) -- two disjoint physical pools, so both must be
-+ // preserved. Saving only the doubles loses the entire live v128 state: a
-+ // trap firing while a v128 is live (notably the interrupt-check trap,
-+ // which fires constantly in hot loops) resumes with whatever the C++
-+ // handler's libc left in the VRs (e.g. glibc's vector memcpy leaves lvsl
-+ // alignment-control patterns in low VRs).
-+ FloatRegisterSet(FloatRegisters::AllDoubleMask |
-+ FloatRegisters::AllSimd128Mask));
-+# else
-+ FloatRegisterSet(FloatRegisters::AllDoubleMask));
-+# endif
- #elif defined(JS_CODEGEN_ARM64)
- // We assume that traps do not happen while lr is live. This both ensures that
- // the size of RegsToPreserve is a multiple of 2 (preserving WasmStackAlignment)
-diff --git a/js/src/wasm/WasmSummarizeInsn.cpp b/js/src/wasm/WasmSummarizeInsn.cpp
-index 7bb4f4b7a725..2ae55a1b1b9e 100644
---- a/js/src/wasm/WasmSummarizeInsn.cpp
-+++ b/js/src/wasm/WasmSummarizeInsn.cpp
-@@ -1731,6 +1731,169 @@ Maybe<TrapMachineInsn> SummarizeTrapInstruction(const uint8_t* insnAddr) {
- return Nothing();
- }
-
-+// ================================================================== ppc64 ====
-+
-+# elif defined(JS_CODEGEN_PPC64)
-+
-+Maybe<TrapMachineInsn> SummarizeTrapInstruction(const uint8_t* insnAddr) {
-+ MOZ_ASSERT(0 == (uintptr_t(insnAddr) & 3));
-+
-+ const uint32_t insn = *(uint32_t*)insnAddr;
-+ const uint32_t majorOp = insn >> 26;
-+ // X-form secondary opcode: bits 10..1.
-+ const uint32_t xo = (insn >> 1) & 0x3FF;
-+
-+ // PPC_trap = 0x7FE00008 = tw 31,0,0.
-+ if (insn == 0x7FE00008) {
-+ return Some(TrapMachineInsn::OfficialUD);
-+ }
-+
-+ // D-form / DS-form loads.
-+ switch (majorOp) {
-+ case 34: // lbz
-+ return Some(TrapMachineInsn::Load8);
-+ case 40: // lhz
-+ case 42: // lha
-+ return Some(TrapMachineInsn::Load16);
-+ case 32: // lwz
-+ return Some(TrapMachineInsn::Load32);
-+ case 58: // ld (DS=0) / ldu (DS=1) / lwa (DS=2)
-+ if ((insn & 3) == 2) {
-+ return Some(TrapMachineInsn::Load32); // lwa
-+ }
-+ return Some(TrapMachineInsn::Load64); // ld
-+ case 48: // lfs
-+ return Some(TrapMachineInsn::Load32);
-+ case 50: // lfd
-+ return Some(TrapMachineInsn::Load64);
-+ default:
-+ break;
-+ }
-+
-+ // D-form / DS-form stores.
-+ switch (majorOp) {
-+ case 38: // stb
-+ return Some(TrapMachineInsn::Store8);
-+ case 44: // sth
-+ return Some(TrapMachineInsn::Store16);
-+ case 36: // stw
-+ case 37: // stwu
-+ return Some(TrapMachineInsn::Store32);
-+ case 52: // stfs
-+ return Some(TrapMachineInsn::Store32);
-+ case 62: // std (DS=0) / stdu (DS=1)
-+ return Some(TrapMachineInsn::Store64);
-+ case 54: // stfd
-+ case 55: // stfdu
-+ return Some(TrapMachineInsn::Store64);
-+ default:
-+ break;
-+ }
-+
-+ // X-form instructions (major opcode 31).
-+ if (majorOp == 31) {
-+ switch (xo) {
-+ // Indexed loads.
-+ case 87: // lbzx
-+ return Some(TrapMachineInsn::Load8);
-+ case 279: // lhzx
-+ case 343: // lhax
-+ return Some(TrapMachineInsn::Load16);
-+ case 23: // lwzx
-+ return Some(TrapMachineInsn::Load32);
-+ case 21: // ldx
-+ return Some(TrapMachineInsn::Load64);
-+ case 535: // lfsx
-+ case 855: // lfiwax
-+ case 887: // lfiwzx
-+ return Some(TrapMachineInsn::Load32);
-+ case 599: // lfdx
-+ return Some(TrapMachineInsn::Load64);
-+ case 790: // lhbrx (byte-reverse halfword)
-+ return Some(TrapMachineInsn::Load16);
-+ case 534: // lwbrx (byte-reverse word)
-+ return Some(TrapMachineInsn::Load32);
-+
-+ // Indexed stores.
-+ case 215: // stbx
-+ return Some(TrapMachineInsn::Store8);
-+ case 407: // sthx
-+ return Some(TrapMachineInsn::Store16);
-+ case 151: // stwx
-+ return Some(TrapMachineInsn::Store32);
-+ case 149: // stdx
-+ return Some(TrapMachineInsn::Store64);
-+ case 663: // stfsx
-+ return Some(TrapMachineInsn::Store32);
-+ case 727: // stfdx
-+ return Some(TrapMachineInsn::Store64);
-+ case 918: // sthbrx (byte-reverse halfword store)
-+ return Some(TrapMachineInsn::Store16);
-+ case 662: // stwbrx (byte-reverse word store)
-+ return Some(TrapMachineInsn::Store32);
-+
-+ // VSX SIMD indexed load/store (XX1-form, same major opcode 31).
-+ case 268: // lxvx (POWER9)
-+ case 844: // lxvd2x (POWER8)
-+ return Some(TrapMachineInsn::Load128);
-+ case 396: // stxvx (POWER9)
-+ case 972: // stxvd2x (POWER8)
-+ return Some(TrapMachineInsn::Store128);
-+
-+ // Atomic (load-reserve / store-conditional).
-+ case 20: // lwarx
-+ case 52: // lbarx (POWER7+)
-+ case 84: // ldarx
-+ case 116: // lharx (POWER7+)
-+ return Some(TrapMachineInsn::Atomic);
-+ default:
-+ break;
-+ }
-+ // stwcx. (XO=150, Rc=1), stdcx. (XO=214, Rc=1), stbcx. (XO=694, Rc=1)
-+ // and sthcx. (XO=726, Rc=1) have bit 0 set. Note xo above already
-+ // discards bit 0, so we need a separate low-11-bit match.
-+ const uint32_t xoRc = insn & 0x7FF; // bits 10..0
-+ if (xoRc == ((150 << 1) | 1) || xoRc == ((214 << 1) | 1) ||
-+ xoRc == ((694 << 1) | 1) || xoRc == ((726 << 1) | 1)) {
-+ return Some(TrapMachineInsn::Atomic);
-+ }
-+ }
-+
-+ // POWER10 prefixed loads/stores (major opcode 1). The trap-site PC
-+ // points at the prefix word; the actual load/store kind is encoded in
-+ // the suffix word at insnAddr + 4. The 64-byte-boundary rule
-+ // (ensurePrefixedAlignment) guarantees the suffix is in the same block.
-+ if (majorOp == 1) {
-+ const uint32_t suffix = *(uint32_t*)(insnAddr + 4);
-+ const uint32_t suffixOp6 = suffix >> 26; // 6-bit suffix op
-+ const uint32_t suffixOp5 = suffix >> 27; // 5-bit suffix op (plxv/pstxv)
-+ switch (suffixOp6) {
-+ case 57: // pld
-+ return Some(TrapMachineInsn::Load64);
-+ case 50: // plfd
-+ return Some(TrapMachineInsn::Load64);
-+ case 48: // plfs
-+ return Some(TrapMachineInsn::Load32);
-+ case 61: // pstd
-+ return Some(TrapMachineInsn::Store64);
-+ case 54: // pstfd
-+ return Some(TrapMachineInsn::Store64);
-+ case 52: // pstfs
-+ return Some(TrapMachineInsn::Store32);
-+ default:
-+ break;
-+ }
-+ if (suffixOp5 == 25) { // plxv
-+ return Some(TrapMachineInsn::Load128);
-+ }
-+ if (suffixOp5 == 27) { // pstxv
-+ return Some(TrapMachineInsn::Store128);
-+ }
-+ }
-+
-+ return Nothing();
-+}
-+
- // ================================================================== none ====
-
- # elif defined(JS_CODEGEN_NONE)
-diff --git a/js/src/wasm/WasmValue.cpp b/js/src/wasm/WasmValue.cpp
-index fda0996851e1..45fff24fa582 100644
---- a/js/src/wasm/WasmValue.cpp
-+++ b/js/src/wasm/WasmValue.cpp
-@@ -430,7 +430,7 @@ bool ToWebAssemblyValue_i32(JSContext* cx, HandleValue val, int32_t* loc,
- bool ok = ToInt32(cx, val, loc);
- if (ok && mustWrite64) {
- #if defined(JS_CODEGEN_MIPS64) || defined(JS_CODEGEN_LOONG64) || \
-- defined(JS_CODEGEN_RISCV64)
-+ defined(JS_CODEGEN_RISCV64) || defined(JS_CODEGEN_PPC64)
- loc[1] = loc[0] >> 31;
- #else
- loc[1] = 0;
-diff --git a/mfbt/Assertions.h b/mfbt/Assertions.h
-index a436d019a197..4887af7e7676 100644
---- a/mfbt/Assertions.h
-+++ b/mfbt/Assertions.h
-@@ -282,6 +282,11 @@ static inline void MOZ_CrashSequence(void* aAddress, intptr_t aLine) {
- "st.d %1,%0,0;\n" // Write the line number to the crashing address
- : // no output registers
- : "r"(aAddress), "r"(aLine));
-+# elif defined(__powerpc64__)
-+ asm volatile(
-+ "std %1,0(%0);\n" // Write the line number to the crashing address
-+ : // no output registers
-+ : "r"(aAddress), "r"(aLine));
- # else
- # warning \
- "Unsupported architecture, replace the code below with assembly suitable to crash the process"
---
-2.52.0
-
diff --git a/firefox.spec b/firefox.spec
index be8abaf..06a6900 100644
--- a/firefox.spec
+++ b/firefox.spec
@@ -281,11 +281,6 @@ Patch600: pgo.patch
Patch602: mozilla-1516803.patch
Patch603: firefox-gcc-always-inline.patch
-# ppc64le JIT
-Patch900: 0001-Add-VSX-instructions-for-SKIA.patch
-Patch901: 0002-Add-VSX-instructions-for-libwebp.patch
-Patch902: 0003-Add-PPC64LE-JIT-backend.patch
-
%if %{?system_nss}
BuildRequires: pkgconfig(nspr) >= %{nspr_version}
@@ -606,11 +601,6 @@ cat %{SOURCE49} | sed -e "s|LIBCLANG_RT_PLACEHOLDER|`pwd`/wasi-sdk-30/build/sysr
%endif
%patch -P603 -p1 -b .inline
-# ppc64le JIT
-%patch -P900 -p1
-%patch -P901 -p1
-%patch -P902 -p1
-
rm -f .mozconfig
cp %{SOURCE10} .mozconfig
echo "ac_add_options --enable-default-toolkit=cairo-gtk3-wayland" >> .mozconfig
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2026-06-16 13:11 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-06-16 13:11 [rpms/firefox] rawhide: Revert "add ppc64le JIT"
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox