Bug 1940795: Don't tear aligned subwords in AtomicMemcpy. r=spidermonkey-reviewers,jandem

Reads and writes to integer typed arrays mustn't tear, so they can't be split into
single-byte operations.

Replace the single-byte copy loops with `AtomicCopy{Down,Up}NoTearIfAlignedUnsynchronized`,
which check the pointer alignment and use aligned word or dword copies where possible.

Test262 tests: <tc39/test262#4369>

Differential Revision: https://phabricator.services.mozilla.com/D233710
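For illustration, a minimal sketch of the hazard this fixes. It is not part of the patch: the two helpers below are hand-written stand-ins for the generated `AtomicCopy8Unsynchronized`/`AtomicCopy32Unsynchronized`, which use inline assembly so the access width is guaranteed rather than merely likely.

```cpp
// Illustrative only: why a byte-wise tail copy can tear a shared 32-bit
// TypedArray element while a single aligned 32-bit access cannot.
#include <cstdint>
#include <cstring>

static void AtomicCopy8Unsynchronized(uint8_t* dst, const uint8_t* src) {
  *dst = *src;
}

static void AtomicCopy32Unsynchronized(uint8_t* dst, const uint8_t* src) {
  uint32_t v;
  std::memcpy(&v, src, sizeof(v));
  std::memcpy(dst, &v, sizeof(v));  // typically one aligned 32-bit load/store
}

int main() {
  alignas(4) uint32_t element = 0x11111111;  // element of a shared Int32Array
  alignas(4) uint32_t update = 0x22222222;

  auto* d = reinterpret_cast<uint8_t*>(&element);
  auto* s = reinterpret_cast<const uint8_t*>(&update);

  // Old behaviour: four separate byte stores. A reader racing with this loop
  // can observe a mixed value such as 0x22221111, which never existed.
  for (int i = 0; i < 4; i++) {
    AtomicCopy8Unsynchronized(d + i, s + i);
  }

  // New behaviour when the pointers are 4-byte aligned: one 32-bit access, so
  // a racing reader sees either the old or the new value, never a mixture.
  AtomicCopy32Unsynchronized(d, s);
  return 0;
}
```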
anba committed Jan 14, 2025
1 parent 4827890 commit 869019a
Showing 2 changed files with 145 additions and 19 deletions.
js/src/jit/GenerateAtomicOperations.py (16 changes: 15 additions & 1 deletion)
@@ -664,6 +664,9 @@ def gen_copy(fun_name, cpp_type, size, unroll, direction):
if size == 1:
insns += fmt_insn("movb OFFSET(%[src]), %[scratch]")
insns += fmt_insn("movb %[scratch], OFFSET(%[dst])")
elif size == 2:
insns += fmt_insn("movw OFFSET(%[src]), %[scratch]")
insns += fmt_insn("movw %[scratch], OFFSET(%[dst])")
elif size == 4:
insns += fmt_insn("movl OFFSET(%[src]), %[scratch]")
insns += fmt_insn("movl %[scratch], OFFSET(%[dst])")
@@ -675,6 +678,12 @@ def gen_copy(fun_name, cpp_type, size, unroll, direction):
if size == 1:
insns += fmt_insn("ldrb %w[scratch], [%x[src], OFFSET]")
insns += fmt_insn("strb %w[scratch], [%x[dst], OFFSET]")
elif size == 2:
insns += fmt_insn("ldrh %w[scratch], [%x[src], OFFSET]")
insns += fmt_insn("strh %w[scratch], [%x[dst], OFFSET]")
elif size == 4:
insns += fmt_insn("ldr %w[scratch], [%x[src], OFFSET]")
insns += fmt_insn("str %w[scratch], [%x[dst], OFFSET]")
else:
assert size == 8
insns += fmt_insn("ldr %x[scratch], [%x[src], OFFSET]")
@@ -683,6 +692,9 @@ def gen_copy(fun_name, cpp_type, size, unroll, direction):
if size == 1:
insns += fmt_insn("ldrb %[scratch], [%[src], #OFFSET]")
insns += fmt_insn("strb %[scratch], [%[dst], #OFFSET]")
elif size == 2:
insns += fmt_insn("ldrh %[scratch], [%[src], #OFFSET]")
insns += fmt_insn("strh %[scratch], [%[dst], #OFFSET]")
else:
assert size == 4
insns += fmt_insn("ldr %[scratch], [%[src], #OFFSET]")
@@ -864,7 +876,9 @@ def generate_atomics_header(c_out):
contents += gen_copy(
"AtomicCopyWordUnsynchronized", "uintptr_t", wordsize, 1, "down"
)
contents += gen_copy("AtomicCopyByteUnsynchronized", "uint8_t", 1, 1, "down")
contents += gen_copy("AtomicCopy32Unsynchronized", "uint32_t", 4, 1, "down")
contents += gen_copy("AtomicCopy16Unsynchronized", "uint16_t", 2, 1, "down")
contents += gen_copy("AtomicCopy8Unsynchronized", "uint8_t", 1, 1, "down")

contents += "\n"
contents += (
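For reference, the new `elif size == 2` branches (plus the aarch64 `size == 4` case) make the generator emit 16-bit and 32-bit copy helpers alongside the existing byte and word variants: `movw` on x86, `ldrh`/`strh` on aarch64 and arm. A hand-written approximation of the kind of x86-64 helper the script produces is below; the exact wrapper, constraints, and naming come from GenerateAtomicOperations.py and may differ.

```cpp
// Rough sketch (not the script's verbatim output) of the generated 16-bit
// helper: one 16-bit load and one 16-bit store through a scratch register, so
// an aligned uint16_t element is never split into byte accesses.
#include <cstdint>

inline void AtomicCopy16Unsynchronized(uint8_t* dst, const uint8_t* src) {
  uint16_t scratch;
  asm volatile(
      "movw 0(%[src]), %[scratch]\n\t"
      "movw %[scratch], 0(%[dst])\n\t"
      : [scratch] "=&r"(scratch)
      : [src] "r"(src), [dst] "r"(dst)
      : "memory");
}
```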
js/src/jit/shared/AtomicOperations-shared-jit.cpp (148 changes: 130 additions & 18 deletions)
@@ -4,18 +4,26 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "mozilla/Assertions.h"
#include "mozilla/Attributes.h"
#include "mozilla/MathAlgorithms.h"

#include <atomic>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <tuple>
#include <utility>

#include "jit/AtomicOperations.h"
#include "js/GCAPI.h"

#if defined(__arm__)
# include "jit/arm/Architecture-arm.h"
#endif

#ifdef JS_HAVE_GENERATED_ATOMIC_OPS

# include <atomic>

# include "js/GCAPI.h"

using namespace js;
using namespace js::jit;

@@ -70,6 +78,64 @@ void AtomicCompilerFence() {
}
# endif

/**
* Return `true` if all pointers are aligned to `Alignment`.
*/
template <size_t Alignment>
static inline bool CanCopyAligned(const uint8_t* dest, const uint8_t* src,
const uint8_t* lim) {
static_assert(mozilla::IsPowerOfTwo(Alignment));
return ((uintptr_t(dest) | uintptr_t(src) | uintptr_t(lim)) &
(Alignment - 1)) == 0;
}

/**
* Return `true` if both pointers have the same alignment and can be aligned to
* `Alignment`.
*/
template <size_t Alignment>
static inline bool CanAlignTo(const uint8_t* dest, const uint8_t* src) {
static_assert(mozilla::IsPowerOfTwo(Alignment));
return ((uintptr_t(dest) ^ uintptr_t(src)) & (Alignment - 1)) == 0;
}

/**
* Copy a datum smaller than `WORDSIZE`. Prevents tearing when `dest` and `src`
* are both aligned.
*
* No tearing is a requirement for integer TypedArrays.
*
* https://tc39.es/ecma262/#sec-isnotearconfiguration
* https://tc39.es/ecma262/#sec-tear-free-aligned-reads
* https://tc39.es/ecma262/#sec-valid-executions
*/
static MOZ_ALWAYS_INLINE auto AtomicCopyDownNoTearIfAlignedUnsynchronized(
uint8_t* dest, const uint8_t* src, const uint8_t* srcEnd) {
MOZ_ASSERT(src <= srcEnd);
MOZ_ASSERT(size_t(srcEnd - src) < WORDSIZE);

if (WORDSIZE > 4 && CanCopyAligned<4>(dest, src, srcEnd)) {
static_assert(WORDSIZE <= 8, "copies 32-bits at most once");

if (src < srcEnd) {
AtomicCopy32Unsynchronized(dest, src);
dest += 4;
src += 4;
}
} else if (CanCopyAligned<2>(dest, src, srcEnd)) {
while (src < srcEnd) {
AtomicCopy16Unsynchronized(dest, src);
dest += 2;
src += 2;
}
} else {
while (src < srcEnd) {
AtomicCopy8Unsynchronized(dest++, src++);
}
}
return std::pair{dest, src};
}

void AtomicMemcpyDownUnsynchronized(uint8_t* dest, const uint8_t* src,
size_t nbytes) {
JS::AutoSuppressGCAnalysis nogc;
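The hunk above adds two alignment predicates that the new no-tear helpers rely on: `CanCopyAligned` requires every pointer to sit on an `Alignment` boundary (copying at that width is safe right now), while `CanAlignTo` only requires `dest` and `src` to share the same misalignment, so they can be advanced to a boundary in lockstep. A standalone sketch with made-up integer addresses (the real predicates take `uint8_t*`):

```cpp
// Minimal sketch of the two predicates on plain integer addresses.
#include <cstddef>
#include <cstdint>
#include <cstdio>

template <size_t Alignment>
static bool CanCopyAligned(uintptr_t dest, uintptr_t src, uintptr_t lim) {
  return ((dest | src | lim) & (Alignment - 1)) == 0;
}

template <size_t Alignment>
static bool CanAlignTo(uintptr_t dest, uintptr_t src) {
  return ((dest ^ src) & (Alignment - 1)) == 0;
}

int main() {
  // Everything on a 4-byte boundary: 32-bit copies are safe immediately.
  printf("%d\n", CanCopyAligned<4>(0x1000, 0x2000, 0x1008));  // 1
  // Both pointers are 2 bytes past a 4-byte boundary: not yet safe for 32-bit
  // copies, but they can be word-aligned together, so the aligned bulk path
  // in AtomicMemcpyDownUnsynchronized is still taken.
  printf("%d\n", CanCopyAligned<4>(0x1002, 0x2006, 0x2008));  // 0
  printf("%d\n", CanAlignTo<8>(0x1002, 0x200a));              // 1
  return 0;
}
```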
@@ -85,12 +151,14 @@ void AtomicMemcpyDownUnsynchronized(uint8_t* dest, const uint8_t* src,
void (*copyBlock)(uint8_t* dest, const uint8_t* src);
void (*copyWord)(uint8_t* dest, const uint8_t* src);

if (((uintptr_t(dest) ^ uintptr_t(src)) & WORDMASK) == 0) {
if (CanAlignTo<WORDSIZE>(dest, src)) {
const uint8_t* cutoff = (const uint8_t*)RoundUp(uintptr_t(src), WORDSIZE);
MOZ_ASSERT(cutoff <= lim); // because nbytes >= WORDSIZE
while (src < cutoff) {
AtomicCopyByteUnsynchronized(dest++, src++);
}

// Copy initial bytes to align to word size.
std::tie(dest, src) =
AtomicCopyDownNoTearIfAlignedUnsynchronized(dest, src, cutoff);

copyBlock = AtomicCopyBlockDownUnsynchronized;
copyWord = AtomicCopyWordUnsynchronized;
} else if (UnalignedAccessesAreOK()) {
@@ -118,11 +186,46 @@ void AtomicMemcpyDownUnsynchronized(uint8_t* dest, const uint8_t* src,
}
}

// Byte copy any remaining tail.
// Copy any remaining tail.

while (src < lim) {
AtomicCopyByteUnsynchronized(dest++, src++);
AtomicCopyDownNoTearIfAlignedUnsynchronized(dest, src, lim);
}

/**
* Copy a datum smaller than `WORDSIZE`. Prevents tearing when `dest` and `src`
* are both aligned.
*
* No tearing is a requirement for integer TypedArrays.
*
* https://tc39.es/ecma262/#sec-isnotearconfiguration
* https://tc39.es/ecma262/#sec-tear-free-aligned-reads
* https://tc39.es/ecma262/#sec-valid-executions
*/
static MOZ_ALWAYS_INLINE auto AtomicCopyUpNoTearIfAlignedUnsynchronized(
uint8_t* dest, const uint8_t* src, const uint8_t* srcBegin) {
MOZ_ASSERT(src >= srcBegin);
MOZ_ASSERT(size_t(src - srcBegin) < WORDSIZE);

if (WORDSIZE > 4 && CanCopyAligned<4>(dest, src, srcBegin)) {
static_assert(WORDSIZE <= 8, "copies 32-bits at most once");

if (src > srcBegin) {
dest -= 4;
src -= 4;
AtomicCopy32Unsynchronized(dest, src);
}
} else if (CanCopyAligned<2>(dest, src, srcBegin)) {
while (src > srcBegin) {
dest -= 2;
src -= 2;
AtomicCopy16Unsynchronized(dest, src);
}
} else {
while (src > srcBegin) {
AtomicCopy8Unsynchronized(--dest, --src);
}
}
return std::pair{dest, src};
}

void AtomicMemcpyUpUnsynchronized(uint8_t* dest, const uint8_t* src,
@@ -134,16 +237,23 @@ void AtomicMemcpyUpUnsynchronized(uint8_t* dest, const uint8_t* src,
src += nbytes;
dest += nbytes;

// Set up bulk copying. The cases are ordered the way they are on the
// assumption that if we can achieve aligned copies even with a little
// preprocessing then that is better than unaligned copying on a platform
// that supports it.

if (nbytes >= WORDSIZE) {
void (*copyBlock)(uint8_t* dest, const uint8_t* src);
void (*copyWord)(uint8_t* dest, const uint8_t* src);

if (((uintptr_t(dest) ^ uintptr_t(src)) & WORDMASK) == 0) {
if (CanAlignTo<WORDSIZE>(dest, src)) {
const uint8_t* cutoff = (const uint8_t*)(uintptr_t(src) & ~WORDMASK);
MOZ_ASSERT(cutoff >= lim); // Because nbytes >= WORDSIZE
while (src > cutoff) {
AtomicCopyByteUnsynchronized(--dest, --src);
}

// Copy initial bytes to align to word size.
std::tie(dest, src) =
AtomicCopyUpNoTearIfAlignedUnsynchronized(dest, src, cutoff);

copyBlock = AtomicCopyBlockUpUnsynchronized;
copyWord = AtomicCopyWordUnsynchronized;
} else if (UnalignedAccessesAreOK()) {
@@ -154,6 +264,8 @@ void AtomicMemcpyUpUnsynchronized(uint8_t* dest, const uint8_t* src,
copyWord = AtomicCopyUnalignedWordUpUnsynchronized;
}

// Bulk copy, first larger blocks and then individual words.

const uint8_t* blocklim = src - ((src - lim) & ~BLOCKMASK);
while (src > blocklim) {
dest -= BLOCKSIZE;
@@ -169,9 +281,9 @@ void AtomicMemcpyUpUnsynchronized(uint8_t* dest, const uint8_t* src,
}
}

while (src > lim) {
AtomicCopyByteUnsynchronized(--dest, --src);
}
// Copy any remaining tail.

AtomicCopyUpNoTearIfAlignedUnsynchronized(dest, src, lim);
}

} // namespace jit
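Finally, an illustrative, self-contained simulation (not SpiderMonkey code) of the access widths the rewritten head/tail copy now chooses on a 64-bit platform, mirroring the checks in `AtomicCopyDownNoTearIfAlignedUnsynchronized`; the `printWidths` helper and the addresses are invented for the example.

```cpp
// Simulate which copy widths handle a sub-word head/tail for WORDSIZE == 8.
#include <cstddef>
#include <cstdint>
#include <cstdio>

static void printWidths(uintptr_t dest, uintptr_t src, uintptr_t srcEnd) {
  auto allAligned = [&](uintptr_t alignment) {
    return ((dest | src | srcEnd) & (alignment - 1)) == 0;
  };
  if (allAligned(4)) {
    // A tail shorter than 8 bytes that is 4-byte aligned on all sides is
    // exactly one 32-bit copy.
    if (src < srcEnd) printf("32-bit copy\n");
  } else if (allAligned(2)) {
    for (; src < srcEnd; src += 2) printf("16-bit copy\n");
  } else {
    for (; src < srcEnd; src += 1) printf("8-bit copy\n");
  }
}

int main() {
  // 4-byte tail left after the 8-byte word copies of an Int32Array memcpy:
  printWidths(0x1008, 0x2008, 0x200c);  // one "32-bit copy"
  // 6-byte tail of a Uint16Array copy, 2-byte but not 4-byte aligned:
  printWidths(0x1002, 0x2002, 0x2008);  // three "16-bit copy"
  // Byte-aligned data only, where byte copies remain the fallback:
  printWidths(0x1001, 0x2001, 0x2004);  // three "8-bit copy"
  return 0;
}
```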
