Skip to content

Commit

Permalink
Make heap large chunk bit twiddling branchless (#4526)
Browse files Browse the repository at this point in the history
* Make heap large chunk bit twiddling branchless

We use bit twiddling operations to set the slot, shallow, and finaliser
flags on large chunks in actor heaps. Before this commit, the functionality
was implemented with a ternary operator, which would result in a branch.

This commit changes the logic to use bit shifts instead to avoid
the branching.

* use `#define` preprocessor macros
  • Loading branch information
dipinhora authored Oct 15, 2024
1 parent e1f9e64 commit 411e798
Showing 1 changed file with 18 additions and 6 deletions.
24 changes: 18 additions & 6 deletions src/libponyrt/mem/heap.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,12 @@ typedef struct small_chunk_t
#define SMALL_CHUNK_SIZECLASS_BITMASK (uintptr_t)0x1C
// TODO: figure out how to calculate the `2` from `SMALL_CHUNK_SIZECLASS_BITMASK` at compile time
#define SMALL_CHUNK_SIZECLASS_SHIFT (uintptr_t)2
#define LARGE_CHUNK_SLOT_BITMASK (uintptr_t)0x4
#define LARGE_CHUNK_SHALLOW_BITMASK (uintptr_t)0x8
#define LARGE_CHUNK_FINALISER_BITMASK (uintptr_t)0x10
#define LARGE_CHUNK_SLOT_BITMASK_SHIFT_AMOUNT (uintptr_t)2
#define LARGE_CHUNK_SLOT_BITMASK ((uintptr_t)0x1 << LARGE_CHUNK_SLOT_BITMASK_SHIFT_AMOUNT)
#define LARGE_CHUNK_SHALLOW_BITMASK_SHIFT_AMOUNT (uintptr_t)3
#define LARGE_CHUNK_SHALLOW_BITMASK ((uintptr_t)0x1 << LARGE_CHUNK_SHALLOW_BITMASK_SHIFT_AMOUNT)
#define LARGE_CHUNK_FINALISER_BITMASK_SHIFT_AMOUNT (uintptr_t)4
#define LARGE_CHUNK_FINALISER_BITMASK ((uintptr_t)0x1 << LARGE_CHUNK_FINALISER_BITMASK_SHIFT_AMOUNT)
#define CHUNK_M_BITMASK ~(CHUNK_TYPE_BITMASK | CHUNK_NEEDS_TO_BE_CLEARED_BITMASK | SMALL_CHUNK_SIZECLASS_BITMASK | LARGE_CHUNK_SLOT_BITMASK | LARGE_CHUNK_SHALLOW_BITMASK | LARGE_CHUNK_FINALISER_BITMASK)

enum
Expand Down Expand Up @@ -172,7 +175,10 @@ static void set_large_chunk_slot(large_chunk_t* chunk, uint32_t slot)
{
// `!!` to normalize to 1 or 0
slot = !!slot;
((chunk_t*)chunk)->m = (char*)(((uintptr_t)((chunk_t*)chunk)->m & ~LARGE_CHUNK_SLOT_BITMASK) | (slot == 1 ? LARGE_CHUNK_SLOT_BITMASK : 0));
// left shift the normalized slot flag to put its bit in the right spot for OR'ing into `chunk->m`
slot = slot << LARGE_CHUNK_SLOT_BITMASK_SHIFT_AMOUNT;
pony_assert(slot == LARGE_CHUNK_SLOT_BITMASK || slot == 0);
((chunk_t*)chunk)->m = (char*)(((uintptr_t)((chunk_t*)chunk)->m & ~LARGE_CHUNK_SLOT_BITMASK) | slot);
}

static uint32_t get_large_chunk_shallow(large_chunk_t* chunk)
Expand All @@ -185,7 +191,10 @@ static void set_large_chunk_shallow(large_chunk_t* chunk, uint32_t shallow)
{
// `!!` to normalize to 1 or 0
shallow = !!shallow;
((chunk_t*)chunk)->m = (char*)(((uintptr_t)((chunk_t*)chunk)->m & ~LARGE_CHUNK_SHALLOW_BITMASK) | (shallow == 1 ? LARGE_CHUNK_SHALLOW_BITMASK : 0));
// left shift the normalized shallow flag to put its bit in the right spot for OR'ing into `chunk->m`
shallow = shallow << LARGE_CHUNK_SHALLOW_BITMASK_SHIFT_AMOUNT;
pony_assert(shallow == LARGE_CHUNK_SHALLOW_BITMASK || shallow == 0);
((chunk_t*)chunk)->m = (char*)(((uintptr_t)((chunk_t*)chunk)->m & ~LARGE_CHUNK_SHALLOW_BITMASK) | shallow);
}

static uint32_t get_large_chunk_finaliser(large_chunk_t* chunk)
Expand All @@ -198,7 +207,10 @@ static void set_large_chunk_finaliser(large_chunk_t* chunk, uint32_t finaliser)
{
// `!!` to normalize to 1 or 0
finaliser = !!finaliser;
((chunk_t*)chunk)->m = (char*)(((uintptr_t)((chunk_t*)chunk)->m & ~LARGE_CHUNK_FINALISER_BITMASK) | (finaliser == 1 ? LARGE_CHUNK_FINALISER_BITMASK : 0));
// left shift the normalized finaliser flag to put its bit in the right spot for OR'ing into `chunk->m`
finaliser = finaliser << LARGE_CHUNK_FINALISER_BITMASK_SHIFT_AMOUNT;
pony_assert(finaliser == LARGE_CHUNK_FINALISER_BITMASK || finaliser == 0);
((chunk_t*)chunk)->m = (char*)(((uintptr_t)((chunk_t*)chunk)->m & ~LARGE_CHUNK_FINALISER_BITMASK) | finaliser);
}

static size_t get_small_chunk_size(small_chunk_t* chunk)
Expand Down

0 comments on commit 411e798

Please sign in to comment.