Replaced SDL_SIMDAlloc(), SDL_SIMDRealloc(), and SDL_SIMDFree() with SDL_aligned_alloc() and SDL_aligned_free()

Fixes https://github.com/libsdl-org/SDL/issues/5641
This commit is contained in:
Sam Lantinga 2023-01-09 17:42:16 -08:00
parent 9597c482fa
commit 2aa9569b3e
13 changed files with 132 additions and 194 deletions

View file

@ -20,5 +20,6 @@ General:
* Added SDL_DelayNS() to specify a delay in nanoseconds, to the highest precision the system will support
* The timestamp member of the SDL_Event structure is now in nanoseconds, filled in with the time the event was generated, or the time it was queued if that's not available
* Added SDL_modf() and SDL_modff() to separate the whole and fractional portions of a floating point number
* Added SDL_aligned_alloc() and SDL_aligned_free() to allocate and free memory with a given alignment
* Added SDL_GetRenderVSync() to get vsync of the given renderer
* Added SDL_PlayAudioDevice() to start audio playback

View file

@ -76,6 +76,8 @@ Use the SDL_AudioDevice functions instead.
SDL_Has3DNow() has been removed; there is no replacement.
SDL_SIMDAlloc(), SDL_SIMDRealloc(), and SDL_SIMDFree() have been removed. You can use SDL_aligned_alloc() and SDL_aligned_free() with SDL_SIMDGetAlignment() to get the same functionality.
The following headers are no longer automatically included, and will need to be included manually:
- immintrin.h
- mm3dnow.h

View file

@ -367,96 +367,12 @@ extern DECLSPEC int SDLCALL SDL_GetSystemRAM(void);
* instructions.
*
* \since This function is available since SDL 3.0.0.
*
* \sa SDL_aligned_alloc
* \sa SDL_aligned_free
*/
extern DECLSPEC size_t SDLCALL SDL_SIMDGetAlignment(void);
/**
* Allocate memory in a SIMD-friendly way.
*
* This will allocate a block of memory that is suitable for use with SIMD
* instructions. Specifically, it will be properly aligned and padded for the
* system's supported vector instructions.
*
* The memory returned will be padded such that it is safe to read or write an
* incomplete vector at the end of the memory block. This can be useful so you
* don't have to drop back to a scalar fallback at the end of your SIMD
* processing loop to deal with the final elements without overflowing the
* allocated buffer.
*
* You must free this memory with SDL_FreeSIMD(), not free() or SDL_free() or
* delete[], etc.
*
* Note that SDL will only deal with SIMD instruction sets it is aware of; for
* example, SDL 2.0.8 knows that SSE wants 16-byte vectors (SDL_HasSSE()), and
* AVX2 wants 32 bytes (SDL_HasAVX2()), but doesn't know that AVX-512 wants
* 64. To be clear: if you can't decide to use an instruction set with an
* SDL_Has*() function, don't use that instruction set with memory allocated
* through here.
*
* SDL_AllocSIMD(0) will return a non-NULL pointer, assuming the system isn't
* out of memory, but you are not allowed to dereference it (because you only
* own zero bytes of that buffer).
*
* \param len The length, in bytes, of the block to allocate. The actual
* allocated block might be larger due to padding, etc.
* \returns a pointer to the newly-allocated block, NULL if out of memory.
*
* \since This function is available since SDL 3.0.0.
*
* \sa SDL_SIMDGetAlignment
* \sa SDL_SIMDRealloc
* \sa SDL_SIMDFree
*/
extern DECLSPEC void * SDLCALL SDL_SIMDAlloc(const size_t len);
/**
* Reallocate memory obtained from SDL_SIMDAlloc
*
* It is not valid to use this function on a pointer from anything but
* SDL_SIMDAlloc(). It can't be used on pointers from malloc, realloc,
* SDL_malloc, memalign, new[], etc.
*
* \param mem The pointer obtained from SDL_SIMDAlloc. This function also
* accepts NULL, at which point this function is the same as
* calling SDL_SIMDAlloc with a NULL pointer.
* \param len The length, in bytes, of the block to allocated. The actual
* allocated block might be larger due to padding, etc. Passing 0
* will return a non-NULL pointer, assuming the system isn't out of
* memory.
* \returns a pointer to the newly-reallocated block, NULL if out of memory.
*
* \since This function is available since SDL 3.0.0.
*
* \sa SDL_SIMDGetAlignment
* \sa SDL_SIMDAlloc
* \sa SDL_SIMDFree
*/
extern DECLSPEC void * SDLCALL SDL_SIMDRealloc(void *mem, const size_t len);
/**
* Deallocate memory obtained from SDL_SIMDAlloc
*
* It is not valid to use this function on a pointer from anything but
* SDL_SIMDAlloc() or SDL_SIMDRealloc(). It can't be used on pointers from
* malloc, realloc, SDL_malloc, memalign, new[], etc.
*
* However, SDL_SIMDFree(NULL) is a legal no-op.
*
* The memory pointed to by `ptr` is no longer valid for access upon return,
* and may be returned to the system or reused by a future allocation. The
* pointer passed to this function is no longer safe to dereference once this
* function returns, and should be discarded.
*
* \param ptr The pointer, returned from SDL_SIMDAlloc or SDL_SIMDRealloc, to
* deallocate. NULL is a legal no-op.
*
* \since This function is available since SDL 3.0.0.
*
* \sa SDL_SIMDAlloc
* \sa SDL_SIMDRealloc
*/
extern DECLSPEC void SDLCALL SDL_SIMDFree(void *ptr);
/* Ends C function definitions when using C++ */
#ifdef __cplusplus
}

View file

@ -421,6 +421,34 @@ extern DECLSPEC int SDLCALL SDL_SetMemoryFunctions(SDL_malloc_func malloc_func,
SDL_realloc_func realloc_func,
SDL_free_func free_func);
/**
* Allocate memory aligned to a specific value
*
* If `alignment` is less than the size of `void *`, then it will be increased to match that.
*
* The returned memory address will be a multiple of the alignment value, and the amount of memory allocated will be a multiple of the alignment value.
*
* The memory returned by this function must be freed with SDL_aligned_free()
*
* \param alignment the alignment requested
* \param size the size to allocate
* \returns a pointer to the aligned memory
*
* \since This function is available since SDL 3.0.0.
*
* \sa SDL_aligned_free
*/
extern DECLSPEC SDL_MALLOC void *SDLCALL SDL_aligned_alloc(size_t alignment, size_t size);
/**
* Free memory allocated by SDL_aligned_alloc()
*
* \since This function is available since SDL 3.0.0.
*
* \sa SDL_aligned_alloc
*/
extern DECLSPEC void SDLCALL SDL_aligned_free(void *mem);
/**
* Get the number of outstanding (unfreed) allocations
*

View file

@ -139,7 +139,7 @@ static int CPU_haveCPUID(void)
: "%eax", "%ecx"
);
#elif (defined(__GNUC__) || defined(__llvm__)) && defined(__x86_64__)
/* Technically, if this is being compiled under __x86_64__ then it has
/* Technically, if this is being compiled under __x86_64__ then it has
CPUid by definition. But it's nice to be able to prove it. :) */
__asm__ (
" pushfq # Get original EFLAGS \n"
@ -1094,91 +1094,6 @@ SDL_SIMDGetAlignment(void)
return SDL_SIMDAlignment;
}
void *
SDL_SIMDAlloc(const size_t len)
{
const size_t alignment = SDL_SIMDGetAlignment();
const size_t padding = (alignment - (len % alignment)) % alignment;
Uint8 *retval = NULL;
Uint8 *ptr;
size_t to_allocate;
/* alignment + padding + sizeof (void *) is bounded (a few hundred
* bytes max), so no need to check for overflow within that argument */
if (SDL_size_add_overflow(len, alignment + padding + sizeof(void *), &to_allocate)) {
return NULL;
}
ptr = (Uint8 *)SDL_malloc(to_allocate);
if (ptr) {
/* store the actual allocated pointer right before our aligned pointer. */
retval = ptr + sizeof(void *);
retval += alignment - (((size_t)retval) % alignment);
*(((void **)retval) - 1) = ptr;
}
return retval;
}
void *
SDL_SIMDRealloc(void *mem, const size_t len)
{
const size_t alignment = SDL_SIMDGetAlignment();
const size_t padding = (alignment - (len % alignment)) % alignment;
Uint8 *retval = (Uint8 *)mem;
void *oldmem = mem;
size_t memdiff = 0, ptrdiff;
Uint8 *ptr;
size_t to_allocate;
/* alignment + padding + sizeof (void *) is bounded (a few hundred
* bytes max), so no need to check for overflow within that argument */
if (SDL_size_add_overflow(len, alignment + padding + sizeof(void *), &to_allocate)) {
return NULL;
}
if (mem) {
mem = *(((void **)mem) - 1);
/* Check the delta between the real pointer and user pointer */
memdiff = ((size_t)oldmem) - ((size_t)mem);
}
ptr = (Uint8 *)SDL_realloc(mem, to_allocate);
if (ptr == NULL) {
return NULL; /* Out of memory, bail! */
}
/* Store the actual allocated pointer right before our aligned pointer. */
retval = ptr + sizeof(void *);
retval += alignment - (((size_t)retval) % alignment);
/* Make sure the delta is the same! */
if (mem) {
ptrdiff = ((size_t)retval) - ((size_t)ptr);
if (memdiff != ptrdiff) { /* Delta has changed, copy to new offset! */
oldmem = (void *)(((uintptr_t)ptr) + memdiff);
/* Even though the data past the old `len` is undefined, this is the
* only length value we have, and it guarantees that we copy all the
* previous memory anyhow.
*/
SDL_memmove(retval, oldmem, len);
}
}
/* Actually store the allocated pointer, finally. */
*(((void **)retval) - 1) = ptr;
return retval;
}
void SDL_SIMDFree(void *ptr)
{
if (ptr) {
SDL_free(*(((void **)ptr) - 1));
}
}
#ifdef TEST_MAIN
#include <stdio.h>

View file

@ -555,10 +555,7 @@ SDL3_0.0.0 {
SDL_RumbleJoystick;
SDL_RumbleJoystickTriggers;
SDL_RunApp;
SDL_SIMDAlloc;
SDL_SIMDFree;
SDL_SIMDGetAlignment;
SDL_SIMDRealloc;
SDL_SaveBMP_RW;
SDL_ScreenKeyboardShown;
SDL_ScreenSaverEnabled;
@ -843,6 +840,8 @@ SDL3_0.0.0 {
SDL_modff;
SDL_GetRenderVSync;
SDL_PlayAudioDevice;
SDL_aligned_alloc;
SDL_aligned_free;
# extra symbols go here (don't modify this line)
local: *;
};

View file

@ -581,10 +581,7 @@
#define SDL_RumbleJoystick SDL_RumbleJoystick_REAL
#define SDL_RumbleJoystickTriggers SDL_RumbleJoystickTriggers_REAL
#define SDL_RunApp SDL_RunApp_REAL
#define SDL_SIMDAlloc SDL_SIMDAlloc_REAL
#define SDL_SIMDFree SDL_SIMDFree_REAL
#define SDL_SIMDGetAlignment SDL_SIMDGetAlignment_REAL
#define SDL_SIMDRealloc SDL_SIMDRealloc_REAL
#define SDL_SaveBMP_RW SDL_SaveBMP_RW_REAL
#define SDL_ScreenKeyboardShown SDL_ScreenKeyboardShown_REAL
#define SDL_ScreenSaverEnabled SDL_ScreenSaverEnabled_REAL
@ -871,3 +868,5 @@
#define SDL_modff SDL_modff_REAL
#define SDL_GetRenderVSync SDL_GetRenderVSync_REAL
#define SDL_PlayAudioDevice SDL_PlayAudioDevice_REAL
#define SDL_aligned_alloc SDL_aligned_alloc_REAL
#define SDL_aligned_free SDL_aligned_free_REAL

View file

@ -637,10 +637,7 @@ SDL_DYNAPI_PROC(int,SDL_RumbleGamepadTriggers,(SDL_Gamepad *a, Uint16 b, Uint16
SDL_DYNAPI_PROC(int,SDL_RumbleJoystick,(SDL_Joystick *a, Uint16 b, Uint16 c, Uint32 d),(a,b,c,d),return)
SDL_DYNAPI_PROC(int,SDL_RumbleJoystickTriggers,(SDL_Joystick *a, Uint16 b, Uint16 c, Uint32 d),(a,b,c,d),return)
SDL_DYNAPI_PROC(int,SDL_RunApp,(int a, char *b[], SDL_main_func c, void *d),(a,b,c,d),return)
SDL_DYNAPI_PROC(void*,SDL_SIMDAlloc,(const size_t a),(a),return)
SDL_DYNAPI_PROC(void,SDL_SIMDFree,(void *a),(a),)
SDL_DYNAPI_PROC(size_t,SDL_SIMDGetAlignment,(void),(),return)
SDL_DYNAPI_PROC(void*,SDL_SIMDRealloc,(void *a, const size_t b),(a,b),return)
SDL_DYNAPI_PROC(int,SDL_SaveBMP_RW,(SDL_Surface *a, SDL_RWops *b, int c),(a,b,c),return)
SDL_DYNAPI_PROC(SDL_bool,SDL_ScreenKeyboardShown,(SDL_Window *a),(a),return)
SDL_DYNAPI_PROC(SDL_bool,SDL_ScreenSaverEnabled,(void),(),return)
@ -916,3 +913,5 @@ SDL_DYNAPI_PROC(double,SDL_modf,(double a, double *b),(a,b),return)
SDL_DYNAPI_PROC(float,SDL_modff,(float a, float *b),(a,b),return)
SDL_DYNAPI_PROC(int,SDL_GetRenderVSync,(SDL_Renderer *a, int *b),(a,b),return)
SDL_DYNAPI_PROC(void,SDL_PlayAudioDevice,(SDL_AudioDeviceID a),(a),)
SDL_DYNAPI_PROC(void*,SDL_aligned_alloc,(size_t a, size_t b),(a,b),return)
SDL_DYNAPI_PROC(void,SDL_aligned_free,(void *a),(a),)

View file

@ -63,7 +63,7 @@ SDL_SW_CreateYUVTexture(Uint32 format, int w, int h)
SDL_OutOfMemory();
return NULL;
}
swdata->pixels = (Uint8 *)SDL_SIMDAlloc(dst_size);
swdata->pixels = (Uint8 *)SDL_aligned_alloc(SDL_SIMDGetAlignment(), dst_size);
if (!swdata->pixels) {
SDL_SW_DestroyYUVTexture(swdata);
SDL_OutOfMemory();
@ -394,7 +394,7 @@ int SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture *swdata, const SDL_Rect *srcrect,
void SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture *swdata)
{
if (swdata) {
SDL_SIMDFree(swdata->pixels);
SDL_aligned_free(swdata->pixels);
SDL_DestroySurface(swdata->stretch);
SDL_DestroySurface(swdata->display);
SDL_free(swdata);

View file

@ -686,3 +686,40 @@ int SDL_isblank(int x)
return ((x) == ' ') || ((x) == '\t');
}
#endif
void *SDL_aligned_alloc(size_t alignment, size_t size)
{
size_t padding;
Uint8 *retval = NULL;
if (alignment < sizeof(void*)) {
alignment = sizeof(void*);
}
padding = (alignment - (size % alignment));
if (SDL_size_add_overflow(size, alignment, &size) == 0 &&
SDL_size_add_overflow(size, sizeof(void *), &size) == 0 &&
SDL_size_add_overflow(size, padding, &size) == 0) {
void *original = SDL_malloc(size);
if (original) {
/* Make sure we have enough space to store the original pointer */
retval = (Uint8 *)original + sizeof(original);
/* Align the pointer we're going to return */
retval += alignment - (((size_t)retval) % alignment);
/* Store the original pointer right before the returned value */
SDL_memcpy(retval - sizeof(original), &original, sizeof(original));
}
}
return retval;
}
void SDL_aligned_free(void *mem)
{
if (mem) {
void *original;
SDL_memcpy(&original, ((Uint8 *)mem - sizeof(original)), sizeof(original));
SDL_free(original);
}
}

View file

@ -1215,7 +1215,7 @@ static int RLEAlphaSurface(SDL_Surface *surface)
/* Now that we have it encoded, release the original pixels */
if (!(surface->flags & SDL_PREALLOC)) {
if (surface->flags & SDL_SIMD_ALIGNED) {
SDL_SIMDFree(surface->pixels);
SDL_aligned_free(surface->pixels);
surface->flags &= ~SDL_SIMD_ALIGNED;
} else {
SDL_free(surface->pixels);
@ -1382,7 +1382,7 @@ static int RLEColorkeySurface(SDL_Surface *surface)
/* Now that we have it encoded, release the original pixels */
if (!(surface->flags & SDL_PREALLOC)) {
if (surface->flags & SDL_SIMD_ALIGNED) {
SDL_SIMDFree(surface->pixels);
SDL_aligned_free(surface->pixels);
surface->flags &= ~SDL_SIMD_ALIGNED;
} else {
SDL_free(surface->pixels);
@ -1482,6 +1482,7 @@ static SDL_bool UnRLEAlpha(SDL_Surface *surface)
RLEDestFormat *, SDL_PixelFormat *);
int w = surface->w;
int bpp = df->BytesPerPixel;
size_t size;
if (bpp == 2) {
uncopy_opaque = uncopy_opaque_16;
@ -1490,7 +1491,11 @@ static SDL_bool UnRLEAlpha(SDL_Surface *surface)
uncopy_opaque = uncopy_transl = uncopy_32;
}
surface->pixels = SDL_SIMDAlloc((size_t)surface->h * surface->pitch);
if (SDL_size_mul_overflow(surface->h, surface->pitch, &size)) {
return SDL_FALSE;
}
surface->pixels = SDL_aligned_alloc(SDL_SIMDGetAlignment(), size);
if (surface->pixels == NULL) {
return SDL_FALSE;
}
@ -1554,9 +1559,15 @@ void SDL_UnRLESurface(SDL_Surface *surface, int recode)
if (recode && !(surface->flags & SDL_PREALLOC)) {
if (surface->map->info.flags & SDL_COPY_RLE_COLORKEY) {
SDL_Rect full;
size_t size;
/* re-create the original surface */
surface->pixels = SDL_SIMDAlloc((size_t)surface->h * surface->pitch);
if (SDL_size_mul_overflow(surface->h, surface->pitch, &size)) {
/* Memory corruption? */
surface->flags |= SDL_RLEACCEL;
return;
}
surface->pixels = SDL_aligned_alloc(SDL_SIMDGetAlignment(), size);
if (surface->pixels == NULL) {
/* Oh crap... */
surface->flags |= SDL_RLEACCEL;

View file

@ -165,7 +165,7 @@ SDL_CreateSurface(int width, int height, Uint32 format)
}
}
surface->pixels = SDL_SIMDAlloc(size);
surface->pixels = SDL_aligned_alloc(SDL_SIMDGetAlignment(), size);
if (!surface->pixels) {
SDL_DestroySurface(surface);
SDL_OutOfMemory();
@ -1553,7 +1553,7 @@ void SDL_DestroySurface(SDL_Surface *surface)
/* Don't free */
} else if (surface->flags & SDL_SIMD_ALIGNED) {
/* Free aligned */
SDL_SIMDFree(surface->pixels);
SDL_aligned_free(surface->pixels);
} else {
/* Normal */
SDL_free(surface->pixels);

View file

@ -444,6 +444,32 @@ int stdlib_sscanf(void *arg)
#define SIZE_FORMAT "zu"
#endif
/**
* @brief Call to SDL_aligned_alloc
*/
int stdlib_aligned_alloc(void *arg)
{
size_t i, alignment;
void *ptr;
for (i = 0; i < 2*sizeof(void *); ++i) {
SDLTest_AssertPass("Call to SDL_aligned_alloc(%"SIZE_FORMAT")", i);
ptr = SDL_aligned_alloc(i, 1);
if (i < sizeof(void *)) {
alignment = sizeof(void *);
} else {
alignment = i;
}
SDLTest_AssertCheck(ptr != NULL, "Check output, expected non-NULL, got: %p", ptr);
SDLTest_AssertCheck((((size_t)ptr) % alignment) == 0, "Check output, expected aligned pointer, actual offset: %"SIZE_FORMAT, (((size_t)ptr) % alignment));
SDLTest_AssertPass("Filling memory to alignment value");
SDL_memset(ptr, 0xAA, alignment);
SDL_aligned_free(ptr);
}
return TEST_COMPLETED;
}
typedef struct
{
size_t a;
@ -600,19 +626,23 @@ stdlib_overflow(void *arg)
/* Standard C routine test cases */
static const SDLTest_TestCaseReference stdlibTest1 = {
(SDLTest_TestCaseFp)stdlib_strlcpy, "stdlib_strlcpy", "Call to SDL_strlcpy", TEST_ENABLED
stdlib_strlcpy, "stdlib_strlcpy", "Call to SDL_strlcpy", TEST_ENABLED
};
static const SDLTest_TestCaseReference stdlibTest2 = {
(SDLTest_TestCaseFp)stdlib_snprintf, "stdlib_snprintf", "Call to SDL_snprintf", TEST_ENABLED
stdlib_snprintf, "stdlib_snprintf", "Call to SDL_snprintf", TEST_ENABLED
};
static const SDLTest_TestCaseReference stdlibTest3 = {
(SDLTest_TestCaseFp)stdlib_getsetenv, "stdlib_getsetenv", "Call to SDL_getenv and SDL_setenv", TEST_ENABLED
stdlib_getsetenv, "stdlib_getsetenv", "Call to SDL_getenv and SDL_setenv", TEST_ENABLED
};
static const SDLTest_TestCaseReference stdlibTest4 = {
(SDLTest_TestCaseFp)stdlib_sscanf, "stdlib_sscanf", "Call to SDL_sscanf", TEST_ENABLED
stdlib_sscanf, "stdlib_sscanf", "Call to SDL_sscanf", TEST_ENABLED
};
static const SDLTest_TestCaseReference stdlibTest5 = {
stdlib_aligned_alloc, "stdlib_aligned_alloc", "Call to SDL_aligned_alloc", TEST_ENABLED
};
static const SDLTest_TestCaseReference stdlibTestOverflow = {
@ -625,6 +655,7 @@ static const SDLTest_TestCaseReference *stdlibTests[] = {
&stdlibTest2,
&stdlibTest3,
&stdlibTest4,
&stdlibTest5,
&stdlibTestOverflow,
NULL
};