Prevent Clang from emitting atomic libcalls

Clang expects 8-byte alignment for some 64-bit atomic operations on
some 32-bit targets, even though the native instruction (lock cmpxchg8b
on x86) only requires 4-byte alignment. This patch adds 8-byte
alignment to the data that needs atomic operations, which allows Clang
to emit the builtins directly instead of generating libatomic calls.

Upstream-Status: Submitted [https://jira.mariadb.org/browse/MDEV-28162]
Signed-off-by: Khem Raj

--- a/include/my_atomic.h
+++ b/include/my_atomic.h
@@ -115,6 +115,16 @@
 #include "atomic/gcc_builtins.h"
 #endif
 
+#include
+
+# ifdef __GNUC__
+typedef __attribute__((__aligned__(8))) int64 ATOMIC_I64;
+typedef __attribute__((__aligned__(8))) uint64 ATOMIC_U64;
+# else
+typedef int64 ATOMIC_I64;
+typedef uint64 ATOMIC_U64;
+# endif
+
 #if SIZEOF_LONG == 4
 #define my_atomic_addlong(A,B) my_atomic_add32((int32*) (A), (B))
 #define my_atomic_loadlong(A) my_atomic_load32((int32*) (A))
@@ -123,12 +133,12 @@
 #define my_atomic_faslong(A,B) my_atomic_fas32((int32*) (A), (B))
 #define my_atomic_caslong(A,B,C) my_atomic_cas32((int32*) (A), (int32*) (B), (C))
 #else
-#define my_atomic_addlong(A,B) my_atomic_add64((int64*) (A), (B))
-#define my_atomic_loadlong(A) my_atomic_load64((int64*) (A))
-#define my_atomic_loadlong_explicit(A,O) my_atomic_load64_explicit((int64*) (A), (O))
-#define my_atomic_storelong(A,B) my_atomic_store64((int64*) (A), (B))
-#define my_atomic_faslong(A,B) my_atomic_fas64((int64*) (A), (B))
-#define my_atomic_caslong(A,B,C) my_atomic_cas64((int64*) (A), (int64*) (B), (C))
+#define my_atomic_addlong(A,B) my_atomic_add64((ATOMIC_I64*) (A), (B))
+#define my_atomic_loadlong(A) my_atomic_load64((ATOMIC_I64*) (A))
+#define my_atomic_loadlong_explicit(A,O) my_atomic_load64_explicit((ATOMIC_I64*) (A), (O))
+#define my_atomic_storelong(A,B) my_atomic_store64((ATOMIC_I64*) (A), (B))
+#define my_atomic_faslong(A,B) my_atomic_fas64((ATOMIC_I64*) (A), (B))
+#define my_atomic_caslong(A,B,C) my_atomic_cas64((ATOMIC_I64*) (A), (ATOMIC_I64*) (B), (C))
 #endif
 
 #ifndef MY_MEMORY_ORDER_SEQ_CST
--- a/storage/perfschema/pfs_atomic.h
+++ b/storage/perfschema/pfs_atomic.h
@@ -41,7 +41,7 @@ public:
   }
 
   /** Atomic load. */
-  static inline int64 load_64(int64 *ptr)
+  static inline int64 load_64(ATOMIC_I64 *ptr)
   {
     return my_atomic_load64(ptr);
   }
@@ -53,9 +53,9 @@ public:
   }
 
   /** Atomic load. */
-  static inline uint64 load_u64(uint64 *ptr)
+  static inline uint64 load_u64(ATOMIC_U64 *ptr)
   {
-    return (uint64) my_atomic_load64((int64*) ptr);
+    return (uint64) my_atomic_load64((ATOMIC_I64*) ptr);
   }
 
   /** Atomic store. */
@@ -65,7 +65,7 @@ public:
   }
 
   /** Atomic store. */
-  static inline void store_64(int64 *ptr, int64 value)
+  static inline void store_64(ATOMIC_I64 *ptr, int64 value)
   {
     my_atomic_store64(ptr, value);
   }
@@ -77,9 +77,9 @@ public:
   }
 
   /** Atomic store. */
-  static inline void store_u64(uint64 *ptr, uint64 value)
+  static inline void store_u64(ATOMIC_U64 *ptr, uint64 value)
   {
-    my_atomic_store64((int64*) ptr, (int64) value);
+    my_atomic_store64((ATOMIC_I64*) ptr, (int64) value);
   }
 
   /** Atomic add. */
@@ -89,7 +89,7 @@ public:
   }
 
   /** Atomic add. */
-  static inline int64 add_64(int64 *ptr, int64 value)
+  static inline int64 add_64(ATOMIC_I64 *ptr, int64 value)
   {
     return my_atomic_add64(ptr, value);
   }
@@ -101,9 +101,9 @@ public:
   }
 
   /** Atomic add. */
-  static inline uint64 add_u64(uint64 *ptr, uint64 value)
+  static inline uint64 add_u64(ATOMIC_U64 *ptr, uint64 value)
   {
-    return (uint64) my_atomic_add64((int64*) ptr, (int64) value);
+    return (uint64) my_atomic_add64((ATOMIC_I64*) ptr, (int64) value);
   }
 
   /** Atomic compare and swap. */
@@ -114,7 +114,7 @@ public:
   }
 
   /** Atomic compare and swap. */
-  static inline bool cas_64(int64 *ptr, int64 *old_value,
+  static inline bool cas_64(ATOMIC_I64 *ptr, ATOMIC_I64 *old_value,
                             int64 new_value)
   {
     return my_atomic_cas64(ptr, old_value, new_value);
@@ -129,10 +129,10 @@ public:
   }
 
   /** Atomic compare and swap. */
-  static inline bool cas_u64(uint64 *ptr, uint64 *old_value,
+  static inline bool cas_u64(ATOMIC_U64 *ptr, ATOMIC_U64 *old_value,
                             uint64 new_value)
   {
-    return my_atomic_cas64((int64*) ptr, (int64*) old_value,
+    return my_atomic_cas64((ATOMIC_I64*) ptr, (ATOMIC_I64*) old_value,
                            (uint64) new_value);
   }
 };
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -1049,7 +1049,7 @@ static inline void update_global_memory_
                      (longlong) global_status_var.global_memory_used, size));
 
   // workaround for gcc 4.2.4-1ubuntu4 -fPIE (from DEB_BUILD_HARDENING=1)
-  int64 volatile * volatile ptr= &global_status_var.global_memory_used;
+  ATOMIC_I64 volatile * volatile ptr= &global_status_var.global_memory_used;
   my_atomic_add64_explicit(ptr, size, MY_MEMORY_ORDER_RELAXED);
 }
 
--- a/storage/innobase/include/srv0mon.h
+++ b/storage/innobase/include/srv0mon.h
@@ -49,7 +49,7 @@ enum monitor_running_status {
 typedef enum monitor_running_status monitor_running_t;
 
 /** Monitor counter value type */
-typedef int64_t mon_type_t;
+typedef ATOMIC_I64 mon_type_t;
 
 /** Two monitor structures are defined in this file. One is
 "monitor_value_t" which contains dynamic counter values for each
@@ -568,7 +568,7 @@ Use MONITOR_INC if appropriate mutex pro
         if (enabled) {                                          \
                 ib_uint64_t     value;                          \
                 value = my_atomic_add64_explicit(               \
-                        (int64*) &MONITOR_VALUE(monitor), 1,    \
+                        (ATOMIC_I64*) &MONITOR_VALUE(monitor), 1, \
                         MY_MEMORY_ORDER_RELAXED) + 1;           \
                 /* Note: This is not 100% accurate because of the \
                 inherent race, we ignore it due to performance. */ \
@@ -585,7 +585,7 @@ Use MONITOR_DEC if appropriate mutex pro
         if (enabled) {                                          \
                 ib_uint64_t     value;                          \
                 value = my_atomic_add64_explicit(               \
-                        (int64*) &MONITOR_VALUE(monitor), -1,   \
+                        (ATOMIC_I64*) &MONITOR_VALUE(monitor), -1, \
                         MY_MEMORY_ORDER_RELAXED) - 1;           \
                 /* Note: This is not 100% accurate because of the \
                 inherent race, we ignore it due to performance. */ \
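
Illustration of the underlying issue (a minimal standalone sketch, not
part of the patch; the function and type names are hypothetical). On
the i386 ABI a 64-bit integer is only 4-byte aligned, so Clang cannot
prove the alignment it requires to inline the atomic and instead calls
__atomic_load_8 & co. in libatomic. Making the 8-byte alignment part of
the type, as the ATOMIC_I64/ATOMIC_U64 typedefs above do, lets Clang
inline the operation. Compiling something like the following with
"clang -m32 -O2 -S" should show the difference:

#include <stdint.h>

/* Plain pointer: int64_t has 4-byte ABI alignment on i386, so Clang
   conservatively emits a call to __atomic_load_8 in libatomic. */
int64_t load_plain(int64_t *p)
{
  return __atomic_load_n(p, __ATOMIC_SEQ_CST);
}

/* Same trick as the patch: 8-byte alignment is now part of the type,
   so Clang can inline the atomic load (e.g. via lock cmpxchg8b). */
typedef __attribute__((__aligned__(8))) int64_t aligned_i64;

int64_t load_aligned(aligned_i64 *p)
{
  return __atomic_load_n(p, __ATOMIC_SEQ_CST);
}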