https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102125
--- Comment #5 from Richard Earnshaw <rearnsha at gcc dot gnu.org> ---
Testcase was not quite complete. Extending it to:

typedef unsigned long long uint64_t;
typedef unsigned long uint32_t;
typedef unsigned char uint8_t;

uint64_t bar64(const uint8_t *rData1)
{
    uint64_t buffer;
    __builtin_memcpy(&buffer, rData1, sizeof(buffer));
    return buffer;
}

uint32_t bar32(const uint8_t *rData1)
{
    uint32_t buffer;
    __builtin_memcpy(&buffer, rData1, sizeof(buffer));
    return buffer;
}

and then looking at the optimized tree output we see:

;; Function bar64 (bar64, funcdef_no=0, decl_uid=4196, cgraph_uid=1, symbol_order=0)

uint64_t bar64 (const uint8_t * rData1)
{
  uint64_t buffer;
  uint64_t _4;

  <bb 2> [local count: 1073741824]:
  __builtin_memcpy (&buffer, rData1_2(D), 8);
  _4 = buffer;
  buffer ={v} {CLOBBER};
  return _4;

}

;; Function bar32 (bar32, funcdef_no=1, decl_uid=4200, cgraph_uid=2, symbol_order=1)

uint32_t bar32 (const uint8_t * rData1)
{
  unsigned int _3;

  <bb 2> [local count: 1073741824]:
  _3 = MEM <unsigned int> [(char * {ref-all})rData1_2(D)];
  return _3;

}

So in the 32-bit case we've eliminated the memcpy at the tree level, but failed to do that for 64-bit objects. We probably need to add 64-bit support to the movmisalign<mode> pattern.