Skip to content

[AArch64] Suboptimal memcpy/memmove/memset for non-power-of-two sizes #165625

@Kmeakin

Description

@Kmeakin

memcpy

https://godbolt.org/z/Yfq56nfTP

LLVM assembly:

// copy65 (LLVM output, memcpy section): copies 65 bytes from the buffer
// addressed by x1 to the buffer addressed by x0.
// Uses 4 load and 4 store instructions; q0 is used twice.
copy65:
        // Bytes 0..15: loaded and stored immediately, which frees q0 for reuse.
        ldr     q0, [x1]
        str     q0, [x0]
        // Bytes 32..63 as a pair: q2 = bytes 32..47, q0 = bytes 48..63.
        ldp     q2, q0, [x1, #32]
        // Byte 64, the single trailing byte.
        ldrb    w8, [x1, #64]
        // Bytes 16..31, loaded separately rather than paired with bytes 0..15.
        ldr     q1, [x1, #16]
        // Remaining stores, mirroring the three loads above.
        strb    w8, [x0, #64]
        stp     q2, q0, [x0, #32]
        str     q1, [x0, #16]
        ret

GCC assembly:

// copy65 (GCC output, memcpy section): copies 65 bytes from the buffer
// addressed by x1 to the buffer addressed by x0.
// Uses 3 load and 3 store instructions; every load precedes every store.
copy65:
        // Bytes 0..31 and 32..63 as two 32-byte register pairs.
        ldp     q29, q28, [x1]
        ldp     q31, q30, [x1, 32]
        // Byte 64; the result overwrites w1, which is the last use of the
        // source pointer in x1.
        ldrb    w1, [x1, 64]
        // Stores in the same order and at the same offsets as the loads.
        stp     q29, q28, [x0]
        stp     q31, q30, [x0, 32]
        strb    w1, [x0, 64]
        ret

memmove

https://godbolt.org/z/TafacnzY3

LLVM assembly:

// move7 (LLVM output, memmove section): moves 7 bytes from the buffer
// addressed by x1 to the buffer addressed by x0.
// Splits the 7 bytes into non-overlapping 1 + 2 + 4 byte pieces:
// 3 loads followed by 3 stores.
move7:
        // All loads are issued before any store.
        // w8 = byte 6, w9 = bytes 4..5, w10 = bytes 0..3.
        ldrb    w8, [x1, #6]
        ldrh    w9, [x1, #4]
        ldr     w10, [x1]
        // Stores at the same offsets and widths as the loads.
        strb    w8, [x0, #6]
        strh    w9, [x0, #4]
        str     w10, [x0]
        ret

GCC assembly:

// move7 (GCC output, memmove section): moves 7 bytes from the buffer
// addressed by x1 to the buffer addressed by x0.
// Uses two overlapping 4-byte accesses (offsets 0 and 3), so byte 3 is
// covered by both: 2 loads followed by 2 stores.
move7:
        // w2 = bytes 0..3.
        ldr     w2, [x1]
        // w1 = bytes 3..6; overwrites w1, the last use of the source pointer.
        ldr     w1, [x1, 3]
        // Both loads are complete before the first store.
        str     w2, [x0]
        str     w1, [x0, 3]
        ret

memset

https://godbolt.org/z/n3qq3EYrT

LLVM assembly:

// set4 (LLVM output, memset section): writes 4 bytes at the address in x0,
// each equal to the low byte of w1.
// The byte is replicated with integer arithmetic in general-purpose registers.
set4:
        // 16843009 == 0x01010101: multiplying a value 0..255 by it copies
        // that value into each of the four bytes of the 32-bit result.
        mov     w8, #16843009
        // Keep only the low 8 bits of the fill value.
        and     w9, w1, #0xff
        mul     w8, w9, w8
        // One 32-bit store writes all four bytes.
        str     w8, [x0]
        ret

GCC assembly:

// set4 (GCC output, memset section): writes 4 bytes at the address in x0,
// each equal to the low byte of w1.
// The byte is replicated in a SIMD register instead of with integer arithmetic.
set4:
        // Copy the low 8 bits of w1 into all 16 byte lanes of v31.
        dup     v31.16b, w1
        // Store only the low 32 bits (s31), i.e. four copies of the byte.
        str     s31, [x0]
        ret

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions