# -------------------------------------------------------------------------
# convert_asm_neon -- weighted 3-channel -> 1-channel byte conversion
#
# Syntax / target: GNU as, 32-bit ARM (ARMv7-A) with NEON.
# -------------------------------------------------------------------------
        .arch   armv7a
        .fpu    neon
        .text
        .align  4

# Per-channel 8-bit weights.  They sum to 256, so the 16-bit accumulator
# peaks at 255 * 256 = 65280 and cannot overflow before the >> 8.
        .equ    CONVERT_RFAC, 77
        .equ    CONVERT_GFAC, 151
        .equ    CONVERT_BFAC, 28

        .global convert_asm_neon
        .type   convert_asm_neon, %function

# -------------------------------------------------------------------------
# C prototype:
#
#   void convert_asm_neon (char * dest, char *src, int n);
#
# For each group of 8 elements:
#   dest[i] = (77*src[3i] + 151*src[3i+1] + 28*src[3i+2]) >> 8
#
# -------------------------------------------------------------------------
# In:
#   r0 = dest  (one byte written per element)
#   r1 = src   (three interleaved bytes read per element)
#   r2 = n     (element count)
#
# Elements are processed 8 at a time: n / 8 groups are converted and any
# remainder (n % 8) is left untouched.  If n < 8 (including zero or a
# negative value) the function returns without touching memory.
#
# Clobbers: r0, r1, r2, flags, d0-d7
#
# NEON register roles:
#   d0,d1,d2    = de-interleaved rgb channels
#   d3          = rfac
#   d4          = gfac
#   d5          = bfac
#   q3 (d6,d7)  = 16-bit accumulator; d6 is reused for the narrowed result
#
# d8-d15 / q4-q7 must be preserved for the caller; they are not used here.
# No core callee-saved registers and no stack are used.
# -------------------------------------------------------------------------
convert_asm_neon:
        cmp     r2, #8                  @ signed: also rejects n <= 0
        blt     .Lconvert_done          @ fewer than one full group: nothing to do
        lsr     r2, r2, #3              @ r2 = number of 8-element groups (>= 1)

        vmov.i8 d3, #CONVERT_RFAC       @ broadcast weights to all 8 byte lanes
        vmov.i8 d4, #CONVERT_GFAC
        vmov.i8 d5, #CONVERT_BFAC

.Lconvert_loop:
        vld3.8  {d0-d2}, [r1]!          @ load 24 bytes, split into 3 channels
        vmull.u8 q3, d0, d3             @ acc  = ch0 * rfac (widen to u16)
        vmlal.u8 q3, d1, d4             @ acc += ch1 * gfac
        vmlal.u8 q3, d2, d5             @ acc += ch2 * bfac
        vshrn.u16 d6, q3, #8            @ narrow: result = acc >> 8
        vst1.8  {d6}, [r0]!             @ store 8 result bytes
        subs    r2, r2, #1              @ one group done
        bne     .Lconvert_loop

.Lconvert_done:
        bx      lr

        .size   convert_asm_neon, .-convert_asm_neon