/*
 *  linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 *  Copyright (C) 1995-2001 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <asm/assembler.h>
#include <asm/export.h>

/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum)
 * r0 = src, r1 = dst, r2 = len, r3 = sum
 * Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 */
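/*
 * A rough C model of what this routine computes (a sketch for
 * orientation, not kernel code; u8/u32/u64 and little-endian
 * halfword lanes are assumed, and the byte-lane fixup done at
 * .Ldone is simplified away):
 *
 *	u32 csum_partial_copy_model(const u8 *src, u8 *dst,
 *	                            int len, u32 sum)
 *	{
 *		u64 acc = sum;
 *
 *		for (int i = 0; i + 1 < len; i += 2) {
 *			dst[i] = src[i];
 *			dst[i + 1] = src[i + 1];
 *			acc += src[i] | (src[i + 1] << 8);
 *		}
 *		if (len & 1) {
 *			dst[len - 1] = src[len - 1];
 *			acc += src[len - 1];		// low lane
 *		}
 *		acc = (acc & 0xffffffff) + (acc >> 32);	// fold carries
 *		return (u32)((acc & 0xffffffff) + (acc >> 32));
 *	}
 *
 * The code below reaches an equivalent result (once the caller folds
 * it to 16 bits) by summing whole 32-bit words with ADC chaining.
 */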

src             .req    r0
dst             .req    r1
len             .req    r2
sum             .req    r3

.Lzero:         mov     r0, sum
                load_regs

/*
 * Align an unaligned destination pointer.  We know that
 * we have >= 8 bytes here, so we don't need to check
 * the length.  Note that the source pointer hasn't been
 * aligned yet.
 */
.Ldst_unaligned:
                tst     dst, #1
                beq     .Ldst_16bit

                load1b  ip
                sub     len, len, #1
                adcs    sum, sum, ip, put_byte_1        @ update checksum
                strb    ip, [dst], #1
                tst     dst, #2
                reteq   lr                              @ dst is now 32-bit aligned

.Ldst_16bit:    load2b  r8, ip
                sub     len, len, #2
                adcs    sum, sum, r8, put_byte_0
                strb    r8, [dst], #1
                adcs    sum, sum, ip, put_byte_1
                strb    ip, [dst], #1
                ret     lr                              @ dst is now 32-bit aligned
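/*
 * Byte-lane bookkeeping, as a sketch in C.  Assuming the
 * little-endian <asm/assembler.h> definitions (put_byte_0 is
 * lsl #0, put_byte_1 is lsl #8), the single-byte step above is:
 *
 *	*dst++ = b;		// strb ip, [dst], #1
 *	sum += (u32)b << 8;	// adcs sum, sum, ip, put_byte_1
 *				// (plus the incoming carry, via 's')
 *
 * A byte stored at an odd dst address is accumulated in bits 15:8,
 * so the lanes of 'sum' always match the lanes of the data as it
 * sits at dst; that is what allows the single 'ror #8' fixup at
 * .Ldone.
 */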

/*
 * Handle 0 to 7 bytes, with any alignment of source and
 * destination pointers.  Note that when we get here, C = 0.
 */
.Lless8:        teq     len, #0                 @ check for zero count
                beq     .Lzero

                /* we must have at least one byte. */
                tst     dst, #1                 @ dst 16-bit aligned?
                beq     .Lless8_aligned

                /* Align dst */
                load1b  ip
                sub     len, len, #1
                adcs    sum, sum, ip, put_byte_1        @ update checksum
                strb    ip, [dst], #1
                tst     len, #6
                beq     .Lless8_byteonly

1:              load2b  r8, ip
                sub     len, len, #2
                adcs    sum, sum, r8, put_byte_0
                strb    r8, [dst], #1
                adcs    sum, sum, ip, put_byte_1
                strb    ip, [dst], #1
.Lless8_aligned:
                tst     len, #6
                bne     1b
.Lless8_byteonly:
                tst     len, #1
                beq     .Ldone
                load1b  r8
                adcs    sum, sum, r8, put_byte_0        @ update checksum
                strb    r8, [dst], #1
                b       .Ldone
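/*
 * The tail above, sketched in C (illustrative only; variable names
 * are made up and little-endian lanes are assumed):
 *
 *	if (dst & 1) {			// align dst to 16 bits
 *		*dst++ = b = *src++;
 *		sum += b << 8;		// put_byte_1 lane
 *		len--;
 *	}
 *	while (len & 6) {		// halfword pairs
 *		*dst++ = lo = *src++;
 *		*dst++ = hi = *src++;
 *		sum += lo | (hi << 8);
 *		len -= 2;
 *	}
 *	if (len & 1) {			// at most one byte left
 *		*dst++ = b = *src++;
 *		sum += b;		// put_byte_0 lane
 *	}
 */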

FN_ENTRY
                save_regs

                cmp     len, #8                 @ Ensure that we have at least
                blo     .Lless8                 @ 8 bytes to copy.

                adds    sum, sum, #0            @ C = 0
                tst     dst, #3                 @ Test destination alignment
                blne    .Ldst_unaligned         @ align destination, return here

/*
 * OK, the dst pointer is now 32-bit aligned, and we know
 * that we must have more than 4 bytes to copy.  Note
 * that C contains the carry from the dst alignment above.
 */
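/*
 * The word loops below lean on ADC carry chaining.  One
 * 'adcs sum, sum, w' step corresponds to this C model, with 'c'
 * standing in for the carry flag:
 *
 *	u64 t = (u64)sum + w + c;	// add word plus incoming carry
 *	sum = (u32)t;
 *	c = (u32)(t >> 32);		// carry-out feeds the next adcs
 *
 * Any carry still pending at the end is folded back in by the
 * 'adc r0, sum, #0' at .Ldone, so no bits are lost.
 */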

                tst     src, #3                 @ Test source alignment
                bne     .Lsrc_not_aligned

/* Routine for src & dst aligned */

                bics    ip, len, #15
                beq     2f

1:              load4l  r4, r5, r6, r7
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b

2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r4, r5
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                tst     ip, #4
                beq     4f

3:              load1l  r4
                str     r4, [dst], #4
                adcs    sum, sum, r4

4:              ands    len, len, #3
                beq     .Ldone
                load1l  r4
                tst     len, #2
                mov     r5, r4, get_byte_0
                beq     .Lexit
                adcs    sum, sum, r4, lspush #16
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_2
.Lexit:         tst     len, #1
                strneb  r5, [dst], #1
                andne   r5, r5, #255
                adcnes  sum, sum, r5, put_byte_0

/*
 * If the dst pointer was not 16-bit aligned, we
 * need to rotate the checksum here to get around
 * the inefficient byte manipulations in the
 * architecture independent code.
 */
.Ldone:         adc     r0, sum, #0
                ldr     sum, [sp, #0]           @ dst
                tst     sum, #1
                movne   r0, r0, ror #8
                load_regs
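/*
 * Worked example for the rotate above (a sketch): a single byte
 * 0x12 copied to an odd dst address was summed via put_byte_1 as
 * 0x00001200; rotating right by 8 yields 0x00000012, which is what
 * the generic code expects for an even-aligned buffer.  For
 * ones'-complement sums the rotation commutes with the later
 * 16-bit fold, so one rotate repairs the entire accumulated sum.
 */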
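/*
 * The three variants below handle a source that is 1, 2 or 3 bytes
 * past word alignment by splicing neighbouring aligned loads with
 * lspull/lspush (lsr/lsl on little-endian, per <asm/assembler.h>).
 * One step of the src & 3 == 1 case, sketched in C:
 *
 *	u32 out = (prev >> 8)		// mov r4, r5, lspull #8
 *		| (next << 24);		// orr r4, r4, r5, lspush #24
 *	prev = next;			// leftovers feed the next word
 *
 * The 2- and 3-byte cases are identical with shift pairs 16/16 and
 * 24/8.
 */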
.Lsrc_not_aligned:
                adc     sum, sum, #0            @ include C from dst alignment
                and     ip, src, #3
                bic     src, src, #3
                load1l  r5
                cmp     ip, #2
                beq     .Lsrc2_aligned
                bhi     .Lsrc3_aligned
                mov     r4, r5, lspull #8       @ C = 0
                bics    ip, len, #15
                beq     2f
1:              load4l  r5, r6, r7, r8
                orr     r4, r4, r5, lspush #24
                mov     r5, r5, lspull #8
                orr     r5, r5, r6, lspush #24
                mov     r6, r6, lspull #8
                orr     r6, r6, r7, lspush #24
                mov     r7, r7, lspull #8
                orr     r7, r7, r8, lspush #24
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, lspull #8
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b
2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r5, r6
                orr     r4, r4, r5, lspush #24
                mov     r5, r5, lspull #8
                orr     r5, r5, r6, lspush #24
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, lspull #8
                tst     ip, #4
                beq     4f
3:              load1l  r5
                orr     r4, r4, r5, lspush #24
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, lspull #8
4:              ands    len, len, #3
                beq     .Ldone
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .Lexit
                adcs    sum, sum, r4, lspush #16
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_2
                b       .Lexit

.Lsrc2_aligned: mov     r4, r5, lspull #16
                adds    sum, sum, #0
                bics    ip, len, #15
                beq     2f
1:              load4l  r5, r6, r7, r8
                orr     r4, r4, r5, lspush #16
                mov     r5, r5, lspull #16
                orr     r5, r5, r6, lspush #16
                mov     r6, r6, lspull #16
                orr     r6, r6, r7, lspush #16
                mov     r7, r7, lspull #16
                orr     r7, r7, r8, lspush #16
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, lspull #16
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b
2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r5, r6
                orr     r4, r4, r5, lspush #16
                mov     r5, r5, lspull #16
                orr     r5, r5, r6, lspush #16
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, lspull #16
                tst     ip, #4
                beq     4f
3:              load1l  r5
                orr     r4, r4, r5, lspush #16
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, lspull #16
4:              ands    len, len, #3
                beq     .Ldone
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .Lexit
                adcs    sum, sum, r4
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                tst     len, #1
                beq     .Ldone
                load1b  r5
                b       .Lexit

.Lsrc3_aligned: mov     r4, r5, lspull #24
                adds    sum, sum, #0
                bics    ip, len, #15
                beq     2f
1:              load4l  r5, r6, r7, r8
                orr     r4, r4, r5, lspush #8
                mov     r5, r5, lspull #24
                orr     r5, r5, r6, lspush #8
                mov     r6, r6, lspull #24
                orr     r6, r6, r7, lspush #8
                mov     r7, r7, lspull #24
                orr     r7, r7, r8, lspush #8
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, lspull #24
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b
2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r5, r6
                orr     r4, r4, r5, lspush #8
                mov     r5, r5, lspull #24
                orr     r5, r5, r6, lspush #8
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, lspull #24
                tst     ip, #4
                beq     4f
3:              load1l  r5
                orr     r4, r4, r5, lspush #8
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, lspull #24
4:              ands    len, len, #3
                beq     .Ldone
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .Lexit
                strb    r5, [dst], #1
                adcs    sum, sum, r4
                load1l  r4
                mov     r5, r4, get_byte_0
                strb    r5, [dst], #1
                adcs    sum, sum, r4, lspush #24
                mov     r5, r4, get_byte_1
                b       .Lexit
FN_EXIT
FN_EXPORT