blob: 19674ca2acfcec658089d64f4ea89fbf48dcc096 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
 * Optimized version of the ip_fast_csum() function
3 * Used for calculating IP header checksum
4 *
5 * Return: 16bit checksum, complemented
6 *
7 * Inputs:
8 * in0: address of buffer to checksum (char *)
 * in1: length of the buffer in 32-bit words (int); 5 for a 20-byte header
10 *
11 * Copyright (C) 2002 Intel Corp.
12 * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
13 */
14
15#include <asm/asmmacro.h>
16
/*
 * Since we know that most likely this function is called with buf aligned
 * on a 4-byte boundary and 20 bytes in length, we can execute rather quickly
 * versus calling the generic version of do_csum, which has lots of overhead
 * in handling various alignments and sizes.  However, due to the lack of
 * constraints put on the function's input arguments, cases with alignment
 * not on 4-byte or size not equal to 20 bytes will be handled by the generic
 * do_csum function.
 */
25
/* Explicit names for the registers this routine uses for its interface. */
#define in0	r32		/* arg 0: address of the buffer */
#define in1	r33		/* arg 1: buffer length in 32-bit words */
#define ret0	r8		/* return value */

/*
 * ip_fast_csum(in0 = buf, in1 = nwords)
 *
 * Fast path:  taken when nwords == 5 and buf is 4-byte aligned.  The five
 *             32-bit words are summed in a 64-bit register, the sum is
 *             folded down to 16 bits, and the result is complemented.
 * Slow path:  everything else is forwarded to do_csum(buf, nwords * 4) and
 *             its result is complemented.
 *
 * Return:     ret0 (r8) = complemented checksum in bits 0..15; bits 16..63
 *             are zero on both paths.
 * Scratch:    r9, r14, r15, r20-r24, p6, p7 (r34/r35 on the slow path, after
 *             the alloc).
 */
GLOBAL_ENTRY(ip_fast_csum)
	.prologue
	.body
	cmp.ne	p6,p7=5,in1		// p6 = not 5 words (20 bytes), p7 = !p6
	and	r14=3,in0		// r14 = low two address bits
	add	r15=4,in0		// second pointer, one word ahead of in0
	;;
	cmp.ne.or.andcm p6,p7=r14,r0	// misaligned: force p6 = 1, p7 = 0
	;;
	// Start the first two loads before the branch resolves; they are
	// predicated on p7 so they only issue for the fast-path case.
(p7)	ld4	r20=[in0],8		// word 0, in0 -> word 2
(p7)	ld4	r21=[r15],8		// word 1, r15 -> word 3
(p6)	br.spnt	.generic
	;;
	ld4	r22=[in0],8		// word 2, in0 -> word 4
	ld4	r23=[r15],8		// word 3
	;;
	ld4	r24=[in0]		// word 4
	add	r20=r20,r21		// word0 + word1
	add	r22=r22,r23		// word2 + word3
	;;
	add	r20=r20,r22
	;;
	add	r20=r20,r24		// r20 = sum of all five words
	;;
	// Fold the wide sum to 16 bits: add the bits above bit 15 back into
	// the low halfword.  Three rounds are done so that no carry is left.
	shr.u	ret0=r20,16		// round 1
	zxt2	r20=r20
	;;
	add	r20=ret0,r20
	;;
	shr.u	ret0=r20,16		// round 2
	zxt2	r20=r20
	;;
	add	r20=ret0,r20
	;;
	shr.u	ret0=r20,16		// round 3
	zxt2	r20=r20
	;;
	add	r20=ret0,r20
	mov	r9=0xffff		// mask in a register: andcm's immediate
					// form only takes an 8-bit value
	;;
	andcm	ret0=r9,r20		// ret0 = ~sum & 0xffff (16-bit result)
	.restore sp			// reset frame state
	br.ret.sptk.many b0
	;;

.generic:
	.prologue
	.save ar.pfs, r35
	alloc	r35=ar.pfs,2,2,2,0	// 2 in, 2 local, 2 out
	.save rp, r34
	mov	r34=b0			// keep return address across the call
	.body
	dep.z	out1=in1,2,30		// out1 = in1 << 2: word count -> bytes
	mov	out0=in0
	;;
	br.call.sptk.many b0=do_csum
	;;
	mov	r9=0xffff		// set after the call: r9 is scratch
	;;
	andcm	ret0=r9,ret0		// ret0 = ~do_csum() & 0xffff
	mov	ar.pfs=r35
	mov	b0=r34
	br.ret.sptk.many b0
END(ip_fast_csum)