move new files out from platform support patch
[openwrt/staging/yousong.git] / target / linux / ubicom32 / files / arch / ubicom32 / lib / checksum.c
1 /*
2 * arch/ubicom32/lib/checksum.c
3 * Optimized checksum utilities for IP.
4 *
5 * (C) Copyright 2009, Ubicom, Inc.
6 *
7 * This file is part of the Ubicom32 Linux Kernel Port.
8 *
9 * The Ubicom32 Linux Kernel Port is free software: you can redistribute
10 * it and/or modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation, either version 2 of the
12 * License, or (at your option) any later version.
13 *
14 * The Ubicom32 Linux Kernel Port is distributed in the hope that it
15 * will be useful, but WITHOUT ANY WARRANTY; without even the implied
16 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
17 * the GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with the Ubicom32 Linux Kernel Port. If not,
21 * see <http://www.gnu.org/licenses/>.
22 *
23 * Ubicom32 implementation derived from (with many thanks):
24 * arch/m68knommu
25 * arch/blackfin
26 * arch/parisc
27 */
28 /*
29 * INET An implementation of the TCP/IP protocol suite for the LINUX
30 * operating system. INET is implemented using the BSD Socket
31 * interface as the means of communication with the user level.
32 *
33 * IP/TCP/UDP checksumming routines
34 *
35 * Authors: Jorge Cwik, <jorge@laser.satlink.net>
36 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
37 * Tom May, <ftom@netcom.com>
38 * Andreas Schwab, <schwab@issan.informatik.uni-dortmund.de>
39 * Lots of code moved from tcp.c and ip.c; see those files
40 * for more names.
41 *
42 * 03/02/96 Jes Sorensen, Andreas Schwab, Roman Hodek:
43 * Fixed some nasty bugs, causing some horrible crashes.
44 * A: At some points, the sum (%0) was used as
45 * length-counter instead of the length counter
46 * (%1). Thanks to Roman Hodek for pointing this out.
47 * B: GCC seems to mess up if one uses too many
48 * data-registers to hold input values and one tries to
49 * specify d0 and d1 as scratch registers. Letting gcc choose these
50 * registers itself solves the problem.
51 *
52 * This program is free software; you can redistribute it and/or
53 * modify it under the terms of the GNU General Public License
54 * as published by the Free Software Foundation; either version
55 * 2 of the License, or (at your option) any later version.
56 */
57
58 /* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access kills, so most
59 of the assembly has to go. */
60
61 #include <linux/module.h>
62 #include <net/checksum.h>
63
64 static unsigned long do_csum(const unsigned char * buff, int len)
65 {
66 int count;
67 unsigned long result = 0;
68
69 /*
70 * The following optimized assembly code cannot handle data length less than 7 bytes!
71 */
72 if (likely(len >= 7)) {
73 len -= (4 - (int)buff) & 3;
74 count = len >> 2;
75 asm (
76 " sub.4 d15, #0, %2 \n\t" // set up for jump table
77 " and.4 d15, #(32-1), d15 \n\t" // d15 = (-m) & (32 - 1)
78
79 " bfextu d14, %0, #2 \n\t" // test 2 LSB of buff
80 " jmpne.w.f 100f \n\t"
81 " add.4 %1, #0, %1 \n\t" // clear C
82 " moveai a3, #%%hi(1f) \n\t" // table jump
83 " lea.1 a3, %%lo(1f)(a3) \n\t"
84 " lea.4 a3, (a3,d15) \n\t"
85 " calli a3, 0(a3) \n\t"
86
87 "100: sub.4 %0, %0, d14 \n\t"
88 " sub.4 d14, #4, d14 \n\t"
89 " lsl.4 d14, d14, #3 \n\t"
90 " add.4 %1, #0, %1 \n\t" // clear C
91 " moveai a3, #%%hi(1f) \n\t" // table jump
92 " lea.1 a3, %%lo(1f)(a3) \n\t"
93 " lea.4 a3, (a3,d15) \n\t"
94 " bfextu %1, (%0)4++, d14 \n\t" // read first partial word
95 " calli a3, 0(a3) \n\t"
96 #if 1
97 "200: lsl.4 %3, %3, #3 \n\t"
98 " bfrvrs d15, (%0), #0 \n\t" // read last word (partial)
99 " bfextu d15, d15, %3 \n\t"
100 " bfrvrs d15, d15, #0 \n\t"
101 " add.4 %1, d15, %1 \n\t"
102 " addc %1, #0, %1 \n\t" // sample C again
103 " jmpt.w.t 2f \n\t"
104 #else
105 "200: move.1 d15, 0(%0) \n\t"
106 " lsl.4 d15, d15, #8 \n\t"
107 " add.4 %1, d15, %1 \n\t"
108 " addc %1, #0, %1 \n\t" // sample C again
109 " add.4 %3, #-1, %3 \n\t"
110 " jmpeq.w.t 2f \n\t"
111
112 " move.1 d15, 1(%0) \n\t"
113 " add.4 %1, d15, %1 \n\t"
114 " addc %1, #0, %1 \n\t" // sample C again
115 " add.4 %3, #-1, %3 \n\t"
116 " jmpeq.w.t 2f \n\t"
117
118 " move.1 d15, 2(%0) \n\t"
119 " lsl.4 d15, d15, #8 \n\t"
120 " add.4 %1, d15, %1 \n\t"
121 " addc %1, #0, %1 \n\t" // sample C again
122 " jmpt.w.t 2f \n\t"
123 #endif
124 #if defined(IP7000) || defined(IP7000_REV2)
125 "300: swapb.2 %1, %1 \n\t"
126 #else
127 "300: shmrg.2 %1, %1, %1 \n\t"
128 " lsr.4 %1, %1, #8 \n\t"
129 " bfextu %1, %1, #16 \n\t"
130 #endif
131 " jmpt.w.t 3f \n\t"
132
133 "1: add.4 %1, (%0)4++, %1 \n\t" // first add without C
134 " .rept 31 \n\t"
135 " addc %1, (%0)4++, %1 \n\t"
136 " .endr \n\t"
137 " addc %1, #0, %1 \n\t" // sample C again
138 " add.4 %2, #-32, %2 \n\t"
139 " jmpgt.w.t 1b \n\t"
140
141 " and.4 %3, #3, %3 \n\t" // check n
142 " jmpne.w.f 200b \n\t"
143
144 "2: .rept 2 \n\t"
145 " lsr.4 d15, %1, #16 \n\t"
146 " bfextu %1, %1, #16 \n\t"
147 " add.4 %1, d15, %1 \n\t"
148 " .endr \n\t"
149 " btst d14, #3 \n\t" // start from odd address (<< 3)?
150 " jmpne.w.f 300b \n\t"
151 "3: \n\t"
152
153 : "+a"(buff), "+d"(result), "+d"(count), "+d"(len)
154 :
155 : "d15", "d14", "a3", "cc"
156 );
157
158 return result;
159 }
160
161 /*
162 * handle a few bytes and fold result into 16-bit
163 */
164 while (len-- > 0) {
165 result += (*buff++ << 8);
166 if (len) {
167 result += *buff++;
168 len--;
169 }
170 }
171 asm (
172 " .rept 2 \n\t"
173 " lsr.4 d15, %0, #16 \n\t"
174 " bfextu %0, %0, #16 \n\t"
175 " add.4 %0, d15, %0 \n\t"
176 " .endr \n\t"
177 : "+d" (result)
178 :
179 : "d15", "cc"
180 );
181
182 return result;
183 }
184
185 /*
186 * This is a version of ip_compute_csum() optimized for IP headers,
187 * which always checksum on 4 octet boundaries.
188 */
189 __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
190 {
191 return (__force __sum16)~do_csum(iph,ihl*4);
192 }
193
194 /*
195 * computes the checksum of a memory block at buff, length len,
196 * and adds in "sum" (32-bit)
197 *
198 * returns a 32-bit number suitable for feeding into itself
199 * or csum_tcpudp_magic
200 *
201 * this function must be called with even lengths, except
202 * for the last fragment, which may be odd
203 *
204 * it's best to have buff aligned on a 32-bit boundary
205 */
206 __wsum csum_partial(const void *buff, int len, __wsum sum)
207 {
208 unsigned int result = do_csum(buff, len);
209
210 /* add in old sum, and carry.. */
211 result += (__force u32)sum;
212 if ((__force u32)sum > result)
213 result += 1;
214 return (__force __wsum)result;
215 }
216
217 EXPORT_SYMBOL(csum_partial);
218
219 /*
220 * this routine is used for miscellaneous IP-like checksums, mainly
221 * in icmp.c
222 */
223 __sum16 ip_compute_csum(const void *buff, int len)
224 {
225 return (__force __sum16)~do_csum(buff,len);
226 }
227
228 /*
229 * copy from fs while checksumming, otherwise like csum_partial
230 */
231
232 __wsum
233 csum_partial_copy_from_user(const void __user *src, void *dst,
234 int len, __wsum sum, int *csum_err)
235 {
236 if (csum_err) *csum_err = 0;
237 memcpy(dst, (__force const void *)src, len);
238 return csum_partial(dst, len, sum);
239 }
240
241 /*
242 * copy from ds while checksumming, otherwise like csum_partial
243 */
244
245 __wsum
246 csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
247 {
248 memcpy(dst, src, len);
249 return csum_partial(dst, len, sum);
250 }