]>
Commit | Line | Data |
---|---|---|
d03fb293 OM |
1 | /** |
2 | * \file bn_mul.h | |
3 | * | |
4 | * \brief Multi-precision integer library | |
5 | * | |
6 | * Copyright (C) 2006-2010, Brainspark B.V. | |
7 | * | |
8 | * This file is part of PolarSSL (http://www.polarssl.org) | |
9 | * Lead Maintainer: Paul Bakker <polarssl_maintainer at polarssl.org> | |
10 | * | |
11 | * All rights reserved. | |
12 | * | |
13 | * This program is free software; you can redistribute it and/or modify | |
14 | * it under the terms of the GNU General Public License as published by | |
15 | * the Free Software Foundation; either version 2 of the License, or | |
16 | * (at your option) any later version. | |
17 | * | |
18 | * This program is distributed in the hope that it will be useful, | |
19 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
21 | * GNU General Public License for more details. | |
22 | * | |
23 | * You should have received a copy of the GNU General Public License along | |
24 | * with this program; if not, write to the Free Software Foundation, Inc., | |
25 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |
26 | */ | |
27 | /* | |
28 | * Multiply source vector [s] with b, add result | |
29 | * to destination vector [d] and set carry c. | |
30 | * | |
31 | * Currently supports: | |
32 | * | |
33 | * . IA-32 (386+) . AMD64 / EM64T | |
34 | * . IA-32 (SSE2) . Motorola 68000 | |
35 | * . PowerPC, 32-bit . MicroBlaze | |
36 | * . PowerPC, 64-bit . TriCore | |
37 | * . SPARC v8 . ARM v3+ | |
38 | * . Alpha . MIPS32 | |
39 | * . C, longlong . C, generic | |
40 | */ | |
41 | #ifndef POLARSSL_BN_MUL_H | |
42 | #define POLARSSL_BN_MUL_H | |
43 | ||
44 | #include "bignum.h" | |
45 | ||
46 | #if defined(POLARSSL_HAVE_ASM) | |
47 | ||
48 | #if defined(__GNUC__) | |
49 | #if defined(__i386__) | |
50 | /* Opens one asm statement: parks ebx in t, then loads esi = s, edi = d, ecx = c (carry) and ebx = b. */ ||
51 | #define MULADDC_INIT \ | |
52 | asm( " \ |
53 | movl %%ebx, %0; \ |
54 | movl %5, %%esi; \ |
55 | movl %6, %%edi; \ |
56 | movl %7, %%ecx; \ |
57 | movl %8, %%ebx; \ |
58 | " | |
59 | /* One limb: lodsl fetches eax through esi, edx:eax = eax * ebx, the carry in ecx and the word at (edi) are added, edx becomes the new carry in ecx and stosl stores eax through edi. */ ||
60 | #define MULADDC_CORE \ | |
61 | " \ |
62 | lodsl; \ |
63 | mull %%ebx; \ |
64 | addl %%ecx, %%eax; \ |
65 | adcl $0, %%edx; \ |
66 | addl (%%edi), %%eax; \ |
67 | adcl $0, %%edx; \ |
68 | movl %%edx, %%ecx; \ |
69 | stosl; \ |
70 | " | |
71 | ||
72 | #if defined(POLARSSL_HAVE_SSE2) | |
73 | /* Eight limbs per expansion (huit is French for eight): MMX registers with pmuludq/paddq cover offsets 0..28 from esi and edi, both pointers then advance by 32 and the running carry is moved from mm1 back to ecx. */ ||
74 | #define MULADDC_HUIT \ | |
75 | " \ |
76 | movd %%ecx, %%mm1; \ |
77 | movd %%ebx, %%mm0; \ |
78 | movd (%%edi), %%mm3; \ |
79 | paddq %%mm3, %%mm1; \ |
80 | movd (%%esi), %%mm2; \ |
81 | pmuludq %%mm0, %%mm2; \ |
82 | movd 4(%%esi), %%mm4; \ |
83 | pmuludq %%mm0, %%mm4; \ |
84 | movd 8(%%esi), %%mm6; \ |
85 | pmuludq %%mm0, %%mm6; \ |
86 | movd 12(%%esi), %%mm7; \ |
87 | pmuludq %%mm0, %%mm7; \ |
88 | paddq %%mm2, %%mm1; \ |
89 | movd 4(%%edi), %%mm3; \ |
90 | paddq %%mm4, %%mm3; \ |
91 | movd 8(%%edi), %%mm5; \ |
92 | paddq %%mm6, %%mm5; \ |
93 | movd 12(%%edi), %%mm4; \ |
94 | paddq %%mm4, %%mm7; \ |
95 | movd %%mm1, (%%edi); \ |
96 | movd 16(%%esi), %%mm2; \ |
97 | pmuludq %%mm0, %%mm2; \ |
98 | psrlq $32, %%mm1; \ |
99 | movd 20(%%esi), %%mm4; \ |
100 | pmuludq %%mm0, %%mm4; \ |
101 | paddq %%mm3, %%mm1; \ |
102 | movd 24(%%esi), %%mm6; \ |
103 | pmuludq %%mm0, %%mm6; \ |
104 | movd %%mm1, 4(%%edi); \ |
105 | psrlq $32, %%mm1; \ |
106 | movd 28(%%esi), %%mm3; \ |
107 | pmuludq %%mm0, %%mm3; \ |
108 | paddq %%mm5, %%mm1; \ |
109 | movd 16(%%edi), %%mm5; \ |
110 | paddq %%mm5, %%mm2; \ |
111 | movd %%mm1, 8(%%edi); \ |
112 | psrlq $32, %%mm1; \ |
113 | paddq %%mm7, %%mm1; \ |
114 | movd 20(%%edi), %%mm5; \ |
115 | paddq %%mm5, %%mm4; \ |
116 | movd %%mm1, 12(%%edi); \ |
117 | psrlq $32, %%mm1; \ |
118 | paddq %%mm2, %%mm1; \ |
119 | movd 24(%%edi), %%mm5; \ |
120 | paddq %%mm5, %%mm6; \ |
121 | movd %%mm1, 16(%%edi); \ |
122 | psrlq $32, %%mm1; \ |
123 | paddq %%mm4, %%mm1; \ |
124 | movd 28(%%edi), %%mm5; \ |
125 | paddq %%mm5, %%mm3; \ |
126 | movd %%mm1, 20(%%edi); \ |
127 | psrlq $32, %%mm1; \ |
128 | paddq %%mm6, %%mm1; \ |
129 | movd %%mm1, 24(%%edi); \ |
130 | psrlq $32, %%mm1; \ |
131 | paddq %%mm3, %%mm1; \ |
132 | movd %%mm1, 28(%%edi); \ |
133 | addl $32, %%edi; \ |
134 | addl $32, %%esi; \ |
135 | psrlq $32, %%mm1; \ |
136 | movd %%mm1, %%ecx; \ |
137 | " | |
138 | /* SSE2 build: emms ends the MMX section, ebx is restored from t, ecx/edi/esi are written back to c/d/s, and the asm statement is closed with its operand and clobber lists. */ ||
139 | #define MULADDC_STOP \ | |
140 | " \ |
141 | emms; \ |
142 | movl %4, %%ebx; \ |
143 | movl %%ecx, %1; \ |
144 | movl %%edi, %2; \ |
145 | movl %%esi, %3; \ |
146 | " \ | |
147 | : "=m" (t), "=m" (c), "=m" (d), "=m" (s) \ | |
148 | : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b) \ | |
149 | : "eax", "ecx", "edx", "esi", "edi" \ | |
150 | ); | |
151 | ||
152 | #else | |
153 | /* Non-SSE2 build: restores ebx from t, writes ecx/edi/esi back to c/d/s, and closes the asm statement with its operand and clobber lists. */ ||
154 | #define MULADDC_STOP \ | |
155 | " \ |
156 | movl %4, %%ebx; \ |
157 | movl %%ecx, %1; \ |
158 | movl %%edi, %2; \ |
159 | movl %%esi, %3; \ |
160 | " \ | |
161 | : "=m" (t), "=m" (c), "=m" (d), "=m" (s) \ | |
162 | : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b) \ | |
163 | : "eax", "ecx", "edx", "esi", "edi" \ | |
164 | ); | |
165 | #endif /* SSE2 */ | |
166 | #endif /* i386 */ | |
167 | ||
168 | #if defined(__amd64__) || defined (__x86_64__) | |
169 | /* Five separate asm statements: rsi = s, rdi = d, rcx = c, rbx = b, and r8 is zeroed. */ ||
170 | #define MULADDC_INIT \ | |
171 | asm( "movq %0, %%rsi " :: "m" (s)); \ | |
172 | asm( "movq %0, %%rdi " :: "m" (d)); \ | |
173 | asm( "movq %0, %%rcx " :: "m" (c)); \ | |
174 | asm( "movq %0, %%rbx " :: "m" (b)); \ | |
175 | asm( "xorq %r8, %r8 " ); | |
176 | /* One 64-bit limb: rdx:rax = (rsi) * rbx and rsi += 8; the old carry rcx is added to rax, rcx is reset from r8, the low word is added into (rdi), the new carry is rcx = rdx plus the carry bits, and rdi += 8. */ ||
177 | #define MULADDC_CORE \ | |
178 | asm( "movq (%rsi),%rax " ); \ | |
179 | asm( "mulq %rbx " ); \ | |
180 | asm( "addq $8, %rsi " ); \ | |
181 | asm( "addq %rcx, %rax " ); \ | |
182 | asm( "movq %r8, %rcx " ); \ | |
183 | asm( "adcq $0, %rdx " ); \ | |
184 | asm( "nop " ); \ | |
185 | asm( "addq %rax, (%rdi) " ); \ | |
186 | asm( "adcq %rdx, %rcx " ); \ | |
187 | asm( "addq $8, %rdi " ); | |
188 | /* Writes rcx, rdi, rsi back to c, d, s. NOTE(review): the clobber list is attached to this last statement only, so the sequence relies on the compiler leaving these registers and the flags untouched between the separate asm statements - confirm. */ ||
189 | #define MULADDC_STOP \ | |
190 | asm( "movq %%rcx, %0 " : "=m" (c)); \ | |
191 | asm( "movq %%rdi, %0 " : "=m" (d)); \ | |
192 | asm( "movq %%rsi, %0 " : "=m" (s) :: \ | |
193 | "rax", "rcx", "rdx", "rbx", "rsi", "rdi", "r8" ); | |
194 | ||
195 | #endif /* AMD64 */ | |
196 | ||
197 | #if defined(__mc68020__) || defined(__mcpu32__) | |
198 | /* a2 = s, a3 = d, d3 = c, d2 = b; d0 is cleared so that addxl with d0 adds only the extend bit. */ ||
199 | #define MULADDC_INIT \ | |
200 | asm( "movl %0, %%a2 " :: "m" (s)); \ | |
201 | asm( "movl %0, %%a3 " :: "m" (d)); \ | |
202 | asm( "movl %0, %%d3 " :: "m" (c)); \ | |
203 | asm( "movl %0, %%d2 " :: "m" (b)); \ | |
204 | asm( "moveq #0, %d0 " ); | |
205 | /* One limb: d1 = *a2++, d4:d1 = d1 * d2, the carry d3 is added to d1, d1 is added into *a3++, and d3 is rebuilt from d4 plus the extend bit. */ ||
206 | #define MULADDC_CORE \ | |
207 | asm( "movel %a2@+, %d1 " ); \ | |
208 | asm( "mulul %d2, %d4:%d1 " ); \ | |
209 | asm( "addl %d3, %d1 " ); \ | |
210 | asm( "addxl %d0, %d4 " ); \ | |
211 | asm( "moveq #0, %d3 " ); \ | |
212 | asm( "addl %d1, %a3@+ " ); \ | |
213 | asm( "addxl %d4, %d3 " ); | |
214 | /* Stores d3, a3, a2 back into c, d, s; the clobber list is attached to the last statement only. */ ||
215 | #define MULADDC_STOP \ | |
216 | asm( "movl %%d3, %0 " : "=m" (c)); \ | |
217 | asm( "movl %%a3, %0 " : "=m" (d)); \ | |
218 | asm( "movl %%a2, %0 " : "=m" (s) :: \ | |
219 | "d0", "d1", "d2", "d3", "d4", "a2", "a3" ); | |
220 | /* Eight limbs in one expansion (huit is French for eight): d4 and d3 alternate as the high-word register from limb to limb, and the final addxl folds the last extend bit into d3. */ ||
221 | #define MULADDC_HUIT \ | |
222 | asm( "movel %a2@+, %d1 " ); \ | |
223 | asm( "mulul %d2, %d4:%d1 " ); \ | |
224 | asm( "addxl %d3, %d1 " ); \ | |
225 | asm( "addxl %d0, %d4 " ); \ | |
226 | asm( "addl %d1, %a3@+ " ); \ | |
227 | asm( "movel %a2@+, %d1 " ); \ | |
228 | asm( "mulul %d2, %d3:%d1 " ); \ | |
229 | asm( "addxl %d4, %d1 " ); \ | |
230 | asm( "addxl %d0, %d3 " ); \ | |
231 | asm( "addl %d1, %a3@+ " ); \ | |
232 | asm( "movel %a2@+, %d1 " ); \ | |
233 | asm( "mulul %d2, %d4:%d1 " ); \ | |
234 | asm( "addxl %d3, %d1 " ); \ | |
235 | asm( "addxl %d0, %d4 " ); \ | |
236 | asm( "addl %d1, %a3@+ " ); \ | |
237 | asm( "movel %a2@+, %d1 " ); \ | |
238 | asm( "mulul %d2, %d3:%d1 " ); \ | |
239 | asm( "addxl %d4, %d1 " ); \ | |
240 | asm( "addxl %d0, %d3 " ); \ | |
241 | asm( "addl %d1, %a3@+ " ); \ | |
242 | asm( "movel %a2@+, %d1 " ); \ | |
243 | asm( "mulul %d2, %d4:%d1 " ); \ | |
244 | asm( "addxl %d3, %d1 " ); \ | |
245 | asm( "addxl %d0, %d4 " ); \ | |
246 | asm( "addl %d1, %a3@+ " ); \ | |
247 | asm( "movel %a2@+, %d1 " ); \ | |
248 | asm( "mulul %d2, %d3:%d1 " ); \ | |
249 | asm( "addxl %d4, %d1 " ); \ | |
250 | asm( "addxl %d0, %d3 " ); \ | |
251 | asm( "addl %d1, %a3@+ " ); \ | |
252 | asm( "movel %a2@+, %d1 " ); \ | |
253 | asm( "mulul %d2, %d4:%d1 " ); \ | |
254 | asm( "addxl %d3, %d1 " ); \ | |
255 | asm( "addxl %d0, %d4 " ); \ | |
256 | asm( "addl %d1, %a3@+ " ); \ | |
257 | asm( "movel %a2@+, %d1 " ); \ | |
258 | asm( "mulul %d2, %d3:%d1 " ); \ | |
259 | asm( "addxl %d4, %d1 " ); \ | |
260 | asm( "addxl %d0, %d3 " ); \ | |
261 | asm( "addl %d1, %a3@+ " ); \ | |
262 | asm( "addxl %d0, %d3 " ); | |
263 | ||
264 | #endif /* MC68000 */ | |
265 | ||
266 | #if defined(__powerpc__) || defined(__ppc__) | |
267 | #if defined(__powerpc64__) || defined(__ppc64__) | |
268 | ||
269 | #if defined(__MACH__) && defined(__APPLE__) | |
270 | /* Apple toolchain variant (bare register names): r3 = s, r4 = d, r5 = c, r6 = b; both pointers are backed up by 8 because CORE addresses the next limb at offset 8, and addic with 0 resets the carry bit. */ ||
271 | #define MULADDC_INIT \ | |
272 | asm( "ld r3, %0 " :: "m" (s)); \ | |
273 | asm( "ld r4, %0 " :: "m" (d)); \ | |
274 | asm( "ld r5, %0 " :: "m" (c)); \ | |
275 | asm( "ld r6, %0 " :: "m" (b)); \ | |
276 | asm( "addi r3, r3, -8 " ); \ | |
277 | asm( "addi r4, r4, -8 " ); \ | |
278 | asm( "addic r5, r5, 0 " ); | |
279 | /* One 64-bit limb: ldu fetches the source limb and advances r3, r9:r8 = r7 * r6, r8 += r5 + carry bit, r5 = r9 + carry bit, r8 += destination limb, and stdu stores r8 and advances r4; the carry bit left by addc is consumed by the next expansion or by STOP. */ ||
280 | #define MULADDC_CORE \ | |
281 | asm( "ldu r7, 8(r3) " ); \ | |
282 | asm( "mulld r8, r7, r6 " ); \ | |
283 | asm( "mulhdu r9, r7, r6 " ); \ | |
284 | asm( "adde r8, r8, r5 " ); \ | |
285 | asm( "ld r7, 8(r4) " ); \ | |
286 | asm( "addze r5, r9 " ); \ | |
287 | asm( "addc r8, r8, r7 " ); \ | |
288 | asm( "stdu r8, 8(r4) " ); | |
289 | /* Folds the pending carry bit into r5, undoes the -8 pointer bias, and stores r5, r4, r3 into c, d, s; the clobber list is attached to the last statement only. */ ||
290 | #define MULADDC_STOP \ | |
291 | asm( "addze r5, r5 " ); \ | |
292 | asm( "addi r4, r4, 8 " ); \ | |
293 | asm( "addi r3, r3, 8 " ); \ | |
294 | asm( "std r5, %0 " : "=m" (c)); \ | |
295 | asm( "std r4, %0 " : "=m" (d)); \ | |
296 | asm( "std r3, %0 " : "=m" (s) :: \ | |
297 | "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); | |
298 | ||
299 | #else | |
300 | /* Non-Apple variant (percent-prefixed register names): r3 = s, r4 = d, r5 = c, r6 = b; both pointers are backed up by 8 because CORE addresses the next limb at offset 8, and addic with 0 resets the carry bit. */ ||
301 | #define MULADDC_INIT \ | |
302 | asm( "ld %%r3, %0 " :: "m" (s)); \ | |
303 | asm( "ld %%r4, %0 " :: "m" (d)); \ | |
304 | asm( "ld %%r5, %0 " :: "m" (c)); \ | |
305 | asm( "ld %%r6, %0 " :: "m" (b)); \ | |
306 | asm( "addi %r3, %r3, -8 " ); \ | |
307 | asm( "addi %r4, %r4, -8 " ); \ | |
308 | asm( "addic %r5, %r5, 0 " ); | |
309 | /* One 64-bit limb: ldu fetches the source limb and advances r3, r9:r8 = r7 * r6, r8 += r5 + carry bit, r5 = r9 + carry bit, r8 += destination limb, and stdu stores r8 and advances r4; the carry bit left by addc is consumed by the next expansion or by STOP. */ ||
310 | #define MULADDC_CORE \ | |
311 | asm( "ldu %r7, 8(%r3) " ); \ | |
312 | asm( "mulld %r8, %r7, %r6 " ); \ | |
313 | asm( "mulhdu %r9, %r7, %r6 " ); \ | |
314 | asm( "adde %r8, %r8, %r5 " ); \ | |
315 | asm( "ld %r7, 8(%r4) " ); \ | |
316 | asm( "addze %r5, %r9 " ); \ | |
317 | asm( "addc %r8, %r8, %r7 " ); \ | |
318 | asm( "stdu %r8, 8(%r4) " ); | |
319 | /* Folds the pending carry bit into r5, undoes the -8 pointer bias, and stores r5, r4, r3 into c, d, s; the clobber list is attached to the last statement only. */ ||
320 | #define MULADDC_STOP \ | |
321 | asm( "addze %r5, %r5 " ); \ | |
322 | asm( "addi %r4, %r4, 8 " ); \ | |
323 | asm( "addi %r3, %r3, 8 " ); \ | |
324 | asm( "std %%r5, %0 " : "=m" (c)); \ | |
325 | asm( "std %%r4, %0 " : "=m" (d)); \ | |
326 | asm( "std %%r3, %0 " : "=m" (s) :: \ | |
327 | "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); | |
328 | ||
329 | #endif | |
330 | ||
331 | #else /* PPC32 */ | |
332 | ||
333 | #if defined(__MACH__) && defined(__APPLE__) | |
334 | /* Apple toolchain variant (bare register names), 32-bit limbs: r3 = s, r4 = d, r5 = c, r6 = b; both pointers are backed up by 4 because CORE addresses the next limb at offset 4, and addic with 0 resets the carry bit. */ ||
335 | #define MULADDC_INIT \ | |
336 | asm( "lwz r3, %0 " :: "m" (s)); \ | |
337 | asm( "lwz r4, %0 " :: "m" (d)); \ | |
338 | asm( "lwz r5, %0 " :: "m" (c)); \ | |
339 | asm( "lwz r6, %0 " :: "m" (b)); \ | |
340 | asm( "addi r3, r3, -4 " ); \ | |
341 | asm( "addi r4, r4, -4 " ); \ | |
342 | asm( "addic r5, r5, 0 " ); | |
343 | /* One 32-bit limb: lwzu fetches the source limb and advances r3, r9:r8 = r7 * r6, r8 += r5 + carry bit, r5 = r9 + carry bit, r8 += destination limb, and stwu stores r8 and advances r4; the carry bit left by addc is consumed by the next expansion or by STOP. */ ||
344 | #define MULADDC_CORE \ | |
345 | asm( "lwzu r7, 4(r3) " ); \ | |
346 | asm( "mullw r8, r7, r6 " ); \ | |
347 | asm( "mulhwu r9, r7, r6 " ); \ | |
348 | asm( "adde r8, r8, r5 " ); \ | |
349 | asm( "lwz r7, 4(r4) " ); \ | |
350 | asm( "addze r5, r9 " ); \ | |
351 | asm( "addc r8, r8, r7 " ); \ | |
352 | asm( "stwu r8, 4(r4) " ); | |
353 | /* Folds the pending carry bit into r5, undoes the -4 pointer bias, and stores r5, r4, r3 into c, d, s; the clobber list is attached to the last statement only. */ ||
354 | #define MULADDC_STOP \ | |
355 | asm( "addze r5, r5 " ); \ | |
356 | asm( "addi r4, r4, 4 " ); \ | |
357 | asm( "addi r3, r3, 4 " ); \ | |
358 | asm( "stw r5, %0 " : "=m" (c)); \ | |
359 | asm( "stw r4, %0 " : "=m" (d)); \ | |
360 | asm( "stw r3, %0 " : "=m" (s) :: \ | |
361 | "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); | |
362 | ||
363 | #else | |
364 | /* Non-Apple variant (percent-prefixed register names), 32-bit limbs: r3 = s, r4 = d, r5 = c, r6 = b; both pointers are backed up by 4 because CORE addresses the next limb at offset 4, and addic with 0 resets the carry bit. */ ||
365 | #define MULADDC_INIT \ | |
366 | asm( "lwz %%r3, %0 " :: "m" (s)); \ | |
367 | asm( "lwz %%r4, %0 " :: "m" (d)); \ | |
368 | asm( "lwz %%r5, %0 " :: "m" (c)); \ | |
369 | asm( "lwz %%r6, %0 " :: "m" (b)); \ | |
370 | asm( "addi %r3, %r3, -4 " ); \ | |
371 | asm( "addi %r4, %r4, -4 " ); \ | |
372 | asm( "addic %r5, %r5, 0 " ); | |
373 | /* One 32-bit limb: lwzu fetches the source limb and advances r3, r9:r8 = r7 * r6, r8 += r5 + carry bit, r5 = r9 + carry bit, r8 += destination limb, and stwu stores r8 and advances r4; the carry bit left by addc is consumed by the next expansion or by STOP. */ ||
374 | #define MULADDC_CORE \ | |
375 | asm( "lwzu %r7, 4(%r3) " ); \ | |
376 | asm( "mullw %r8, %r7, %r6 " ); \ | |
377 | asm( "mulhwu %r9, %r7, %r6 " ); \ | |
378 | asm( "adde %r8, %r8, %r5 " ); \ | |
379 | asm( "lwz %r7, 4(%r4) " ); \ | |
380 | asm( "addze %r5, %r9 " ); \ | |
381 | asm( "addc %r8, %r8, %r7 " ); \ | |
382 | asm( "stwu %r8, 4(%r4) " ); | |
383 | /* Folds the pending carry bit into r5, undoes the -4 pointer bias, and stores r5, r4, r3 into c, d, s; the clobber list is attached to the last statement only. */ ||
384 | #define MULADDC_STOP \ | |
385 | asm( "addze %r5, %r5 " ); \ | |
386 | asm( "addi %r4, %r4, 4 " ); \ | |
387 | asm( "addi %r3, %r3, 4 " ); \ | |
388 | asm( "stw %%r5, %0 " : "=m" (c)); \ | |
389 | asm( "stw %%r4, %0 " : "=m" (d)); \ | |
390 | asm( "stw %%r3, %0 " : "=m" (s) :: \ | |
391 | "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); | |
392 | ||
393 | #endif | |
394 | ||
395 | #endif /* PPC32 */ | |
396 | #endif /* PowerPC */ | |
397 | ||
398 | #if defined(__sparc__) && defined(__sparc64__) | |
399 | /* Opens one asm statement: o0 = s and o1 = d are loaded with 64-bit ldx, o2 = c and o3 = b with 32-bit ld. */ ||
400 | #define MULADDC_INIT \ | |
401 | asm( \ | |
402 | " \ |
403 | ldx %3, %%o0; \ |
404 | ldx %4, %%o1; \ |
405 | ld %5, %%o2; \ |
406 | ld %6, %%o3; \ |
407 | " | |
408 | /* One 32-bit limb: o4 = word at o0, o5 = word at o1, umul leaves the low product word in o4 and the high word in the y register (read into g1); the carry o2 and o5 are added to o4 with the carry-outs collected in g1; o4 is stored at o1, g1 becomes the new carry o2, and both pointers advance by 4. */ ||
409 | #define MULADDC_CORE \ | |
410 | " \ |
411 | ld [%%o0], %%o4; \ |
412 | inc 4, %%o0; \ |
413 | ld [%%o1], %%o5; \ |
414 | umul %%o3, %%o4, %%o4; \ |
415 | addcc %%o4, %%o2, %%o4; \ |
416 | rd %%y, %%g1; \ |
417 | addx %%g1, 0, %%g1; \ |
418 | addcc %%o4, %%o5, %%o4; \ |
419 | st %%o4, [%%o1]; \ |
420 | addx %%g1, 0, %%o2; \ |
421 | inc 4, %%o1; \ |
422 | " | |
423 | /* Stores o2 into c with a 32-bit st and o1, o0 into d, s with 64-bit stx, then closes the asm statement with its operand and clobber lists. */ ||
424 | #define MULADDC_STOP \ | |
425 | " \ |
426 | st %%o2, %0; \ |
427 | stx %%o1, %1; \ |
428 | stx %%o0, %2; \ |
429 | " \ | |
430 | : "=m" (c), "=m" (d), "=m" (s) \ | |
431 | : "m" (s), "m" (d), "m" (c), "m" (b) \ | |
432 | : "g1", "o0", "o1", "o2", "o3", "o4", \ | |
433 | "o5" \ | |
434 | ); | |
435 | #endif /* SPARCv9 */ | |
436 | ||
437 | #if defined(__sparc__) && !defined(__sparc64__) | |
438 | /* Opens one asm statement: o0 = s, o1 = d, o2 = c, o3 = b, all loaded with 32-bit ld. */ ||
439 | #define MULADDC_INIT \ | |
440 | asm( \ | |
441 | " \ |
442 | ld %3, %%o0; \ |
443 | ld %4, %%o1; \ |
444 | ld %5, %%o2; \ |
445 | ld %6, %%o3; \ |
446 | " | |
447 | /* One 32-bit limb: o4 = word at o0, o5 = word at o1, umul leaves the low product word in o4 and the high word in the y register (read into g1); the carry o2 and o5 are added to o4 with the carry-outs collected in g1; o4 is stored at o1, g1 becomes the new carry o2, and both pointers advance by 4. */ ||
448 | #define MULADDC_CORE \ | |
449 | " \ |
450 | ld [%%o0], %%o4; \ |
451 | inc 4, %%o0; \ |
452 | ld [%%o1], %%o5; \ |
453 | umul %%o3, %%o4, %%o4; \ |
454 | addcc %%o4, %%o2, %%o4; \ |
455 | rd %%y, %%g1; \ |
456 | addx %%g1, 0, %%g1; \ |
457 | addcc %%o4, %%o5, %%o4; \ |
458 | st %%o4, [%%o1]; \ |
459 | addx %%g1, 0, %%o2; \ |
460 | inc 4, %%o1; \ |
461 | " | |
462 | /* Stores o2, o1, o0 into c, d, s with 32-bit st, then closes the asm statement with its operand and clobber lists. */ ||
463 | #define MULADDC_STOP \ | |
464 | " \ |
465 | st %%o2, %0; \ |
466 | st %%o1, %1; \ |
467 | st %%o0, %2; \ |
468 | " \ | |
469 | : "=m" (c), "=m" (d), "=m" (s) \ | |
470 | : "m" (s), "m" (d), "m" (c), "m" (b) \ | |
471 | : "g1", "o0", "o1", "o2", "o3", "o4", \ | |
472 | "o5" \ | |
473 | ); | |
474 | ||
475 | #endif /* SPARCv8 */ | |
476 | ||
477 | #if defined(__microblaze__) || defined(microblaze) | |
478 | /* r3 = s, r4 = d, r5 = c, r6 = b; b is then split into halves: r7 = b & 0xffff and r6 = b >> 16. */ ||
479 | #define MULADDC_INIT \ | |
480 | asm( "lwi r3, %0 " :: "m" (s)); \ | |
481 | asm( "lwi r4, %0 " :: "m" (d)); \ | |
482 | asm( "lwi r5, %0 " :: "m" (c)); \ | |
483 | asm( "lwi r6, %0 " :: "m" (b)); \ | |
484 | asm( "andi r7, r6, 0xffff" ); \ | |
485 | asm( "bsrli r6, r6, 16 " ); | |
486 | /* One 32-bit limb built from 16-bit pieces: two halfword loads advance r3 by 4 in total, four partial products (r10..r13) are combined into r13:r12, the destination word and the carry r5 are added, r12 is stored at r4, r5 receives the new carry and r4 advances by 4. */ ||
487 | #define MULADDC_CORE \ | |
488 | asm( "lhui r8, r3, 0 " ); \ | |
489 | asm( "addi r3, r3, 2 " ); \ | |
490 | asm( "lhui r9, r3, 0 " ); \ | |
491 | asm( "addi r3, r3, 2 " ); \ | |
492 | asm( "mul r10, r9, r6 " ); \ | |
493 | asm( "mul r11, r8, r7 " ); \ | |
494 | asm( "mul r12, r9, r7 " ); \ | |
495 | asm( "mul r13, r8, r6 " ); \ | |
496 | asm( "bsrli r8, r10, 16 " ); \ | |
497 | asm( "bsrli r9, r11, 16 " ); \ | |
498 | asm( "add r13, r13, r8 " ); \ | |
499 | asm( "add r13, r13, r9 " ); \ | |
500 | asm( "bslli r10, r10, 16 " ); \ | |
501 | asm( "bslli r11, r11, 16 " ); \ | |
502 | asm( "add r12, r12, r10 " ); \ | |
503 | asm( "addc r13, r13, r0 " ); \ | |
504 | asm( "add r12, r12, r11 " ); \ | |
505 | asm( "addc r13, r13, r0 " ); \ | |
506 | asm( "lwi r10, r4, 0 " ); \ | |
507 | asm( "add r12, r12, r10 " ); \ | |
508 | asm( "addc r13, r13, r0 " ); \ | |
509 | asm( "add r12, r12, r5 " ); \ | |
510 | asm( "addc r5, r13, r0 " ); \ | |
511 | asm( "swi r12, r4, 0 " ); \ | |
512 | asm( "addi r4, r4, 4 " ); | |
513 | /* Stores r5, r4, r3 into c, d, s; the clobber list is attached to the last statement only. */ ||
514 | #define MULADDC_STOP \ | |
515 | asm( "swi r5, %0 " : "=m" (c)); \ | |
516 | asm( "swi r4, %0 " : "=m" (d)); \ | |
517 | asm( "swi r3, %0 " : "=m" (s) :: \ | |
518 | "r3", "r4" , "r5" , "r6" , "r7" , "r8" , \ | |
519 | "r9", "r10", "r11", "r12", "r13" ); | |
520 | ||
521 | #endif /* MicroBlaze */ | |
522 | ||
523 | #if defined(__tricore__) | |
524 | /* a2 = s, a3 = d, d4 = c, d1 = b; d5 is cleared with xor. */ ||
525 | #define MULADDC_INIT \ | |
526 | asm( "ld.a %%a2, %0 " :: "m" (s)); \ | |
527 | asm( "ld.a %%a3, %0 " :: "m" (d)); \ | |
528 | asm( "ld.w %%d4, %0 " :: "m" (c)); \ | |
529 | asm( "ld.w %%d1, %0 " :: "m" (b)); \ | |
530 | asm( "xor %d5, %d5 " ); | |
531 | /* One limb: d0 = next source word via [a2+], madd.u forms e2 = e4 + d0 * d1, the destination word is added to d2 with the carry propagated into d3, d3 becomes the next carry in d4, and d2 is stored via [a3+]. */ ||
532 | #define MULADDC_CORE \ | |
533 | asm( "ld.w %d0, [%a2+] " ); \ | |
534 | asm( "madd.u %e2, %e4, %d0, %d1 " ); \ | |
535 | asm( "ld.w %d0, [%a3] " ); \ | |
536 | asm( "addx %d2, %d2, %d0 " ); \ | |
537 | asm( "addc %d3, %d3, 0 " ); \ | |
538 | asm( "mov %d4, %d3 " ); \ | |
539 | asm( "st.w [%a3+], %d2 " ); | |
540 | /* Stores d4, a3, a2 into c, d, s. NOTE(review): d5 is written by MULADDC_INIT but does not appear in this clobber list - confirm whether it should. */ ||
541 | #define MULADDC_STOP \ | |
542 | asm( "st.w %0, %%d4 " : "=m" (c)); \ | |
543 | asm( "st.a %0, %%a3 " : "=m" (d)); \ | |
544 | asm( "st.a %0, %%a2 " : "=m" (s) :: \ | |
545 | "d0", "d1", "e2", "d4", "a2", "a3" ); | |
546 | ||
547 | #endif /* TriCore */ | |
548 | ||
549 | #if defined(__arm__) | |
550 | ||
551 | #if defined(__thumb__) && !defined(__thumb2__) | |
552 | /* Opens one asm statement: r0 = s, r1 = d, r2 = c, r3 = b; the halves of b are parked in high registers, r9 = b >> 16 and r8 = b & 0xffff. */ ||
553 | #define MULADDC_INIT \ | |
554 | asm( \ | |
555 | " \ |
556 | ldr r0, %3; \ |
557 | ldr r1, %4; \ |
558 | ldr r2, %5; \ |
559 | ldr r3, %6; \ |
560 | lsr r7, r3, #16; \ |
561 | mov r9, r7; \ |
562 | lsl r7, r3, #16; \ |
563 | lsr r7, r7, #16; \ |
564 | mov r8, r7; \ |
565 | " | |
566 | /* One limb using 16x16 multiplies: the source word (ldmia r0!) is split into r7 (high half) and r6 (low half), four partial products are combined into r5:r4 together with the carry r2 and the destination word, r2 receives the new carry and r4 is stored with stmia r1!. */ ||
567 | #define MULADDC_CORE \ | |
568 | " \ |
569 | ldmia r0!, {r6}; \ |
570 | lsr r7, r6, #16; \ |
571 | lsl r6, r6, #16; \ |
572 | lsr r6, r6, #16; \ |
573 | mov r4, r8; \ |
574 | mul r4, r6; \ |
575 | mov r3, r9; \ |
576 | mul r6, r3; \ |
577 | mov r5, r9; \ |
578 | mul r5, r7; \ |
579 | mov r3, r8; \ |
580 | mul r7, r3; \ |
581 | lsr r3, r6, #16; \ |
582 | add r5, r5, r3; \ |
583 | lsr r3, r7, #16; \ |
584 | add r5, r5, r3; \ |
585 | add r4, r4, r2; \ |
586 | mov r2, #0; \ |
587 | adc r5, r2; \ |
588 | lsl r3, r6, #16; \ |
589 | add r4, r4, r3; \ |
590 | adc r5, r2; \ |
591 | lsl r3, r7, #16; \ |
592 | add r4, r4, r3; \ |
593 | adc r5, r2; \ |
594 | ldr r3, [r1]; \ |
595 | add r4, r4, r3; \ |
596 | adc r2, r5; \ |
597 | stmia r1!, {r4}; \ |
598 | " | |
599 | /* Stores r2, r1, r0 into c, d, s and closes the asm statement with its operand and clobber lists. */ ||
600 | #define MULADDC_STOP \ | |
601 | " \ |
602 | str r2, %0; \ |
603 | str r1, %1; \ |
604 | str r0, %2; \ |
605 | " \ | |
606 | : "=m" (c), "=m" (d), "=m" (s) \ | |
607 | : "m" (s), "m" (d), "m" (c), "m" (b) \ | |
608 | : "r0", "r1", "r2", "r3", "r4", "r5", \ | |
609 | "r6", "r7", "r8", "r9", "cc" \ | |
610 | ); | |
611 | ||
612 | #else | |
613 | /* Opens one asm statement: r0 = s, r1 = d, r2 = c, r3 = b. */ ||
614 | #define MULADDC_INIT \ | |
615 | asm( \ | |
616 | " \ |
617 | ldr r0, %3; \ |
618 | ldr r1, %4; \ |
619 | ldr r2, %5; \ |
620 | ldr r3, %6; \ |
621 | " | |
622 | /* One limb: r4 = source word (post-indexed, r0 += 4), r6 = destination word, umlal accumulates r3 * r4 into r5:r2 with r5 zeroed first, r7 = r6 + r2 is stored (post-indexed, r1 += 4) and the new carry is r2 = r5 + carry flag. */ ||
623 | #define MULADDC_CORE \ | |
624 | " \ |
625 | ldr r4, [r0], #4; \ |
626 | mov r5, #0; \ |
627 | ldr r6, [r1]; \ |
628 | umlal r2, r5, r3, r4; \ |
629 | adds r7, r6, r2; \ |
630 | adc r2, r5, #0; \ |
631 | str r7, [r1], #4; \ |
632 | " | |
633 | /* Stores r2, r1, r0 into c, d, s and closes the asm statement with its operand and clobber lists. */ ||
634 | #define MULADDC_STOP \ | |
635 | " \ |
636 | str r2, %0; \ |
637 | str r1, %1; \ |
638 | str r0, %2; \ |
639 | " \ | |
640 | : "=m" (c), "=m" (d), "=m" (s) \ | |
641 | : "m" (s), "m" (d), "m" (c), "m" (b) \ | |
642 | : "r0", "r1", "r2", "r3", "r4", "r5", \ | |
643 | "r6", "r7", "cc" \ | |
644 | ); | |
645 | ||
646 | #endif /* Thumb */ | |
647 | ||
648 | #endif /* ARMv3 */ | |
649 | ||
650 | #if defined(__alpha__) | |
651 | /* $1 = s, $2 = d, $3 = c, $4 = b, each loaded by its own asm statement. */ ||
652 | #define MULADDC_INIT \ | |
653 | asm( "ldq $1, %0 " :: "m" (s)); \ | |
654 | asm( "ldq $2, %0 " :: "m" (d)); \ | |
655 | asm( "ldq $3, %0 " :: "m" (c)); \ | |
656 | asm( "ldq $4, %0 " :: "m" (b)); | |
657 | /* One 64-bit limb: $6 = source word and $1 advances by 8; mulq and umulh give the low ($7) and high ($6) product words; adding the carry $3 and the destination word $5 to $7 leaves the two carry-outs (cmpult) in $3 and $5; $7 is stored, $2 advances by 8, and the new carry is $3 = $6 + $3 + $5. */ ||
658 | #define MULADDC_CORE \ | |
659 | asm( "ldq $6, 0($1) " ); \ | |
660 | asm( "addq $1, 8, $1 " ); \ | |
661 | asm( "mulq $6, $4, $7 " ); \ | |
662 | asm( "umulh $6, $4, $6 " ); \ | |
663 | asm( "addq $7, $3, $7 " ); \ | |
664 | asm( "cmpult $7, $3, $3 " ); \ | |
665 | asm( "ldq $5, 0($2) " ); \ | |
666 | asm( "addq $7, $5, $7 " ); \ | |
667 | asm( "cmpult $7, $5, $5 " ); \ | |
668 | asm( "stq $7, 0($2) " ); \ | |
669 | asm( "addq $2, 8, $2 " ); \ | |
670 | asm( "addq $6, $3, $3 " ); \ | |
671 | asm( "addq $5, $3, $3 " ); | |
672 | /* Stores $3, $2, $1 into c, d, s; the clobber list is attached to the last statement only. */ ||
673 | #define MULADDC_STOP \ | |
674 | asm( "stq $3, %0 " : "=m" (c)); \ | |
675 | asm( "stq $2, %0 " : "=m" (d)); \ | |
676 | asm( "stq $1, %0 " : "=m" (s) :: \ | |
677 | "$1", "$2", "$3", "$4", "$5", "$6", "$7" ); | |
678 | ||
679 | #endif /* Alpha */ | |
680 | ||
681 | #if defined(__mips__) | |
682 | /* $10 = s, $11 = d, $12 = c, $13 = b, each loaded by its own asm statement. */ ||
683 | #define MULADDC_INIT \ | |
684 | asm( "lw $10, %0 " :: "m" (s)); \ | |
685 | asm( "lw $11, %0 " :: "m" (d)); \ | |
686 | asm( "lw $12, %0 " :: "m" (c)); \ | |
687 | asm( "lw $13, %0 " :: "m" (b)); | |
688 | /* One 32-bit limb: $14 = source word, multu with $13 gives lo ($14) and hi ($9); the carry $12 and the destination word are added with sltu capturing each carry-out; the sum $15 is stored at $11, the new carry is $12 = hi + both carry-outs, and $10 and $11 advance by 4. */ ||
689 | #define MULADDC_CORE \ | |
690 | asm( "lw $14, 0($10) " ); \ | |
691 | asm( "multu $13, $14 " ); \ | |
692 | asm( "addi $10, $10, 4 " ); \ | |
693 | asm( "mflo $14 " ); \ | |
694 | asm( "mfhi $9 " ); \ | |
695 | asm( "addu $14, $12, $14 " ); \ | |
696 | asm( "lw $15, 0($11) " ); \ | |
697 | asm( "sltu $12, $14, $12 " ); \ | |
698 | asm( "addu $15, $14, $15 " ); \ | |
699 | asm( "sltu $14, $15, $14 " ); \ | |
700 | asm( "addu $12, $12, $9 " ); \ | |
701 | asm( "sw $15, 0($11) " ); \ | |
702 | asm( "addu $12, $12, $14 " ); \ | |
703 | asm( "addi $11, $11, 4 " ); | |
704 | /* Stores $12, $11, $10 into c, d, s; the clobber list is attached to the last statement only. */ ||
705 | #define MULADDC_STOP \ | |
706 | asm( "sw $12, %0 " : "=m" (c)); \ | |
707 | asm( "sw $11, %0 " : "=m" (d)); \ | |
708 | asm( "sw $10, %0 " : "=m" (s) :: \ | |
709 | "$9", "$10", "$11", "$12", "$13", "$14", "$15" ); | |
710 | ||
711 | #endif /* MIPS */ | |
712 | #endif /* GNUC */ | |
713 | ||
714 | #if (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__) | |
715 | /* MSVC (x86) / Watcom inline assembly: esi = s, edi = d, ecx = c, ebx = b. */ ||
716 | #define MULADDC_INIT \ | |
717 | __asm mov esi, s \ | |
718 | __asm mov edi, d \ | |
719 | __asm mov ecx, c \ | |
720 | __asm mov ebx, b | |
721 | /* One limb: lodsd fetches eax through esi, edx:eax = eax * ebx, the carry ecx and the word at [edi] are added, edx becomes the new carry in ecx and stosd stores eax through edi. */ ||
722 | #define MULADDC_CORE \ | |
723 | __asm lodsd \ | |
724 | __asm mul ebx \ | |
725 | __asm add eax, ecx \ | |
726 | __asm adc edx, 0 \ | |
727 | __asm add eax, [edi] \ | |
728 | __asm adc edx, 0 \ | |
729 | __asm mov ecx, edx \ | |
730 | __asm stosd | |
731 | ||
732 | #if defined(POLARSSL_HAVE_SSE2) | |
733 | /* EMIT places one raw byte into the instruction stream through __asm _emit. */ ||
734 | #define EMIT __asm _emit | |
735 | /* Eight-limb step written as raw opcode bytes. NOTE(review): the bytes look like the encoding of the MMX sequence used by the GCC i386 MULADDC_HUIT (the tail 83 C7 20 / 83 C6 20 would be add edi, 32 / add esi, 32) - confirm against an assembler listing. */ ||
736 | #define MULADDC_HUIT \ | |
737 | EMIT 0x0F EMIT 0x6E EMIT 0xC9 \ | |
738 | EMIT 0x0F EMIT 0x6E EMIT 0xC3 \ | |
739 | EMIT 0x0F EMIT 0x6E EMIT 0x1F \ | |
740 | EMIT 0x0F EMIT 0xD4 EMIT 0xCB \ | |
741 | EMIT 0x0F EMIT 0x6E EMIT 0x16 \ | |
742 | EMIT 0x0F EMIT 0xF4 EMIT 0xD0 \ | |
743 | EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x04 \ | |
744 | EMIT 0x0F EMIT 0xF4 EMIT 0xE0 \ | |
745 | EMIT 0x0F EMIT 0x6E EMIT 0x76 EMIT 0x08 \ | |
746 | EMIT 0x0F EMIT 0xF4 EMIT 0xF0 \ | |
747 | EMIT 0x0F EMIT 0x6E EMIT 0x7E EMIT 0x0C \ | |
748 | EMIT 0x0F EMIT 0xF4 EMIT 0xF8 \ | |
749 | EMIT 0x0F EMIT 0xD4 EMIT 0xCA \ | |
750 | EMIT 0x0F EMIT 0x6E EMIT 0x5F EMIT 0x04 \ | |
751 | EMIT 0x0F EMIT 0xD4 EMIT 0xDC \ | |
752 | EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x08 \ | |
753 | EMIT 0x0F EMIT 0xD4 EMIT 0xEE \ | |
754 | EMIT 0x0F EMIT 0x6E EMIT 0x67 EMIT 0x0C \ | |
755 | EMIT 0x0F EMIT 0xD4 EMIT 0xFC \ | |
756 | EMIT 0x0F EMIT 0x7E EMIT 0x0F \ | |
757 | EMIT 0x0F EMIT 0x6E EMIT 0x56 EMIT 0x10 \ | |
758 | EMIT 0x0F EMIT 0xF4 EMIT 0xD0 \ | |
759 | EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ | |
760 | EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x14 \ | |
761 | EMIT 0x0F EMIT 0xF4 EMIT 0xE0 \ | |
762 | EMIT 0x0F EMIT 0xD4 EMIT 0xCB \ | |
763 | EMIT 0x0F EMIT 0x6E EMIT 0x76 EMIT 0x18 \ | |
764 | EMIT 0x0F EMIT 0xF4 EMIT 0xF0 \ | |
765 | EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x04 \ | |
766 | EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ | |
767 | EMIT 0x0F EMIT 0x6E EMIT 0x5E EMIT 0x1C \ | |
768 | EMIT 0x0F EMIT 0xF4 EMIT 0xD8 \ | |
769 | EMIT 0x0F EMIT 0xD4 EMIT 0xCD \ | |
770 | EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x10 \ | |
771 | EMIT 0x0F EMIT 0xD4 EMIT 0xD5 \ | |
772 | EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x08 \ | |
773 | EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ | |
774 | EMIT 0x0F EMIT 0xD4 EMIT 0xCF \ | |
775 | EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x14 \ | |
776 | EMIT 0x0F EMIT 0xD4 EMIT 0xE5 \ | |
777 | EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x0C \ | |
778 | EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ | |
779 | EMIT 0x0F EMIT 0xD4 EMIT 0xCA \ | |
780 | EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x18 \ | |
781 | EMIT 0x0F EMIT 0xD4 EMIT 0xF5 \ | |
782 | EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x10 \ | |
783 | EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ | |
784 | EMIT 0x0F EMIT 0xD4 EMIT 0xCC \ | |
785 | EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x1C \ | |
786 | EMIT 0x0F EMIT 0xD4 EMIT 0xDD \ | |
787 | EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x14 \ | |
788 | EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ | |
789 | EMIT 0x0F EMIT 0xD4 EMIT 0xCE \ | |
790 | EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x18 \ | |
791 | EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ | |
792 | EMIT 0x0F EMIT 0xD4 EMIT 0xCB \ | |
793 | EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x1C \ | |
794 | EMIT 0x83 EMIT 0xC7 EMIT 0x20 \ | |
795 | EMIT 0x83 EMIT 0xC6 EMIT 0x20 \ | |
796 | EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ | |
797 | EMIT 0x0F EMIT 0x7E EMIT 0xC9 | |
798 | /* SSE2 build: the bytes 0F 77 encode emms, after which ecx, edi, esi are written back to c, d, s. The last line carries no continuation backslash, so the macro ends here and cannot absorb the following line. */ ||
799 | #define MULADDC_STOP \ | |
800 | EMIT 0x0F EMIT 0x77 \ | |
801 | __asm mov c, ecx \ | |
802 | __asm mov d, edi \ | |
803 | __asm mov s, esi | |
804 | ||
805 | #else | |
806 | /* Non-SSE2 build: writes ecx, edi, esi back to c, d, s. The last line carries no continuation backslash, so the macro ends here and cannot absorb the following line. */ ||
807 | #define MULADDC_STOP \ | |
808 | __asm mov c, ecx \ | |
809 | __asm mov d, edi \ | |
810 | __asm mov s, esi | |
811 | ||
812 | #endif /* SSE2 */ | |
813 | #endif /* MSVC */ | |
814 | ||
815 | #endif /* POLARSSL_HAVE_ASM */ | |
816 | ||
817 | #if !defined(MULADDC_CORE) | |
818 | #if defined(POLARSSL_HAVE_UDBL) | |
819 | /* Portable fallback built on the double-width type t_udbl: INIT opens a scope that holds the temporaries and STOP closes it again. */ ||
820 | #define MULADDC_INIT \ | |
821 | { \ | |
822 | t_uint mul_lo, mul_hi; \ | |
823 | t_udbl mul_wide; | |
824 | /* One limb: the double-width product of *s and b is split into low and high words, c and *d are added to the low word with each wrap-around counted in the high word, then c takes the high word and *d the low word; s and d each advance by one. */ ||
825 | #define MULADDC_CORE \ | |
826 | mul_wide = (t_udbl) b * *s; s++; \ | |
827 | mul_lo = (t_uint) mul_wide; \ | |
828 | mul_hi = (t_uint) ( mul_wide >> biL ); \ | |
829 | mul_lo += c; mul_hi += ( mul_lo < c ); \ | |
830 | mul_lo += *d; mul_hi += ( mul_lo < *d ); \ | |
831 | c = mul_hi; *d = mul_lo; d++; | |
832 | /* Closes the scope opened by MULADDC_INIT. */ ||
833 | #define MULADDC_STOP \ | |
834 | } | |
835 | ||
836 | #else | |
837 | #define MULADDC_INIT \ | |
838 | { /* scope for the temporaries, closed by MULADDC_STOP; b is split into half-words once */ \ | |
839 | t_uint src_lo, src_hi, fac_lo, fac_hi; \ | |
840 | t_uint acc_lo, acc_hi, mid_a, mid_b; \ | |
841 | fac_hi = ( b >> biH ); \ | |
842 | fac_lo = ( b << biH ) >> biH; | |
843 | /* One limb without a double-width type: *s is split into half-words, the four half-word products are summed into acc_hi:acc_lo, c and *d are added with each wrap-around counted in acc_hi, then c takes acc_hi and *d takes acc_lo; s and d each advance by one. */ ||
844 | #define MULADDC_CORE \ | |
845 | src_hi = ( *s >> biH ); \ | |
846 | src_lo = ( *s << biH ) >> biH; s++; \ | |
847 | acc_lo = src_lo * fac_lo; mid_a = src_lo * fac_hi; \ | |
848 | acc_hi = src_hi * fac_hi; mid_b = src_hi * fac_lo; \ | |
849 | acc_hi += ( mid_a >> biH ); mid_a <<= biH; \ | |
850 | acc_hi += ( mid_b >> biH ); mid_b <<= biH; \ | |
851 | acc_lo += mid_a; acc_hi += ( acc_lo < mid_a ); \ | |
852 | acc_lo += mid_b; acc_hi += ( acc_lo < mid_b ); \ | |
853 | acc_lo += c; acc_hi += ( acc_lo < c ); \ | |
854 | acc_lo += *d; acc_hi += ( acc_lo < *d ); \ | |
855 | c = acc_hi; \ | |
856 | *d = acc_lo; d++; | |
857 | /* Closes the scope opened by MULADDC_INIT. */ ||
858 | #define MULADDC_STOP \ | |
859 | } | |
860 | ||
861 | #endif /* C (generic) */ | |
862 | #endif /* !defined(MULADDC_CORE) */ | |
863 | ||
864 | #endif /* bn_mul.h */ |