]> cvs.zerfleddert.de Git - proxmark3-svn/blob - client/hardnested/hardnested_bitarray_core.c
Merge branch 'master' of https://github.com/Proxmark/proxmark3
[proxmark3-svn] / client / hardnested / hardnested_bitarray_core.c
1 //-----------------------------------------------------------------------------
2 // Copyright (C) 2016, 2017 by piwi
3 //
4 // This code is licensed to you under the terms of the GNU GPL, version 2 or,
5 // at your option, any later version. See the LICENSE.txt file for the text of
6 // the license.
7 //-----------------------------------------------------------------------------
8 // Implements a card only attack based on crypto text (encrypted nonces
9 // received during a nested authentication) only. Unlike other card only
10 // attacks this doesn't rely on implementation errors but only on the
11 // inherent weaknesses of the crypto1 cypher. Described in
12 // Carlo Meijer, Roel Verdult, "Ciphertext-only Cryptanalysis on Hardened
13 // Mifare Classic Cards" in Proceedings of the 22nd ACM SIGSAC Conference on
14 // Computer and Communications Security, 2015
15 //-----------------------------------------------------------------------------
16 // some helper functions which can benefit from SIMD instructions or other special instructions
17 //
18
19 #include "hardnested_bitarray_core.h"
20
21 #include <stdint.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <malloc.h>
25
// This file is meant to be compiled several times, once per instruction set.
// The compiler's predefined feature macros pick a suffix, and every
// implementation below gets a dedicated function name carrying that suffix
// (for example bitcount_AVX2 when compiled with AVX2 enabled).
#if defined (__AVX512F__)
#define BITARRAY_SIMD_SUFFIX AVX512
#elif defined (__AVX2__)
#define BITARRAY_SIMD_SUFFIX AVX2
#elif defined (__AVX__)
#define BITARRAY_SIMD_SUFFIX AVX
#elif defined (__SSE2__)
#define BITARRAY_SIMD_SUFFIX SSE2
#elif defined (__MMX__)
#define BITARRAY_SIMD_SUFFIX MMX
#else
#define BITARRAY_SIMD_SUFFIX NOSIMD
#endif

// Two-level paste: the extra level lets BITARRAY_SIMD_SUFFIX expand before "##" is applied.
#define BITARRAY_PASTE_(name, suffix) name##_##suffix
#define BITARRAY_PASTE(name, suffix)  BITARRAY_PASTE_(name, suffix)
#define BITARRAY_IMPL(name)           BITARRAY_PASTE(name, BITARRAY_SIMD_SUFFIX)

#define MALLOC_BITARRAY          BITARRAY_IMPL(malloc_bitarray)
#define FREE_BITARRAY            BITARRAY_IMPL(free_bitarray)
#define BITCOUNT                 BITARRAY_IMPL(bitcount)
#define COUNT_STATES             BITARRAY_IMPL(count_states)
#define BITARRAY_AND             BITARRAY_IMPL(bitarray_AND)
#define BITARRAY_LOW20_AND       BITARRAY_IMPL(bitarray_low20_AND)
#define COUNT_BITARRAY_AND       BITARRAY_IMPL(count_bitarray_AND)
#define COUNT_BITARRAY_LOW20_AND BITARRAY_IMPL(count_bitarray_low20_AND)
#define BITARRAY_AND4            BITARRAY_IMPL(bitarray_AND4)
#define BITARRAY_OR              BITARRAY_IMPL(bitarray_OR)
#define COUNT_BITARRAY_AND2      BITARRAY_IMPL(count_bitarray_AND2)
#define COUNT_BITARRAY_AND3      BITARRAY_IMPL(count_bitarray_AND3)
#define COUNT_BITARRAY_AND4      BITARRAY_IMPL(count_bitarray_AND4)
113
114
// Function types, and for each of them the declarations of all
// per-instruction-set variants plus the runtime dispatcher.
#define DECLARE_BITARRAY_VARIANTS(type, name) \
    type name##_AVX512, name##_AVX2, name##_AVX, name##_SSE2, name##_MMX, name##_NOSIMD, name##_dispatch

typedef uint32_t *malloc_bitarray_t(uint32_t);
DECLARE_BITARRAY_VARIANTS(malloc_bitarray_t, malloc_bitarray);

typedef void free_bitarray_t(uint32_t *);
DECLARE_BITARRAY_VARIANTS(free_bitarray_t, free_bitarray);

typedef uint32_t bitcount_t(uint32_t);
DECLARE_BITARRAY_VARIANTS(bitcount_t, bitcount);

typedef uint32_t count_states_t(uint32_t *);
DECLARE_BITARRAY_VARIANTS(count_states_t, count_states);

typedef void bitarray_AND_t(uint32_t *, uint32_t *);
DECLARE_BITARRAY_VARIANTS(bitarray_AND_t, bitarray_AND);

typedef void bitarray_low20_AND_t(uint32_t *, uint32_t *);
DECLARE_BITARRAY_VARIANTS(bitarray_low20_AND_t, bitarray_low20_AND);

typedef uint32_t count_bitarray_AND_t(uint32_t *, uint32_t *);
DECLARE_BITARRAY_VARIANTS(count_bitarray_AND_t, count_bitarray_AND);

typedef uint32_t count_bitarray_low20_AND_t(uint32_t *, uint32_t *);
DECLARE_BITARRAY_VARIANTS(count_bitarray_low20_AND_t, count_bitarray_low20_AND);

typedef void bitarray_AND4_t(uint32_t *, uint32_t *, uint32_t *, uint32_t *);
DECLARE_BITARRAY_VARIANTS(bitarray_AND4_t, bitarray_AND4);

typedef void bitarray_OR_t(uint32_t *, uint32_t *);
DECLARE_BITARRAY_VARIANTS(bitarray_OR_t, bitarray_OR);

typedef uint32_t count_bitarray_AND2_t(uint32_t *, uint32_t *);
DECLARE_BITARRAY_VARIANTS(count_bitarray_AND2_t, count_bitarray_AND2);

typedef uint32_t count_bitarray_AND3_t(uint32_t *, uint32_t *, uint32_t *);
DECLARE_BITARRAY_VARIANTS(count_bitarray_AND3_t, count_bitarray_AND3);

typedef uint32_t count_bitarray_AND4_t(uint32_t *, uint32_t *, uint32_t *, uint32_t *);
DECLARE_BITARRAY_VARIANTS(count_bitarray_AND4_t, count_bitarray_AND4);

#undef DECLARE_BITARRAY_VARIANTS
142
143
// Allocate x bytes aligned to __BIGGEST_ALIGNMENT__.
// Uses _aligned_malloc() on Windows and memalign() elsewhere; the result of the
// allocator is passed on unchanged (no NULL check is done here).
inline uint32_t *MALLOC_BITARRAY(uint32_t x)
{
    void *raw;

#ifdef _WIN32
    raw = _aligned_malloc(x, __BIGGEST_ALIGNMENT__);
#else
    raw = memalign(__BIGGEST_ALIGNMENT__, x);
#endif

    // let the compiler know about the alignment of the returned pointer
    return __builtin_assume_aligned(raw, __BIGGEST_ALIGNMENT__);
}
152
153
// Release a buffer using the deallocator that pairs with the allocator chosen
// in the matching malloc function: free() by default, _aligned_free() on Windows.
inline void FREE_BITARRAY(uint32_t *x)
{
#ifndef _WIN32
    free(x);
#else
    _aligned_free(x);
#endif
}
162
163
// Return the number of bits set in a.
inline uint32_t BITCOUNT(uint32_t a)
{
    const int ones = __builtin_popcountl(a);
    return ones;
}
168
169
// Return the total number of set bits in the first 2^19 32-bit words of A.
inline uint32_t COUNT_STATES(uint32_t *A)
{
    const uint32_t *const end = A + (1<<19);
    uint32_t total = 0;

    for (const uint32_t *word = A; word != end; ++word) {
        total += BITCOUNT(*word);
    }
    return total;
}
178
179
// In-place bitwise AND over 2^19 32-bit words: A[i] = A[i] & B[i].
// Both pointers are declared to the compiler as aligned to __BIGGEST_ALIGNMENT__.
inline void BITARRAY_AND(uint32_t *restrict A, uint32_t *restrict B)
{
    A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
    B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);

    uint32_t idx = 0;
    while (idx < (1<<19)) {
        A[idx] = A[idx] & B[idx];
        ++idx;
    }
}
188
189
// Treat both arrays as 2^20 16-bit units. Wherever a unit of B is all-zero, the
// corresponding unit of A is cleared; all other units of A are left untouched.
// Both pointers are declared to the compiler as aligned to __BIGGEST_ALIGNMENT__.
inline void BITARRAY_LOW20_AND(uint32_t *restrict A, uint32_t *restrict B)
{
    // The buffers are uint32_t storage. Accessing them through a plain uint16_t
    // lvalue breaks the strict-aliasing rule; a may_alias type makes the 16-bit
    // view well-defined for GCC-compatible compilers without changing the loop.
    typedef uint16_t __attribute__((__may_alias__)) aliasing_u16;

    aliasing_u16 *a = (aliasing_u16 *)__builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
    aliasing_u16 *b = (aliasing_u16 *)__builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);

    for (uint32_t i = 0; i < (1<<20); i++) {
        if (!b[i]) {
            a[i] = 0;
        }
    }
}
201
202
// In-place bitwise AND over 2^19 32-bit words (A[i] &= B[i]) that also returns
// the number of bits left set in A afterwards.
// Both pointers are declared to the compiler as aligned to __BIGGEST_ALIGNMENT__.
inline uint32_t COUNT_BITARRAY_AND(uint32_t *restrict A, uint32_t *restrict B)
{
    A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
    B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);

    uint32_t total = 0;
    uint32_t idx = 0;
    while (idx < (1<<19)) {
        A[idx] &= B[idx];
        total += BITCOUNT(A[idx]);
        idx++;
    }
    return total;
}
214
215
// Treat both arrays as 2^20 16-bit units. Wherever a unit of B is all-zero, the
// corresponding unit of A is cleared. Returns the number of bits that remain
// set in A afterwards.
// Both pointers are declared to the compiler as aligned to __BIGGEST_ALIGNMENT__.
inline uint32_t COUNT_BITARRAY_LOW20_AND(uint32_t *restrict A, uint32_t *restrict B)
{
    // The buffers are uint32_t storage. Accessing them through a plain uint16_t
    // lvalue breaks the strict-aliasing rule; a may_alias type makes the 16-bit
    // view well-defined for GCC-compatible compilers without changing the loop.
    typedef uint16_t __attribute__((__may_alias__)) aliasing_u16;

    aliasing_u16 *a = (aliasing_u16 *)__builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
    aliasing_u16 *b = (aliasing_u16 *)__builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
    uint32_t count = 0;

    for (uint32_t i = 0; i < (1<<20); i++) {
        if (!b[i]) {
            a[i] = 0;
        }
        count += BITCOUNT(a[i]);
    }
    return count;
}
230
231
// Store the bitwise AND of B, C and D into A, over 2^19 32-bit words.
// The previous contents of A are not read.
// All pointers are declared to the compiler as aligned to __BIGGEST_ALIGNMENT__.
inline void BITARRAY_AND4(uint32_t *restrict A, uint32_t *restrict B, uint32_t *restrict C, uint32_t *restrict D)
{
    A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
    B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
    C = __builtin_assume_aligned(C, __BIGGEST_ALIGNMENT__);
    D = __builtin_assume_aligned(D, __BIGGEST_ALIGNMENT__);

    uint32_t idx = 0;
    while (idx < (1<<19)) {
        const uint32_t bc = B[idx] & C[idx];
        A[idx] = bc & D[idx];
        ++idx;
    }
}
242
243
// In-place bitwise OR over 2^19 32-bit words: A[i] = A[i] | B[i].
// Both pointers are declared to the compiler as aligned to __BIGGEST_ALIGNMENT__.
inline void BITARRAY_OR(uint32_t *restrict A, uint32_t *restrict B)
{
    A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
    B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);

    uint32_t idx = 0;
    while (idx < (1<<19)) {
        A[idx] = A[idx] | B[idx];
        ++idx;
    }
}
252
253
// Return the number of bits set in (A & B) over 2^19 32-bit words.
// Neither array is modified.
// Both pointers are declared to the compiler as aligned to __BIGGEST_ALIGNMENT__.
inline uint32_t COUNT_BITARRAY_AND2(uint32_t *restrict A, uint32_t *restrict B)
{
    A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
    B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);

    uint32_t total = 0;
    uint32_t idx = 0;
    while (idx < (1<<19)) {
        const uint32_t common = A[idx] & B[idx];
        total += BITCOUNT(common);
        ++idx;
    }
    return total;
}
264
265
// Return the number of bits set in (A & B & C) over 2^19 32-bit words.
// None of the arrays is modified.
// All pointers are declared to the compiler as aligned to __BIGGEST_ALIGNMENT__.
inline uint32_t COUNT_BITARRAY_AND3(uint32_t *restrict A, uint32_t *restrict B, uint32_t *restrict C)
{
    A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
    B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
    C = __builtin_assume_aligned(C, __BIGGEST_ALIGNMENT__);

    uint32_t total = 0;
    uint32_t idx = 0;
    while (idx < (1<<19)) {
        const uint32_t common = A[idx] & B[idx] & C[idx];
        total += BITCOUNT(common);
        ++idx;
    }
    return total;
}
277
278
// Return the number of bits set in (A & B & C & D) over 2^19 32-bit words.
// None of the arrays is modified.
// All pointers are declared to the compiler as aligned to __BIGGEST_ALIGNMENT__.
inline uint32_t COUNT_BITARRAY_AND4(uint32_t *restrict A, uint32_t *restrict B, uint32_t *restrict C, uint32_t *restrict D)
{
    A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
    B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
    C = __builtin_assume_aligned(C, __BIGGEST_ALIGNMENT__);
    D = __builtin_assume_aligned(D, __BIGGEST_ALIGNMENT__);

    uint32_t total = 0;
    uint32_t idx = 0;
    while (idx < (1<<19)) {
        const uint32_t common = A[idx] & B[idx] & C[idx] & D[idx];
        total += BITCOUNT(common);
        ++idx;
    }
    return total;
}
291
292
293 #ifndef __MMX__
294
295 // pointers to functions:
296 malloc_bitarray_t *malloc_bitarray_function_p = &malloc_bitarray_dispatch;
297 free_bitarray_t *free_bitarray_function_p = &free_bitarray_dispatch;
298 bitcount_t *bitcount_function_p = &bitcount_dispatch;
299 count_states_t *count_states_function_p = &count_states_dispatch;
300 bitarray_AND_t *bitarray_AND_function_p = &bitarray_AND_dispatch;
301 bitarray_low20_AND_t *bitarray_low20_AND_function_p = &bitarray_low20_AND_dispatch;
302 count_bitarray_AND_t *count_bitarray_AND_function_p = &count_bitarray_AND_dispatch;
303 count_bitarray_low20_AND_t *count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_dispatch;
304 bitarray_AND4_t *bitarray_AND4_function_p = &bitarray_AND4_dispatch;
305 bitarray_OR_t *bitarray_OR_function_p = &bitarray_OR_dispatch;
306 count_bitarray_AND2_t *count_bitarray_AND2_function_p = &count_bitarray_AND2_dispatch;
307 count_bitarray_AND3_t *count_bitarray_AND3_function_p = &count_bitarray_AND3_dispatch;
308 count_bitarray_AND4_t *count_bitarray_AND4_function_p = &count_bitarray_AND4_dispatch;
309
310 // determine the available instruction set at runtime and call the correct function
311 uint32_t *malloc_bitarray_dispatch(uint32_t x) {
312 #if defined (__i386__) || defined (__x86_64__)
313 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
314 if (__builtin_cpu_supports("avx512f")) malloc_bitarray_function_p = &malloc_bitarray_AVX512;
315 else if (__builtin_cpu_supports("avx2")) malloc_bitarray_function_p = &malloc_bitarray_AVX2;
316 #else
317 if (__builtin_cpu_supports("avx2")) malloc_bitarray_function_p = &malloc_bitarray_AVX2;
318 #endif
319 else if (__builtin_cpu_supports("avx")) malloc_bitarray_function_p = &malloc_bitarray_AVX;
320 else if (__builtin_cpu_supports("sse2")) malloc_bitarray_function_p = &malloc_bitarray_SSE2;
321 else if (__builtin_cpu_supports("mmx")) malloc_bitarray_function_p = &malloc_bitarray_MMX;
322 else
323 #endif
324 malloc_bitarray_function_p = &malloc_bitarray_NOSIMD;
325
326 // call the most optimized function for this CPU
327 return (*malloc_bitarray_function_p)(x);
328 }
329
330 void free_bitarray_dispatch(uint32_t *x) {
331 #if defined (__i386__) || defined (__x86_64__)
332 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
333 if (__builtin_cpu_supports("avx512f")) free_bitarray_function_p = &free_bitarray_AVX512;
334 else if (__builtin_cpu_supports("avx2")) free_bitarray_function_p = &free_bitarray_AVX2;
335 #else
336 if (__builtin_cpu_supports("avx2")) free_bitarray_function_p = &free_bitarray_AVX2;
337 #endif
338 else if (__builtin_cpu_supports("avx")) free_bitarray_function_p = &free_bitarray_AVX;
339 else if (__builtin_cpu_supports("sse2")) free_bitarray_function_p = &free_bitarray_SSE2;
340 else if (__builtin_cpu_supports("mmx")) free_bitarray_function_p = &free_bitarray_MMX;
341 else
342 #endif
343 free_bitarray_function_p = &free_bitarray_NOSIMD;
344
345 // call the most optimized function for this CPU
346 (*free_bitarray_function_p)(x);
347 }
348
349 uint32_t bitcount_dispatch(uint32_t a) {
350 #if defined (__i386__) || defined (__x86_64__)
351 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
352 if (__builtin_cpu_supports("avx512f")) bitcount_function_p = &bitcount_AVX512;
353 else if (__builtin_cpu_supports("avx2")) bitcount_function_p = &bitcount_AVX2;
354 #else
355 if (__builtin_cpu_supports("avx2")) bitcount_function_p = &bitcount_AVX2;
356 #endif
357 else if (__builtin_cpu_supports("avx")) bitcount_function_p = &bitcount_AVX;
358 else if (__builtin_cpu_supports("sse2")) bitcount_function_p = &bitcount_SSE2;
359 else if (__builtin_cpu_supports("mmx")) bitcount_function_p = &bitcount_MMX;
360 else
361 #endif
362 bitcount_function_p = &bitcount_NOSIMD;
363
364 // call the most optimized function for this CPU
365 return (*bitcount_function_p)(a);
366 }
367
368 uint32_t count_states_dispatch(uint32_t *bitarray) {
369 #if defined (__i386__) || defined (__x86_64__)
370 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
371 if (__builtin_cpu_supports("avx512f")) count_states_function_p = &count_states_AVX512;
372 else if (__builtin_cpu_supports("avx2")) count_states_function_p = &count_states_AVX2;
373 #else
374 if (__builtin_cpu_supports("avx2")) count_states_function_p = &count_states_AVX2;
375 #endif
376 else if (__builtin_cpu_supports("avx")) count_states_function_p = &count_states_AVX;
377 else if (__builtin_cpu_supports("sse2")) count_states_function_p = &count_states_SSE2;
378 else if (__builtin_cpu_supports("mmx")) count_states_function_p = &count_states_MMX;
379 else
380 #endif
381 count_states_function_p = &count_states_NOSIMD;
382
383 // call the most optimized function for this CPU
384 return (*count_states_function_p)(bitarray);
385 }
386
387 void bitarray_AND_dispatch(uint32_t *A, uint32_t *B) {
388 #if defined (__i386__) || defined (__x86_64__)
389 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
390 if (__builtin_cpu_supports("avx512f")) bitarray_AND_function_p = &bitarray_AND_AVX512;
391 else if (__builtin_cpu_supports("avx2")) bitarray_AND_function_p = &bitarray_AND_AVX2;
392 #else
393 if (__builtin_cpu_supports("avx2")) bitarray_AND_function_p = &bitarray_AND_AVX2;
394 #endif
395 else if (__builtin_cpu_supports("avx")) bitarray_AND_function_p = &bitarray_AND_AVX;
396 else if (__builtin_cpu_supports("sse2")) bitarray_AND_function_p = &bitarray_AND_SSE2;
397 else if (__builtin_cpu_supports("mmx")) bitarray_AND_function_p = &bitarray_AND_MMX;
398 else
399 #endif
400 bitarray_AND_function_p = &bitarray_AND_NOSIMD;
401
402 // call the most optimized function for this CPU
403 (*bitarray_AND_function_p)(A,B);
404 }
405
406 void bitarray_low20_AND_dispatch(uint32_t *A, uint32_t *B) {
407 #if defined (__i386__) || defined (__x86_64__)
408 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
409 if (__builtin_cpu_supports("avx512f")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX512;
410 else if (__builtin_cpu_supports("avx2")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX2;
411 #else
412 if (__builtin_cpu_supports("avx2")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX2;
413 #endif
414 else if (__builtin_cpu_supports("avx")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX;
415 else if (__builtin_cpu_supports("sse2")) bitarray_low20_AND_function_p = &bitarray_low20_AND_SSE2;
416 else if (__builtin_cpu_supports("mmx")) bitarray_low20_AND_function_p = &bitarray_low20_AND_MMX;
417 else
418 #endif
419 bitarray_low20_AND_function_p = &bitarray_low20_AND_NOSIMD;
420
421 // call the most optimized function for this CPU
422 (*bitarray_low20_AND_function_p)(A, B);
423 }
424
425 uint32_t count_bitarray_AND_dispatch(uint32_t *A, uint32_t *B) {
426 #if defined (__i386__) || defined (__x86_64__)
427 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
428 if (__builtin_cpu_supports("avx512f")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX512;
429 else if (__builtin_cpu_supports("avx2")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX2;
430 #else
431 if (__builtin_cpu_supports("avx2")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX2;
432 #endif
433 else if (__builtin_cpu_supports("avx")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX;
434 else if (__builtin_cpu_supports("sse2")) count_bitarray_AND_function_p = &count_bitarray_AND_SSE2;
435 else if (__builtin_cpu_supports("mmx")) count_bitarray_AND_function_p = &count_bitarray_AND_MMX;
436 else
437 #endif
438 count_bitarray_AND_function_p = &count_bitarray_AND_NOSIMD;
439
440 // call the most optimized function for this CPU
441 return (*count_bitarray_AND_function_p)(A, B);
442 }
443
444 uint32_t count_bitarray_low20_AND_dispatch(uint32_t *A, uint32_t *B) {
445 #if defined (__i386__) || defined (__x86_64__)
446 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
447 if (__builtin_cpu_supports("avx512f")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX512;
448 else if (__builtin_cpu_supports("avx2")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX2;
449 #else
450 if (__builtin_cpu_supports("avx2")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX2;
451 #endif
452 else if (__builtin_cpu_supports("avx")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX;
453 else if (__builtin_cpu_supports("sse2")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_SSE2;
454 else if (__builtin_cpu_supports("mmx")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_MMX;
455 else
456 #endif
457 count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_NOSIMD;
458
459 // call the most optimized function for this CPU
460 return (*count_bitarray_low20_AND_function_p)(A, B);
461 }
462
463 void bitarray_AND4_dispatch(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D) {
464 #if defined (__i386__) || defined (__x86_64__)
465 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
466 if (__builtin_cpu_supports("avx512f")) bitarray_AND4_function_p = &bitarray_AND4_AVX512;
467 else if (__builtin_cpu_supports("avx2")) bitarray_AND4_function_p = &bitarray_AND4_AVX2;
468 #else
469 if (__builtin_cpu_supports("avx2")) bitarray_AND4_function_p = &bitarray_AND4_AVX2;
470 #endif
471 else if (__builtin_cpu_supports("avx")) bitarray_AND4_function_p = &bitarray_AND4_AVX;
472 else if (__builtin_cpu_supports("sse2")) bitarray_AND4_function_p = &bitarray_AND4_SSE2;
473 else if (__builtin_cpu_supports("mmx")) bitarray_AND4_function_p = &bitarray_AND4_MMX;
474 else
475 #endif
476 bitarray_AND4_function_p = &bitarray_AND4_NOSIMD;
477
478 // call the most optimized function for this CPU
479 (*bitarray_AND4_function_p)(A, B, C, D);
480 }
481
482 void bitarray_OR_dispatch(uint32_t *A, uint32_t *B) {
483 #if defined (__i386__) || defined (__x86_64__)
484 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
485 if (__builtin_cpu_supports("avx512f")) bitarray_OR_function_p = &bitarray_OR_AVX512;
486 else if (__builtin_cpu_supports("avx2")) bitarray_OR_function_p = &bitarray_OR_AVX2;
487 #else
488 if (__builtin_cpu_supports("avx2")) bitarray_OR_function_p = &bitarray_OR_AVX2;
489 #endif
490 else if (__builtin_cpu_supports("avx")) bitarray_OR_function_p = &bitarray_OR_AVX;
491 else if (__builtin_cpu_supports("sse2")) bitarray_OR_function_p = &bitarray_OR_SSE2;
492 else if (__builtin_cpu_supports("mmx")) bitarray_OR_function_p = &bitarray_OR_MMX;
493 else
494 #endif
495 bitarray_OR_function_p = &bitarray_OR_NOSIMD;
496
497 // call the most optimized function for this CPU
498 (*bitarray_OR_function_p)(A,B);
499 }
500
501 uint32_t count_bitarray_AND2_dispatch(uint32_t *A, uint32_t *B) {
502 #if defined (__i386__) || defined (__x86_64__)
503 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
504 if (__builtin_cpu_supports("avx512f")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX512;
505 else if (__builtin_cpu_supports("avx2")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX2;
506 #else
507 if (__builtin_cpu_supports("avx2")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX2;
508 #endif
509 else if (__builtin_cpu_supports("avx")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX;
510 else if (__builtin_cpu_supports("sse2")) count_bitarray_AND2_function_p = &count_bitarray_AND2_SSE2;
511 else if (__builtin_cpu_supports("mmx")) count_bitarray_AND2_function_p = &count_bitarray_AND2_MMX;
512 else
513 #endif
514 count_bitarray_AND2_function_p = &count_bitarray_AND2_NOSIMD;
515
516 // call the most optimized function for this CPU
517 return (*count_bitarray_AND2_function_p)(A, B);
518 }
519
520 uint32_t count_bitarray_AND3_dispatch(uint32_t *A, uint32_t *B, uint32_t *C) {
521 #if defined (__i386__) || defined (__x86_64__)
522 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
523 if (__builtin_cpu_supports("avx512f")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX512;
524 else if (__builtin_cpu_supports("avx2")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX2;
525 #else
526 if (__builtin_cpu_supports("avx2")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX2;
527 #endif
528 else if (__builtin_cpu_supports("avx")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX;
529 else if (__builtin_cpu_supports("sse2")) count_bitarray_AND3_function_p = &count_bitarray_AND3_SSE2;
530 else if (__builtin_cpu_supports("mmx")) count_bitarray_AND3_function_p = &count_bitarray_AND3_MMX;
531 else
532 #endif
533 count_bitarray_AND3_function_p = &count_bitarray_AND3_NOSIMD;
534
535 // call the most optimized function for this CPU
536 return (*count_bitarray_AND3_function_p)(A, B, C);
537 }
538
539 uint32_t count_bitarray_AND4_dispatch(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D) {
540 #if defined (__i386__) || defined (__x86_64__)
541 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
542 if (__builtin_cpu_supports("avx512f")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX512;
543 else if (__builtin_cpu_supports("avx2")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX2;
544 #else
545 if (__builtin_cpu_supports("avx2")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX2;
546 #endif
547 else if (__builtin_cpu_supports("avx")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX;
548 else if (__builtin_cpu_supports("sse2")) count_bitarray_AND4_function_p = &count_bitarray_AND4_SSE2;
549 else if (__builtin_cpu_supports("mmx")) count_bitarray_AND4_function_p = &count_bitarray_AND4_MMX;
550 else
551 #endif
552 count_bitarray_AND4_function_p = &count_bitarray_AND4_NOSIMD;
553
554 // call the most optimized function for this CPU
555 return (*count_bitarray_AND4_function_p)(A, B, C, D);
556 }
557
558
559 /////////////////////////////////////////////////
560 // Entries to dispatched function calls
561
// Public entry point: forwards to the function malloc_bitarray_function_p currently points at.
uint32_t *malloc_bitarray(uint32_t x)
{
    return malloc_bitarray_function_p(x);
}
565
// Public entry point: forwards to the function free_bitarray_function_p currently points at.
void free_bitarray(uint32_t *x)
{
    free_bitarray_function_p(x);
}
569
// Public entry point: forwards to the function bitcount_function_p currently points at.
uint32_t bitcount(uint32_t a)
{
    return bitcount_function_p(a);
}
573
// Public entry point: forwards to the function count_states_function_p currently points at.
uint32_t count_states(uint32_t *bitarray)
{
    return count_states_function_p(bitarray);
}
577
// Public entry point: forwards to the function bitarray_AND_function_p currently points at.
void bitarray_AND(uint32_t *A, uint32_t *B)
{
    bitarray_AND_function_p(A, B);
}
581
// Public entry point: forwards to the function bitarray_low20_AND_function_p currently points at.
void bitarray_low20_AND(uint32_t *A, uint32_t *B)
{
    bitarray_low20_AND_function_p(A, B);
}
585
// Public entry point: forwards to the function count_bitarray_AND_function_p currently points at.
uint32_t count_bitarray_AND(uint32_t *A, uint32_t *B)
{
    return count_bitarray_AND_function_p(A, B);
}
589
// Public entry point: forwards to the function count_bitarray_low20_AND_function_p currently points at.
uint32_t count_bitarray_low20_AND(uint32_t *A, uint32_t *B)
{
    return count_bitarray_low20_AND_function_p(A, B);
}
593
// Public entry point: forwards to the function bitarray_AND4_function_p currently points at.
void bitarray_AND4(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D)
{
    bitarray_AND4_function_p(A, B, C, D);
}
597
// Public entry point: forwards to the function bitarray_OR_function_p currently points at.
void bitarray_OR(uint32_t *A, uint32_t *B)
{
    bitarray_OR_function_p(A, B);
}
601
// Public entry point: forwards to the function count_bitarray_AND2_function_p currently points at.
uint32_t count_bitarray_AND2(uint32_t *A, uint32_t *B)
{
    return count_bitarray_AND2_function_p(A, B);
}
605
// Public entry point: forwards to the function count_bitarray_AND3_function_p currently points at.
uint32_t count_bitarray_AND3(uint32_t *A, uint32_t *B, uint32_t *C)
{
    return count_bitarray_AND3_function_p(A, B, C);
}
609
// Public entry point: forwards to the function count_bitarray_AND4_function_p currently points at.
uint32_t count_bitarray_AND4(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D)
{
    return count_bitarray_AND4_function_p(A, B, C, D);
}
613
614 #endif
615
Impressum, Datenschutz