]>
Commit | Line | Data |
---|---|---|
1 | //----------------------------------------------------------------------------- | |
2 | // Copyright (C) 2016, 2017 by piwi | |
3 | // | |
4 | // This code is licensed to you under the terms of the GNU GPL, version 2 or, | |
5 | // at your option, any later version. See the LICENSE.txt file for the text of | |
6 | // the license. | |
7 | //----------------------------------------------------------------------------- | |
8 | // Implements a card only attack based on crypto text (encrypted nonces | |
9 | // received during a nested authentication) only. Unlike other card only | |
10 | // attacks this doesn't rely on implementation errors but only on the | |
11 | // inherent weaknesses of the crypto1 cypher. Described in | |
12 | // Carlo Meijer, Roel Verdult, "Ciphertext-only Cryptanalysis on Hardened | |
13 | // Mifare Classic Cards" in Proceedings of the 22nd ACM SIGSAC Conference on | |
14 | // Computer and Communications Security, 2015 | |
15 | //----------------------------------------------------------------------------- | |
16 | // | |
17 | // brute forcing is based on @aczids bitsliced brute forcer | |
18 | // https://github.com/aczid/crypto1_bs with some modifications. Mainly: | |
19 | // - don't rollback. Start with 2nd byte of nonce instead | |
20 | // - reuse results of filter subfunctions | |
21 | // - reuse results of previous nonces if some first bits are identical | |
22 | // | |
23 | //----------------------------------------------------------------------------- | |
24 | // aczid's Copyright notice: | |
25 | // | |
26 | // Bit-sliced Crypto-1 brute-forcing implementation | |
27 | // Builds on the data structures returned by CraptEV1 craptev1_get_space(nonces, threshold, uid) | |
28 | /* | |
29 | Copyright (c) 2015-2016 Aram Verstegen | |
30 | ||
31 | Permission is hereby granted, free of charge, to any person obtaining a copy | |
32 | of this software and associated documentation files (the "Software"), to deal | |
33 | in the Software without restriction, including without limitation the rights | |
34 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
35 | copies of the Software, and to permit persons to whom the Software is | |
36 | furnished to do so, subject to the following conditions: | |
37 | ||
38 | The above copyright notice and this permission notice shall be included in | |
39 | all copies or substantial portions of the Software. | |
40 | ||
41 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
42 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
43 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
44 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
45 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
46 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
47 | THE SOFTWARE. | |
48 | */ | |
49 | ||
50 | #include "hardnested_bf_core.h" | |
51 | ||
52 | #include <stdint.h> | |
53 | #include <stdbool.h> | |
54 | #include <stdlib.h> | |
55 | #include <stdio.h> | |
56 | #include <malloc.h> | |
57 | #include <string.h> | |
58 | #include "crapto1/crapto1.h" | |
59 | #include "parity.h" | |
60 | ||
61 | // bitslice type | |
62 | // while AVX supports 256 bit vector floating point operations, we need integer operations for boolean logic | |
63 | // same for AVX2 and 512 bit vectors | |
64 | // using larger vectors works but seems to generate more register pressure | |
65 | #if defined(__AVX512F__) | |
66 | #define MAX_BITSLICES 512 | |
67 | #elif defined(__AVX2__) | |
68 | #define MAX_BITSLICES 256 | |
69 | #elif defined(__AVX__) | |
70 | #define MAX_BITSLICES 128 | |
71 | #elif defined(__SSE2__) | |
72 | #define MAX_BITSLICES 128 | |
73 | #else // MMX or SSE or NOSIMD | |
74 | #define MAX_BITSLICES 64 | |
75 | #endif | |
76 | ||
77 | #define VECTOR_SIZE (MAX_BITSLICES/8) | |
78 | typedef unsigned int __attribute__((aligned(VECTOR_SIZE))) __attribute__((vector_size(VECTOR_SIZE))) bitslice_value_t; | |
79 | typedef union { | |
80 | bitslice_value_t value; | |
81 | uint64_t bytes64[MAX_BITSLICES/64]; | |
82 | uint8_t bytes[MAX_BITSLICES/8]; | |
83 | } bitslice_t; | |
84 | ||
85 | // filter function (f20) | |
86 | // sourced from ``Wirelessly Pickpocketing a Mifare Classic Card'' by Flavio Garcia, Peter van Rossum, Roel Verdult and Ronny Wichers Schreur | |
87 | #define f20a(a,b,c,d) (((a|b)^(a&d))^(c&((a^b)|d))) | |
88 | #define f20b(a,b,c,d) (((a&b)|c)^((a^b)&(c|d))) | |
89 | #define f20c(a,b,c,d,e) ((a|((b|e)&(d^e)))^((a^(b&d))&((c^d)|(b&e)))) | |
90 | ||
91 | // bit indexing | |
92 | #define get_bit(n, word) (((word) >> (n)) & 1) | |
93 | #define get_vector_bit(slice, value) get_bit((slice)&0x3f, value.bytes64[(slice)>>6]) | |
94 | ||
95 | // size of crypto-1 state | |
96 | #define STATE_SIZE 48 | |
97 | // size of nonce to be decrypted | |
98 | #define KEYSTREAM_SIZE 24 | |
99 | ||
100 | // endianness conversion | |
101 | #define rev32(word) ((((word) & 0xff) << 24) | ((((word) >> 8) & 0xff) << 16) | ((((word) >> 16) & 0xff) << 8) | ((((word) >> 24) & 0xff))) | |
102 | ||
103 | // this needs to be compiled several times for each instruction set. | |
104 | // For each instruction set, define a dedicated function name: | |
105 | #if defined (__AVX512F__) | |
106 | #define BITSLICE_TEST_NONCES bitslice_test_nonces_AVX512 | |
107 | #define CRACK_STATES_BITSLICED crack_states_bitsliced_AVX512 | |
108 | #elif defined (__AVX2__) | |
109 | #define BITSLICE_TEST_NONCES bitslice_test_nonces_AVX2 | |
110 | #define CRACK_STATES_BITSLICED crack_states_bitsliced_AVX2 | |
111 | #elif defined (__AVX__) | |
112 | #define BITSLICE_TEST_NONCES bitslice_test_nonces_AVX | |
113 | #define CRACK_STATES_BITSLICED crack_states_bitsliced_AVX | |
114 | #elif defined (__SSE2__) | |
115 | #define BITSLICE_TEST_NONCES bitslice_test_nonces_SSE2 | |
116 | #define CRACK_STATES_BITSLICED crack_states_bitsliced_SSE2 | |
117 | #elif defined (__MMX__) | |
118 | #define BITSLICE_TEST_NONCES bitslice_test_nonces_MMX | |
119 | #define CRACK_STATES_BITSLICED crack_states_bitsliced_MMX | |
120 | #else | |
121 | #define BITSLICE_TEST_NONCES bitslice_test_nonces_NOSIMD | |
122 | #define CRACK_STATES_BITSLICED crack_states_bitsliced_NOSIMD | |
123 | #endif | |
124 | ||
125 | // typedefs and declaration of functions: | |
126 | typedef const uint64_t crack_states_bitsliced_t(uint32_t, uint8_t*, statelist_t*, uint32_t*, uint64_t*, uint32_t, uint8_t*, noncelist_t*); | |
127 | crack_states_bitsliced_t crack_states_bitsliced_AVX512; | |
128 | crack_states_bitsliced_t crack_states_bitsliced_AVX2; | |
129 | crack_states_bitsliced_t crack_states_bitsliced_AVX; | |
130 | crack_states_bitsliced_t crack_states_bitsliced_SSE2; | |
131 | crack_states_bitsliced_t crack_states_bitsliced_MMX; | |
132 | crack_states_bitsliced_t crack_states_bitsliced_NOSIMD; | |
133 | crack_states_bitsliced_t crack_states_bitsliced_dispatch; | |
134 | ||
135 | typedef void bitslice_test_nonces_t(uint32_t, uint32_t*, uint8_t*); | |
136 | bitslice_test_nonces_t bitslice_test_nonces_AVX512; | |
137 | bitslice_test_nonces_t bitslice_test_nonces_AVX2; | |
138 | bitslice_test_nonces_t bitslice_test_nonces_AVX; | |
139 | bitslice_test_nonces_t bitslice_test_nonces_SSE2; | |
140 | bitslice_test_nonces_t bitslice_test_nonces_MMX; | |
141 | bitslice_test_nonces_t bitslice_test_nonces_NOSIMD; | |
142 | bitslice_test_nonces_t bitslice_test_nonces_dispatch; | |
143 | ||
144 | #ifdef _WIN32 | |
145 | #define malloc_bitslice(x) __builtin_assume_aligned(_aligned_malloc((x), MAX_BITSLICES/8), MAX_BITSLICES/8) | |
146 | #define free_bitslice(x) _aligned_free(x) | |
147 | #else | |
148 | #define malloc_bitslice(x) memalign(MAX_BITSLICES/8, (x)) | |
149 | #define free_bitslice(x) free(x) | |
150 | #endif | |
151 | ||
152 | typedef enum { | |
153 | EVEN_STATE = 0, | |
154 | ODD_STATE = 1 | |
155 | } odd_even_t; | |
156 | ||
157 | ||
158 | // arrays of bitsliced states with identical values in all slices | |
159 | static bitslice_t bitsliced_encrypted_nonces[256][KEYSTREAM_SIZE]; | |
160 | static bitslice_t bitsliced_encrypted_parity_bits[256][4]; | |
161 | // 1 and 0 vectors | |
162 | static bitslice_t bs_ones; | |
163 | static bitslice_t bs_zeroes; | |
164 | ||
165 | ||
166 | void BITSLICE_TEST_NONCES(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) { | |
167 | ||
168 | // initialize 1 and 0 vectors | |
169 | memset(bs_ones.bytes, 0xff, VECTOR_SIZE); | |
170 | memset(bs_zeroes.bytes, 0x00, VECTOR_SIZE); | |
171 | ||
172 | // bitslice nonces' 2nd to 4th byte | |
173 | for (uint32_t i = 0; i < nonces_to_bruteforce; i++) { | |
174 | for(uint32_t bit_idx = 0; bit_idx < KEYSTREAM_SIZE; bit_idx++){ | |
175 | bool bit = get_bit(KEYSTREAM_SIZE-1-bit_idx, rev32(bf_test_nonce[i] << 8)); | |
176 | if(bit){ | |
177 | bitsliced_encrypted_nonces[i][bit_idx].value = bs_ones.value; | |
178 | } else { | |
179 | bitsliced_encrypted_nonces[i][bit_idx].value = bs_zeroes.value; | |
180 | } | |
181 | } | |
182 | } | |
183 | // bitslice nonces' parity (4 bits) | |
184 | for (uint32_t i = 0; i < nonces_to_bruteforce; i++) { | |
185 | for(uint32_t bit_idx = 0; bit_idx < 4; bit_idx++){ | |
186 | bool bit = get_bit(4-1-bit_idx, bf_test_nonce_par[i]); | |
187 | if(bit){ | |
188 | bitsliced_encrypted_parity_bits[i][bit_idx].value = bs_ones.value; | |
189 | } else { | |
190 | bitsliced_encrypted_parity_bits[i][bit_idx].value = bs_zeroes.value; | |
191 | } | |
192 | } | |
193 | } | |
194 | ||
195 | } | |
196 | ||
197 | ||
198 | const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces){ | |
199 | ||
200 | // Unlike aczid's implementation this doesn't roll back at all when performing bitsliced bruteforce. | |
201 | // We know that the best first byte is already shifted in. Testing with the remaining three bytes of | |
202 | // the nonces is sufficient to eliminate most of them. The small rest is tested with a simple unsliced | |
203 | // brute forcing (including roll back). | |
204 | ||
205 | bitslice_t states[KEYSTREAM_SIZE+STATE_SIZE]; | |
206 | bitslice_t * restrict state_p; | |
207 | uint64_t key = -1; | |
208 | uint64_t bucket_states_tested = 0; | |
209 | uint32_t bucket_size[(p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1]; | |
210 | uint32_t bitsliced_blocks = 0; | |
211 | uint32_t const *restrict p_even_end = p->states[EVEN_STATE] + p->len[EVEN_STATE]; | |
212 | #if defined (DEBUG_BRUTE_FORCE) | |
213 | uint32_t elimination_step = 0; | |
214 | #define MAX_ELIMINATION_STEP 32 | |
215 | uint64_t keys_eliminated[MAX_ELIMINATION_STEP] = {0}; | |
216 | #endif | |
217 | #ifdef DEBUG_KEY_ELIMINATION | |
218 | bool bucket_contains_test_key[(p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1]; | |
219 | #endif | |
220 | ||
221 | // constant ones/zeroes | |
222 | bitslice_t bs_ones; | |
223 | memset(bs_ones.bytes, 0xff, VECTOR_SIZE); | |
224 | bitslice_t bs_zeroes; | |
225 | memset(bs_zeroes.bytes, 0x00, VECTOR_SIZE); | |
226 | ||
227 | // bitslice all the even states | |
228 | bitslice_t **restrict bitsliced_even_states = (bitslice_t **)malloc(((p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1) * sizeof(bitslice_t *)); | |
229 | if (bitsliced_even_states == NULL) { | |
230 | printf("Out of memory error in brute_force. Aborting..."); | |
231 | exit(4); | |
232 | } | |
233 | bitslice_value_t *restrict bitsliced_even_feedback = malloc_bitslice(((p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1) * sizeof(bitslice_value_t)); | |
234 | if (bitsliced_even_feedback == NULL) { | |
235 | printf("Out of memory error in brute_force. Aborting..."); | |
236 | exit(4); | |
237 | } | |
238 | for(uint32_t *restrict p_even = p->states[EVEN_STATE]; p_even < p_even_end; p_even += MAX_BITSLICES){ | |
239 | bitslice_t *restrict lstate_p = malloc_bitslice(STATE_SIZE/2*sizeof(bitslice_t)); | |
240 | if (lstate_p == NULL) { | |
241 | printf("Out of memory error in brute_force. Aborting... \n"); | |
242 | exit(4); | |
243 | } | |
244 | memset(lstate_p, 0x00, STATE_SIZE/2*sizeof(bitslice_t)); // zero even bits | |
245 | // bitslice even half-states | |
246 | const uint32_t max_slices = (p_even_end-p_even) < MAX_BITSLICES ? p_even_end-p_even : MAX_BITSLICES; | |
247 | bucket_size[bitsliced_blocks] = max_slices; | |
248 | #ifdef DEBUG_KEY_ELIMINATION | |
249 | bucket_contains_test_key[bitsliced_blocks] = false; | |
250 | #endif | |
251 | uint32_t slice_idx; | |
252 | for(slice_idx = 0; slice_idx < max_slices; ++slice_idx){ | |
253 | uint32_t e = *(p_even+slice_idx); | |
254 | #ifdef DEBUG_KEY_ELIMINATION | |
255 | if (known_target_key != -1 && e == test_state[EVEN_STATE]) { | |
256 | bucket_contains_test_key[bitsliced_blocks] = true; | |
257 | // printf("bucket %d contains test key even state\n", bitsliced_blocks); | |
258 | // printf("in slice %d\n", slice_idx); | |
259 | } | |
260 | #endif | |
261 | for(uint32_t bit_idx = 0; bit_idx < STATE_SIZE/2; bit_idx++, e >>= 1){ | |
262 | // set even bits | |
263 | if(e&1){ | |
264 | lstate_p[bit_idx].bytes64[slice_idx>>6] |= 1ull << (slice_idx & 0x3f); | |
265 | } | |
266 | } | |
267 | } | |
268 | // padding with last even state | |
269 | for ( ; slice_idx < MAX_BITSLICES; ++slice_idx) { | |
270 | uint32_t e = *(p_even_end-1); | |
271 | for(uint32_t bit_idx = 0; bit_idx < STATE_SIZE/2; bit_idx++, e >>= 1){ | |
272 | // set even bits | |
273 | if(e&1){ | |
274 | lstate_p[bit_idx].bytes64[slice_idx>>6] |= 1ull << (slice_idx & 0x3f); | |
275 | } | |
276 | } | |
277 | } | |
278 | bitsliced_even_states[bitsliced_blocks] = lstate_p; | |
279 | // bitsliced_even_feedback[bitsliced_blocks] = bs_ones; | |
280 | bitsliced_even_feedback[bitsliced_blocks] = lstate_p[(47- 0)/2].value ^ | |
281 | lstate_p[(47-10)/2].value ^ lstate_p[(47-12)/2].value ^ lstate_p[(47-14)/2].value ^ | |
282 | lstate_p[(47-24)/2].value ^ lstate_p[(47-42)/2].value; | |
283 | bitsliced_blocks++; | |
284 | } | |
285 | // bitslice every odd state to every block of even states | |
286 | for(uint32_t const *restrict p_odd = p->states[ODD_STATE]; p_odd < p->states[ODD_STATE] + p->len[ODD_STATE]; ++p_odd){ | |
287 | // early abort | |
288 | if(*keys_found){ | |
289 | goto out; | |
290 | } | |
291 | ||
292 | // set odd state bits and pre-compute first keystream bit vector. This is the same for all blocks of even states | |
293 | ||
294 | state_p = &states[KEYSTREAM_SIZE]; | |
295 | uint32_t o = *p_odd; | |
296 | ||
297 | // pre-compute the odd feedback bit | |
298 | bool odd_feedback_bit = evenparity32(o&0x29ce5c); | |
299 | const bitslice_value_t odd_feedback = odd_feedback_bit ? bs_ones.value : bs_zeroes.value; | |
300 | ||
301 | // set odd state bits | |
302 | for (uint32_t state_idx = 0; state_idx < STATE_SIZE; o >>= 1, state_idx += 2) { | |
303 | if (o & 1){ | |
304 | state_p[state_idx] = bs_ones; | |
305 | } else { | |
306 | state_p[state_idx] = bs_zeroes; | |
307 | } | |
308 | } | |
309 | ||
310 | bitslice_value_t crypto1_bs_f20b_2[16]; | |
311 | bitslice_value_t crypto1_bs_f20b_3[8]; | |
312 | ||
313 | crypto1_bs_f20b_2[0] = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value); | |
314 | crypto1_bs_f20b_3[0] = f20b(state_p[47-41].value, state_p[47-43].value, state_p[47-45].value, state_p[47-47].value); | |
315 | ||
316 | bitslice_value_t ksb[8]; | |
317 | ksb[0] = f20c(f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value), | |
318 | f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value), | |
319 | crypto1_bs_f20b_2[0], | |
320 | f20a(state_p[47-33].value, state_p[47-35].value, state_p[47-37].value, state_p[47-39].value), | |
321 | crypto1_bs_f20b_3[0]); | |
322 | ||
323 | uint32_t *restrict p_even = p->states[EVEN_STATE]; | |
324 | for (uint32_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx, p_even += MAX_BITSLICES) { | |
325 | ||
326 | #ifdef DEBUG_KEY_ELIMINATION | |
327 | // if (known_target_key != -1 && bucket_contains_test_key[block_idx] && *p_odd == test_state[ODD_STATE]) { | |
328 | // printf("Now testing known target key.\n"); | |
329 | // printf("block_idx = %d/%d\n", block_idx, bitsliced_blocks); | |
330 | // } | |
331 | #endif | |
332 | // add the even state bits | |
333 | const bitslice_t const *restrict bitsliced_even_state = bitsliced_even_states[block_idx]; | |
334 | for(uint32_t state_idx = 1; state_idx < STATE_SIZE; state_idx += 2) { | |
335 | state_p[state_idx] = bitsliced_even_state[state_idx/2]; | |
336 | } | |
337 | ||
338 | // pre-compute first feedback bit vector. This is the same for all nonces | |
339 | bitslice_value_t fbb[8]; | |
340 | fbb[0] = odd_feedback ^ bitsliced_even_feedback[block_idx]; | |
341 | ||
342 | // vector to contain test results (1 = passed, 0 = failed) | |
343 | bitslice_t results = bs_ones; | |
344 | ||
345 | // parity_bits | |
346 | bitslice_value_t par[8]; | |
347 | par[0] = bs_zeroes.value; | |
348 | uint32_t next_common_bits = 0; | |
349 | ||
350 | for(uint32_t tests = 0; tests < nonces_to_bruteforce; ++tests){ | |
351 | // common bits with preceding test nonce | |
352 | uint32_t common_bits = next_common_bits; //tests ? trailing_zeros(bf_test_nonce_2nd_byte[tests] ^ bf_test_nonce_2nd_byte[tests-1]) : 0; | |
353 | next_common_bits = tests < nonces_to_bruteforce - 1 ? trailing_zeros(bf_test_nonce_2nd_byte[tests] ^ bf_test_nonce_2nd_byte[tests+1]) : 0; | |
354 | uint32_t parity_bit_idx = 1; // start checking with the parity of second nonce byte | |
355 | bitslice_value_t fb_bits = fbb[common_bits]; // start with precomputed feedback bits from previous nonce | |
356 | bitslice_value_t ks_bits = ksb[common_bits]; // dito for first keystream bits | |
357 | bitslice_value_t parity_bit_vector = par[common_bits]; // dito for first parity vector | |
358 | // bitslice_value_t fb_bits = fbb[0]; // start with precomputed feedback bits from previous nonce | |
359 | // bitslice_value_t ks_bits = ksb[0]; // dito for first keystream bits | |
360 | // bitslice_value_t parity_bit_vector = par[0]; // dito for first parity vector | |
361 | state_p -= common_bits; // and reuse the already calculated state bits | |
362 | // highest bit is transmitted/received first. We start with Bit 23 (highest bit of second nonce byte), | |
363 | // or the highest bit which differs from the previous nonce | |
364 | for (int32_t ks_idx = KEYSTREAM_SIZE-1-common_bits; ks_idx >= 0; --ks_idx) { | |
365 | ||
366 | // decrypt nonce bits | |
367 | const bitslice_value_t encrypted_nonce_bit_vector = bitsliced_encrypted_nonces[tests][ks_idx].value; | |
368 | const bitslice_value_t decrypted_nonce_bit_vector = encrypted_nonce_bit_vector ^ ks_bits; | |
369 | ||
370 | // compute real parity bits on the fly | |
371 | parity_bit_vector ^= decrypted_nonce_bit_vector; | |
372 | ||
373 | // update state | |
374 | state_p--; | |
375 | state_p[0].value = fb_bits ^ decrypted_nonce_bit_vector; | |
376 | ||
377 | // update crypto1 subfunctions | |
378 | bitslice_value_t f20a_1, f20b_1, f20b_2, f20a_2, f20b_3; | |
379 | f20a_2 = f20a(state_p[47-33].value, state_p[47-35].value, state_p[47-37].value, state_p[47-39].value); | |
380 | f20b_3 = f20b(state_p[47-41].value, state_p[47-43].value, state_p[47-45].value, state_p[47-47].value); | |
381 | if (ks_idx > KEYSTREAM_SIZE - 8) { | |
382 | f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value); | |
383 | f20b_1 = f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value); | |
384 | f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value); | |
385 | crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx] = f20b_2; | |
386 | crypto1_bs_f20b_3[KEYSTREAM_SIZE - ks_idx] = f20b_3; | |
387 | } else if (ks_idx > KEYSTREAM_SIZE - 16) { | |
388 | f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value); | |
389 | f20b_1 = crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx - 8]; | |
390 | f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value); | |
391 | crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx] = f20b_2; | |
392 | } else if (ks_idx > KEYSTREAM_SIZE - 24){ | |
393 | f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value); | |
394 | f20b_1 = crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx - 8]; | |
395 | f20b_2 = crypto1_bs_f20b_3[KEYSTREAM_SIZE - ks_idx - 16]; | |
396 | } else { | |
397 | f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value); | |
398 | f20b_1 = f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value); | |
399 | f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value); | |
400 | } | |
401 | // update keystream bit | |
402 | ks_bits = f20c(f20a_1, f20b_1, f20b_2, f20a_2, f20b_3); | |
403 | ||
404 | // for each completed byte: | |
405 | if ((ks_idx & 0x07) == 0) { | |
406 | // get encrypted parity bits | |
407 | const bitslice_value_t encrypted_parity_bit_vector = bitsliced_encrypted_parity_bits[tests][parity_bit_idx++].value; | |
408 | ||
409 | // decrypt parity bits | |
410 | const bitslice_value_t decrypted_parity_bit_vector = encrypted_parity_bit_vector ^ ks_bits; | |
411 | ||
412 | // compare actual parity bits with decrypted parity bits and take count in results vector | |
413 | results.value &= ~parity_bit_vector ^ decrypted_parity_bit_vector; | |
414 | ||
415 | // make sure we still have a match in our set | |
416 | // if(memcmp(&results, &bs_zeroes, sizeof(bitslice_t)) == 0){ | |
417 | ||
418 | // this is much faster on my gcc, because somehow a memcmp needlessly spills/fills all the xmm registers to/from the stack - ??? | |
419 | // the short-circuiting also helps | |
420 | if(results.bytes64[0] == 0 | |
421 | #if MAX_BITSLICES > 64 | |
422 | && results.bytes64[1] == 0 | |
423 | #endif | |
424 | #if MAX_BITSLICES > 128 | |
425 | && results.bytes64[2] == 0 | |
426 | && results.bytes64[3] == 0 | |
427 | #endif | |
428 | ) { | |
429 | #if defined (DEBUG_BRUTE_FORCE) | |
430 | if (elimination_step < MAX_ELIMINATION_STEP) { | |
431 | keys_eliminated[elimination_step] += MAX_BITSLICES; | |
432 | } | |
433 | #endif | |
434 | #ifdef DEBUG_KEY_ELIMINATION | |
435 | if (known_target_key != -1 && bucket_contains_test_key[block_idx] && *p_odd == test_state[ODD_STATE]) { | |
436 | printf("Known target key eliminated in brute_force.\n"); | |
437 | printf("block_idx = %d/%d, nonce = %d/%d\n", block_idx, bitsliced_blocks, tests, nonces_to_bruteforce); | |
438 | } | |
439 | #endif | |
440 | goto stop_tests; | |
441 | } | |
442 | // prepare for next nonce byte | |
443 | #if defined (DEBUG_BRUTE_FORCE) | |
444 | elimination_step++; | |
445 | #endif | |
446 | parity_bit_vector = bs_zeroes.value; | |
447 | } | |
448 | // update feedback bit vector | |
449 | if (ks_idx != 0) { | |
450 | fb_bits = | |
451 | (state_p[47- 0].value ^ state_p[47- 5].value ^ state_p[47- 9].value ^ | |
452 | state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^ | |
453 | state_p[47-15].value ^ state_p[47-17].value ^ state_p[47-19].value ^ | |
454 | state_p[47-24].value ^ state_p[47-25].value ^ state_p[47-27].value ^ | |
455 | state_p[47-29].value ^ state_p[47-35].value ^ state_p[47-39].value ^ | |
456 | state_p[47-41].value ^ state_p[47-42].value ^ state_p[47-43].value); | |
457 | } | |
458 | // remember feedback and keystream vectors for later use | |
459 | uint8_t bit = KEYSTREAM_SIZE - ks_idx; | |
460 | if (bit <= next_common_bits) { // if needed and not yet stored | |
461 | fbb[bit] = fb_bits; | |
462 | ksb[bit] = ks_bits; | |
463 | par[bit] = parity_bit_vector; | |
464 | } | |
465 | } | |
466 | // prepare for next nonce. Revert to initial state | |
467 | state_p = &states[KEYSTREAM_SIZE]; | |
468 | } | |
469 | ||
470 | // all nonce tests were successful: we've found a possible key in this block! | |
471 | uint32_t *p_even_test = p_even; | |
472 | for (uint32_t results_word = 0; results_word < MAX_BITSLICES / 64; ++results_word) { | |
473 | uint64_t results64 = results.bytes64[results_word]; | |
474 | for (uint32_t results_bit = 0; results_bit < 64; results_bit++) { | |
475 | if (results64 & 0x01) { | |
476 | if (verify_key(cuid, nonces, best_first_bytes, *p_odd, *p_even_test)) { | |
477 | struct Crypto1State pcs; | |
478 | pcs.odd = *p_odd; | |
479 | pcs.even = *p_even_test; | |
480 | lfsr_rollback_byte(&pcs, (cuid >> 24) ^ best_first_bytes[0], true); | |
481 | crypto1_get_lfsr(&pcs, &key); | |
482 | bucket_states_tested += 64 * results_word + results_bit; | |
483 | goto out; | |
484 | } | |
485 | #ifdef DEBUG_KEY_ELIMINATION | |
486 | if (known_target_key != -1 && *p_even_test == test_state[EVEN_STATE] && *p_odd == test_state[ODD_STATE]) { | |
487 | printf("Known target key eliminated in brute_force verification.\n"); | |
488 | printf("block_idx = %d/%d\n", block_idx, bitsliced_blocks); | |
489 | } | |
490 | #endif | |
491 | } | |
492 | #ifdef DEBUG_KEY_ELIMINATION | |
493 | if (known_target_key != -1 && *p_even_test == test_state[EVEN_STATE] && *p_odd == test_state[ODD_STATE]) { | |
494 | printf("Known target key eliminated in brute_force (results_bit == 0).\n"); | |
495 | printf("block_idx = %d/%d\n", block_idx, bitsliced_blocks); | |
496 | } | |
497 | #endif | |
498 | results64 >>= 1; | |
499 | p_even_test++; | |
500 | if (p_even_test == p_even_end) { | |
501 | goto stop_tests; | |
502 | } | |
503 | } | |
504 | } | |
505 | stop_tests: | |
506 | #if defined (DEBUG_BRUTE_FORCE) | |
507 | elimination_step = 0; | |
508 | #endif | |
509 | bucket_states_tested += bucket_size[block_idx]; | |
510 | // prepare to set new states | |
511 | state_p = &states[KEYSTREAM_SIZE]; | |
512 | continue; | |
513 | } | |
514 | } | |
515 | out: | |
516 | for(uint32_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx){ | |
517 | free_bitslice(bitsliced_even_states[block_idx]); | |
518 | } | |
519 | free(bitsliced_even_states); | |
520 | free_bitslice(bitsliced_even_feedback); | |
521 | __sync_fetch_and_add(num_keys_tested, bucket_states_tested); | |
522 | ||
523 | #if defined (DEBUG_BRUTE_FORCE) | |
524 | for (uint32_t i = 0; i < MAX_ELIMINATION_STEP; i++) { | |
525 | printf("Eliminated after %2u test_bytes: %5.2f%%\n", i+1, (float)keys_eliminated[i] / bucket_states_tested * 100); | |
526 | } | |
527 | #endif | |
528 | return key; | |
529 | } | |
530 | ||
531 | ||
532 | ||
533 | #ifndef __MMX__ | |
534 | ||
535 | // pointers to functions: | |
536 | crack_states_bitsliced_t *crack_states_bitsliced_function_p = &crack_states_bitsliced_dispatch; | |
537 | bitslice_test_nonces_t *bitslice_test_nonces_function_p = &bitslice_test_nonces_dispatch; | |
538 | ||
539 | // determine the available instruction set at runtime and call the correct function | |
540 | const uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) { | |
541 | #if defined (__i386__) || defined (__x86_64__) | |
542 | #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2) | |
543 | if (__builtin_cpu_supports("avx512f")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX512; | |
544 | else if (__builtin_cpu_supports("avx2")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX2; | |
545 | #else | |
546 | if (__builtin_cpu_supports("avx2")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX2; | |
547 | #endif | |
548 | else if (__builtin_cpu_supports("avx")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX; | |
549 | else if (__builtin_cpu_supports("sse2")) crack_states_bitsliced_function_p = &crack_states_bitsliced_SSE2; | |
550 | else if (__builtin_cpu_supports("mmx")) crack_states_bitsliced_function_p = &crack_states_bitsliced_MMX; | |
551 | else | |
552 | #endif | |
553 | crack_states_bitsliced_function_p = &crack_states_bitsliced_NOSIMD; | |
554 | ||
555 | // call the most optimized function for this CPU | |
556 | return (*crack_states_bitsliced_function_p)(cuid, best_first_bytes, p, keys_found, num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, nonces); | |
557 | } | |
558 | ||
559 | void bitslice_test_nonces_dispatch(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) { | |
560 | #if defined (__i386__) || defined (__x86_64__) | |
561 | #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2) | |
562 | if (__builtin_cpu_supports("avx512f")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX512; | |
563 | else if (__builtin_cpu_supports("avx2")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX2; | |
564 | #else | |
565 | if (__builtin_cpu_supports("avx2")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX2; | |
566 | #endif | |
567 | else if (__builtin_cpu_supports("avx")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX; | |
568 | else if (__builtin_cpu_supports("sse2")) bitslice_test_nonces_function_p = &bitslice_test_nonces_SSE2; | |
569 | else if (__builtin_cpu_supports("mmx")) bitslice_test_nonces_function_p = &bitslice_test_nonces_MMX; | |
570 | else | |
571 | #endif | |
572 | bitslice_test_nonces_function_p = &bitslice_test_nonces_NOSIMD; | |
573 | ||
574 | // call the most optimized function for this CPU | |
575 | (*bitslice_test_nonces_function_p)(nonces_to_bruteforce, bf_test_nonce, bf_test_nonce_par); | |
576 | } | |
577 | ||
578 | // Entries to dispatched function calls | |
579 | const uint64_t crack_states_bitsliced(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) { | |
580 | return (*crack_states_bitsliced_function_p)(cuid, best_first_bytes, p, keys_found, num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, nonces); | |
581 | } | |
582 | ||
583 | void bitslice_test_nonces(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) { | |
584 | (*bitslice_test_nonces_function_p)(nonces_to_bruteforce, bf_test_nonce, bf_test_nonce_par); | |
585 | } | |
586 | ||
587 | #endif |