From: iceman1001 Date: Thu, 21 Apr 2016 08:26:00 +0000 (+0200) Subject: ADD: added @azcid 's bitsliced BF solver for @piwi 's hardnested command. Awesome... X-Git-Url: http://cvs.zerfleddert.de/cgi-bin/gitweb.cgi/proxmark3-svn/commitdiff_plain/3130ba4b2168f8e8dd66f214d1eecdca65708d53 ADD: added @azcid 's bitsliced BF solver for @piwi 's hardnested command. Awesome work! The original patch demanded some tweaking to work in mingw. This is not tested for other systems so far. --- diff --git a/client/Makefile b/client/Makefile index d4def17e..5b947959 100644 --- a/client/Makefile +++ b/client/Makefile @@ -8,13 +8,14 @@ include ../common/Makefile.common CC = gcc CXX = g++ #COMMON_FLAGS = -m32 -VPATH = ../common ../zlib +COMMON_FLAGS = -std=c99 -O3 -mpopcnt -march=native +#VPATH = ../common ../zlib OBJDIR = obj LDLIBS = -L/opt/local/lib -L/usr/local/lib -lreadline -lpthread -lm LUALIB = ../liblua/liblua.a -LDFLAGS = $(COMMON_FLAGS) -CFLAGS = -std=c99 -I. -I../include -I../common -I../zlib -I/opt/local/include -I../liblua -Wall $(COMMON_FLAGS) -g -O3 +#LDFLAGS = $(COMMON_FLAGS) +CFLAGS = $(COMMON_FLAGS) -I. 
-I../include -I../common -I../zlib -I/opt/local/include -I../liblua -Wall -g LUAPLATFORM = generic ifneq (,$(findstring MINGW,$(platform))) @@ -69,17 +70,18 @@ CORESRCS = uart.c \ sleep.c -CMDSRCS = nonce2key/crapto1.c\ - nonce2key/crypto1.c\ - nonce2key/nonce2key.c\ +CMDSRCS = nonce2key/crapto1.c \ + nonce2key/crypto1.c \ + nonce2key/nonce2key.c \ + nonce2key/crypto1_bs.c \ loclass/cipher.c \ loclass/cipherutils.c \ loclass/des.c \ loclass/ikeys.c \ - loclass/elite_crack.c\ - loclass/fileutils.c\ - mifarehost.c\ - parity.c\ + loclass/elite_crack.c \ + loclass/fileutils.c \ + mifarehost.c \ + parity.c \ crc.c \ crc16.c \ crc64.c \ @@ -113,30 +115,30 @@ CMDSRCS = nonce2key/crapto1.c\ cmdparser.c \ cmdmain.c \ cmdlft55xx.c \ - cmdlfpcf7931.c\ - cmdlfviking.c\ - cmdlfpresco.c\ - cmdlfpyramid.c\ - cmdlfguard.c\ - pm3_binlib.c\ - scripting.c\ - cmdscript.c\ - pm3_bitlib.c\ - aes.c\ - protocols.c\ - sha1.c\ - sha256.c\ - cmdcrc.c\ - reveng/preset.c\ - reveng/reveng.c\ - reveng/cli.c\ - reveng/bmpbit.c\ - reveng/model.c\ - reveng/poly.c\ - reveng/getopt.c\ - tea.c\ - prng.c\ - radixsort.c\ + cmdlfpcf7931.c \ + cmdlfviking.c \ + cmdlfpresco.c \ + cmdlfpyramid.c \ + cmdlfguard.c \ + pm3_binlib.c \ + scripting.c \ + cmdscript.c \ + pm3_bitlib.c \ + aes.c \ + protocols.c \ + sha1.c \ + sha256.c \ + cmdcrc.c \ + reveng/preset.c \ + reveng/reveng.c \ + reveng/cli.c \ + reveng/bmpbit.c \ + reveng/model.c \ + reveng/poly.c \ + reveng/getopt.c \ + tea.c \ + prng.c \ + radixsort.c \ bucketsort.c ZLIBSRCS = deflate.c adler32.c trees.c zutil.c inflate.c inffast.c inftrees.c ZLIB_FLAGS = -DZ_SOLO -DZ_PREFIX -DNO_GZIP -DZLIB_PM3_TUNED @@ -147,13 +149,13 @@ CMDOBJS = $(CMDSRCS:%.c=$(OBJDIR)/%.o) ZLIBOBJS = $(ZLIBSRCS:%.c=$(OBJDIR)/%.o) RM = rm -f -BINS = proxmark3 flasher fpga_compress #snooper cli +BINS = proxmark3 flasher fpga_compress CLEAN = cli cli.exe flasher flasher.exe proxmark3 proxmark3.exe fpga_compress fpga_compress.exe snooper snooper.exe $(CMDOBJS) $(OBJDIR)/*.o *.o 
*.moc.cpp all: lua_build $(BINS) all-static: LDLIBS:=-static $(LDLIBS) -all-static: snooper cli flasher fpga_compress +all-static: $(BINS) proxmark3: LDLIBS+=$(LUALIB) $(QTLDLIBS) proxmark3: $(OBJDIR)/proxmark3.o $(COREOBJS) $(CMDOBJS) $(QTGUI) diff --git a/client/cmdhfmfhard.c b/client/cmdhfmfhard.c index eac783ff..5b9a6494 100644 --- a/client/cmdhfmfhard.c +++ b/client/cmdhfmfhard.c @@ -1,6 +1,6 @@ //----------------------------------------------------------------------------- // Copyright (C) 2015 piwi -// +// fiddled with 2016 Azcid (hardnested bitsliced Bruteforce imp) // This code is licensed to you under the terms of the GNU GPL, version 2 or, // at your option, any later version. See the LICENSE.txt file for the text of // the license. @@ -14,8 +14,8 @@ // Computer and Communications Security, 2015 //----------------------------------------------------------------------------- -#include #include +#include #include #include #include @@ -25,14 +25,19 @@ #include "ui.h" #include "util.h" #include "nonce2key/crapto1.h" +#include "nonce2key/crypto1_bs.h" #include "parity.h" +#ifdef __WIN32 + #include +#endif +#include +#include // uint32_t test_state_odd = 0; // uint32_t test_state_even = 0; #define CONFIDENCE_THRESHOLD 0.95 // Collect nonces until we are certain enough that the following brute force is successfull -#define GOOD_BYTES_REQUIRED 30 - +#define GOOD_BYTES_REQUIRED 28 static const float p_K[257] = { // the probability that a random nonce has a Sum Property == K 0.0290, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, @@ -88,6 +93,8 @@ typedef struct noncelist { } noncelist_t; +static size_t nonces_to_bruteforce = 0; +static noncelistentry_t *brute_force_nonces[256]; static uint32_t cuid = 0; static noncelist_t nonces[256]; static uint8_t best_first_bytes[256]; @@ -169,6 +176,11 @@ static int add_nonce(uint32_t nonce_enc, uint8_t par_enc) p2->nonce_enc = nonce_enc; p2->par_enc = par_enc; + if(nonces_to_bruteforce < 256){ + 
brute_force_nonces[nonces_to_bruteforce] = p2; + nonces_to_bruteforce++; + } + nonces[first_byte].num++; nonces[first_byte].Sum += evenparity32((nonce_enc & 0x00ff0000) | (par_enc & 0x04)); nonces[first_byte].updated = true; // indicates that we need to recalculate the Sum(a8) probability for this first byte @@ -176,7 +188,6 @@ static int add_nonce(uint32_t nonce_enc, uint8_t par_enc) return (1); // new nonce added } - static void init_nonce_memory(void) { for (uint16_t i = 0; i < 256; i++) { @@ -203,7 +214,6 @@ static void free_nonce_list(noncelistentry_t *p) } } - static void free_nonces_memory(void) { for (uint16_t i = 0; i < 256; i++) { @@ -211,7 +221,6 @@ static void free_nonces_memory(void) } } - static uint16_t PartialSumProperty(uint32_t state, odd_even_t odd_even) { uint16_t sum = 0; @@ -235,7 +244,6 @@ static uint16_t PartialSumProperty(uint32_t state, odd_even_t odd_even) return sum; } - // static uint16_t SumProperty(struct Crypto1State *s) // { // uint16_t sum_odd = PartialSumProperty(s->odd, ODD_STATE); @@ -243,7 +251,6 @@ static uint16_t PartialSumProperty(uint32_t state, odd_even_t odd_even) // return (sum_odd*(16-sum_even) + (16-sum_odd)*sum_even); // } - static double p_hypergeometric(uint16_t N, uint16_t K, uint16_t n, uint16_t k) { // for efficient computation we are using the recursive definition @@ -281,8 +288,7 @@ static double p_hypergeometric(uint16_t N, uint16_t K, uint16_t n, uint16_t k) } } } - - + static float sum_probability(uint16_t K, uint16_t n, uint16_t k) { const uint16_t N = 256; @@ -300,8 +306,6 @@ static float sum_probability(uint16_t K, uint16_t n, uint16_t k) return(p_T_is_k_when_S_is_K * p_S_is_K / p_T_is_k); } - - static inline uint_fast8_t common_bits(uint_fast8_t bytes_diff) { @@ -327,7 +331,6 @@ static inline uint_fast8_t common_bits(uint_fast8_t bytes_diff) return common_bits_LUT[bytes_diff]; } - static void Tests() { // printf("Tests: Partial Statelist sizes\n"); @@ -490,7 +493,6 @@ static void Tests() } - static void 
sort_best_first_bytes(void) { // sort based on probability for correct guess @@ -576,7 +578,6 @@ static void sort_best_first_bytes(void) } - static uint16_t estimate_second_byte_sum(void) { @@ -609,7 +610,6 @@ static uint16_t estimate_second_byte_sum(void) return num_good_nonces; } - static int read_nonce_file(void) { FILE *fnonces = NULL; @@ -652,7 +652,6 @@ static int read_nonce_file(void) return 0; } - static void Check_for_FilterFlipProperties(void) { printf("Checking for Filter Flip Properties...\n"); @@ -683,12 +682,9 @@ static void Check_for_FilterFlipProperties(void) } } - static void simulate_MFplus_RNG(uint32_t test_cuid, uint64_t test_key, uint32_t *nt_enc, uint8_t *par_enc) { struct Crypto1State sim_cs = {0, 0}; -// sim_cs.odd = sim_cs.even = 0; - // init cryptostate with key: for(int8_t i = 47; i > 0; i -= 2) { sim_cs.odd = sim_cs.odd << 1 | BIT(test_key, (i - 1) ^ 7); @@ -708,7 +704,6 @@ static void simulate_MFplus_RNG(uint32_t test_cuid, uint64_t test_key, uint32_t } - static void simulate_acquire_nonces() { clock_t time1 = clock(); @@ -762,7 +757,6 @@ static void simulate_acquire_nonces() } - static int acquire_nonces(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_t trgBlockNo, uint8_t trgKeyType, bool nonce_file_write, bool slow) { clock_t time1 = clock(); @@ -890,7 +884,6 @@ static int acquire_nonces(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_ return 0; } - static int init_partial_statelists(void) { const uint32_t sizes_odd[17] = { 126757, 0, 18387, 0, 74241, 0, 181737, 0, 248801, 0, 182033, 0, 73421, 0, 17607, 0, 125601 }; @@ -940,7 +933,6 @@ static int init_partial_statelists(void) return 0; } - static void init_BitFlip_statelist(void) { @@ -964,7 +956,6 @@ static void init_BitFlip_statelist(void) *p = 0xffffffff; statelist_bitflip.states[0] = realloc(statelist_bitflip.states[0], sizeof(uint32_t) * (statelist_bitflip.len[0] + 1)); } - static inline uint32_t *find_first_state(uint32_t state, uint32_t mask, 
partial_indexed_statelist_t *sl, odd_even_t odd_even) { @@ -977,7 +968,6 @@ static inline uint32_t *find_first_state(uint32_t state, uint32_t mask, partial_ return NULL; // no match } - static inline bool /*__attribute__((always_inline))*/ invariant_holds(uint_fast8_t byte_diff, uint_fast32_t state1, uint_fast32_t state2, uint_fast8_t bit, uint_fast8_t state_bit) { uint_fast8_t j_1_bit_mask = 0x01 << (bit-1); @@ -989,7 +979,6 @@ static inline bool /*__attribute__((always_inline))*/ invariant_holds(uint_fast8 return !all_diff; } - static inline bool /*__attribute__((always_inline))*/ invalid_state(uint_fast8_t byte_diff, uint_fast32_t state1, uint_fast32_t state2, uint_fast8_t bit, uint_fast8_t state_bit) { uint_fast8_t j_bit_mask = 0x01 << bit; @@ -1000,7 +989,6 @@ static inline bool /*__attribute__((always_inline))*/ invalid_state(uint_fast8_t return all_diff; } - static inline bool remaining_bits_match(uint_fast8_t num_common_bits, uint_fast8_t byte_diff, uint_fast32_t state1, uint_fast32_t state2, odd_even_t odd_even) { if (odd_even) { @@ -1031,7 +1019,6 @@ static inline bool remaining_bits_match(uint_fast8_t num_common_bits, uint_fast8 return true; // valid state } - static bool all_other_first_bytes_match(uint32_t state, odd_even_t odd_even) { for (uint16_t i = 1; i < num_good_first_bytes; i++) { @@ -1095,7 +1082,6 @@ static bool all_other_first_bytes_match(uint32_t state, odd_even_t odd_even) return true; } - static bool all_bit_flips_match(uint32_t state, odd_even_t odd_even) { for (uint16_t i = 0; i < 256; i++) { @@ -1152,13 +1138,11 @@ static bool all_bit_flips_match(uint32_t state, odd_even_t odd_even) return true; } - static struct sl_cache_entry { uint32_t *sl; uint32_t len; } sl_cache[17][17][2]; - static void init_statelist_cache(void) { for (uint16_t i = 0; i < 17; i+=2) { @@ -1171,7 +1155,6 @@ static void init_statelist_cache(void) } } - static int add_matching_states(statelist_t *candidates, uint16_t part_sum_a0, uint16_t part_sum_a8, odd_even_t 
odd_even) { uint32_t worstcase_size = 1<<20; @@ -1219,7 +1202,6 @@ static int add_matching_states(statelist_t *candidates, uint16_t part_sum_a0, ui return 0; } - static statelist_t *add_more_candidates(statelist_t *current_candidates) { statelist_t *new_candidates = NULL; @@ -1239,7 +1221,6 @@ static statelist_t *add_more_candidates(statelist_t *current_candidates) return new_candidates; } - static void TestIfKeyExists(uint64_t key) { struct Crypto1State *pcs; @@ -1290,7 +1271,6 @@ static void TestIfKeyExists(uint64_t key) crypto1_destroy(pcs); } - static void generate_candidates(uint16_t sum_a0, uint16_t sum_a8) { printf("Generating crypto1 state candidates... \n"); @@ -1364,7 +1344,6 @@ static void generate_candidates(uint16_t sum_a0, uint16_t sum_a8) } } - static void free_candidates_memory(statelist_t *sl) { if (sl == NULL) { @@ -1375,7 +1354,6 @@ static void free_candidates_memory(statelist_t *sl) } } - static void free_statelist_cache(void) { for (uint16_t i = 0; i < 17; i+=2) { @@ -1387,19 +1365,332 @@ static void free_statelist_cache(void) } } +size_t keys_found = 0; +size_t bucket_count = 0; +statelist_t* buckets[128]; +size_t total_states_tested = 0; +size_t thread_count = 4; + +// these bitsliced states will hold identical states in all slices +bitslice_t bitsliced_rollback_byte[ROLLBACK_SIZE]; + +// arrays of bitsliced states with identical values in all slices +bitslice_t bitsliced_encrypted_nonces[NONCE_TESTS][STATE_SIZE]; +bitslice_t bitsliced_encrypted_parity_bits[NONCE_TESTS][ROLLBACK_SIZE]; + +#define EXACT_COUNT + +static const uint64_t crack_states_bitsliced(statelist_t *p){ + // the idea to roll back the half-states before combining them was suggested/explained to me by bla + // first we pre-bitslice all the even state bits and roll them back, then bitslice the odd bits and combine the two in the inner loop + uint64_t key = -1; + uint8_t bSize = sizeof(bitslice_t); + +#ifdef EXACT_COUNT + size_t bucket_states_tested = 0; + size_t 
bucket_size[p->len[EVEN_STATE]/MAX_BITSLICES]; +#else + const size_t bucket_states_tested = (p->len[EVEN_STATE])*(p->len[ODD_STATE]); +#endif + + bitslice_t *bitsliced_even_states[p->len[EVEN_STATE]/MAX_BITSLICES]; + size_t bitsliced_blocks = 0; + uint32_t const * restrict even_end = p->states[EVEN_STATE]+p->len[EVEN_STATE]; + + // bitslice all the even states + for(uint32_t * restrict p_even = p->states[EVEN_STATE]; p_even < even_end; p_even += MAX_BITSLICES){ + +#ifdef __WIN32 + #ifdef __MINGW32__ + bitslice_t * restrict lstate_p = __mingw_aligned_malloc((STATE_SIZE+ROLLBACK_SIZE) * bSize, bSize); + #else + bitslice_t * restrict lstate_p = _aligned_malloc((STATE_SIZE+ROLLBACK_SIZE) * bSize, bSize); + #endif +#else + bitslice_t * restrict lstate_p = memalign(bSize, (STATE_SIZE+ROLLBACK_SIZE) * bSize); +#endif + + if ( !lstate_p ) { + __sync_fetch_and_add(&total_states_tested, bucket_states_tested); + return key; + } + + memset(lstate_p+1, 0x0, (STATE_SIZE-1)*sizeof(bitslice_t)); // zero even bits + + // bitslice even half-states + const size_t max_slices = (even_end-p_even) < MAX_BITSLICES ? 
even_end-p_even : MAX_BITSLICES; +#ifdef EXACT_COUNT + bucket_size[bitsliced_blocks] = max_slices; +#endif + for(size_t slice_idx = 0; slice_idx < max_slices; ++slice_idx){ + uint32_t e = *(p_even+slice_idx); + for(size_t bit_idx = 1; bit_idx < STATE_SIZE; bit_idx+=2, e >>= 1){ + // set even bits + if(e&1){ + lstate_p[bit_idx].bytes64[slice_idx>>6] |= 1ull << (slice_idx&63); + } + } + } + // compute the rollback bits + for(size_t rollback = 0; rollback < ROLLBACK_SIZE; ++rollback){ + // inlined crypto1_bs_lfsr_rollback + const bitslice_value_t feedout = lstate_p[0].value; + ++lstate_p; + const bitslice_value_t ks_bits = crypto1_bs_f20(lstate_p); + const bitslice_value_t feedback = (feedout ^ ks_bits ^ lstate_p[47- 5].value ^ lstate_p[47- 9].value ^ + lstate_p[47-10].value ^ lstate_p[47-12].value ^ lstate_p[47-14].value ^ + lstate_p[47-15].value ^ lstate_p[47-17].value ^ lstate_p[47-19].value ^ + lstate_p[47-24].value ^ lstate_p[47-25].value ^ lstate_p[47-27].value ^ + lstate_p[47-29].value ^ lstate_p[47-35].value ^ lstate_p[47-39].value ^ + lstate_p[47-41].value ^ lstate_p[47-42].value ^ lstate_p[47-43].value); + lstate_p[47].value = feedback ^ bitsliced_rollback_byte[rollback].value; + } + bitsliced_even_states[bitsliced_blocks++] = lstate_p; + } + + // bitslice every odd state to every block of even half-states with half-finished rollback + for(uint32_t const * restrict p_odd = p->states[ODD_STATE]; p_odd < p->states[ODD_STATE]+p->len[ODD_STATE]; ++p_odd){ + // early abort + if(keys_found){ + goto out; + } + + // set the odd bits and compute rollback + uint64_t o = (uint64_t) *p_odd; + lfsr_rollback_byte((struct Crypto1State*) &o, 0, 1); + // pre-compute part of the odd feedback bits (minus rollback) + bool odd_feedback_bit = parity(o&0x9ce5c); + + crypto1_bs_rewind_a0(); + // set odd bits + for(size_t state_idx = 0; state_idx < STATE_SIZE-ROLLBACK_SIZE; o >>= 1, state_idx+=2){ + if(o & 1){ + state_p[state_idx] = bs_ones; + } else { + state_p[state_idx] = 
bs_zeroes; + } + } + const bitslice_value_t odd_feedback = odd_feedback_bit ? bs_ones.value : bs_zeroes.value; + + for(size_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx){ + const bitslice_t const * restrict bitsliced_even_state = bitsliced_even_states[block_idx]; + size_t state_idx; + // set even bits + for(state_idx = 0; state_idx < STATE_SIZE-ROLLBACK_SIZE; state_idx+=2){ + state_p[1+state_idx] = bitsliced_even_state[1+state_idx]; + } + // set rollback bits + uint64_t lo = o; + for(; state_idx < STATE_SIZE; lo >>= 1, state_idx+=2){ + // set the odd bits and take in the odd rollback bits from the even states + if(lo & 1){ + state_p[state_idx].value = ~bitsliced_even_state[state_idx].value; + } else { + state_p[state_idx] = bitsliced_even_state[state_idx]; + } + + // set the even bits and take in the even rollback bits from the odd states + if((lo >> 32) & 1){ + state_p[1+state_idx].value = ~bitsliced_even_state[1+state_idx].value; + } else { + state_p[1+state_idx] = bitsliced_even_state[1+state_idx]; + } + } + +#ifdef EXACT_COUNT + bucket_states_tested += bucket_size[block_idx]; +#endif + // pre-compute first keystream and feedback bit vectors + const bitslice_value_t ksb = crypto1_bs_f20(state_p); + const bitslice_value_t fbb = (odd_feedback ^ state_p[47- 0].value ^ state_p[47- 5].value ^ // take in the even and rollback bits + state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^ + state_p[47-24].value ^ state_p[47-42].value); + + // vector to contain test results (1 = passed, 0 = failed) + bitslice_t results = bs_ones; + + for(size_t tests = 0; tests < NONCE_TESTS; ++tests){ + size_t parity_bit_idx = 0; + bitslice_value_t fb_bits = fbb; + bitslice_value_t ks_bits = ksb; + state_p = &states[KEYSTREAM_SIZE-1]; + bitslice_value_t parity_bit_vector = bs_zeroes.value; + + // highest bit is transmitted/received first + for(int32_t ks_idx = KEYSTREAM_SIZE-1; ks_idx >= 0; --ks_idx, --state_p){ + // decrypt nonce bits + const 
bitslice_value_t encrypted_nonce_bit_vector = bitsliced_encrypted_nonces[tests][ks_idx].value; + const bitslice_value_t decrypted_nonce_bit_vector = (encrypted_nonce_bit_vector ^ ks_bits); + + // compute real parity bits on the fly + parity_bit_vector ^= decrypted_nonce_bit_vector; + + // update state + state_p[0].value = (fb_bits ^ decrypted_nonce_bit_vector); + + // compute next keystream bit + ks_bits = crypto1_bs_f20(state_p); + + // for each byte: + if((ks_idx&7) == 0){ + // get encrypted parity bits + const bitslice_value_t encrypted_parity_bit_vector = bitsliced_encrypted_parity_bits[tests][parity_bit_idx++].value; + + // decrypt parity bits + const bitslice_value_t decrypted_parity_bit_vector = (encrypted_parity_bit_vector ^ ks_bits); + + // compare actual parity bits with decrypted parity bits and take count in results vector + results.value &= (parity_bit_vector ^ decrypted_parity_bit_vector); + + // make sure we still have a match in our set + // if(memcmp(&results, &bs_zeroes, sizeof(bitslice_t)) == 0){ + + // this is much faster on my gcc, because somehow a memcmp needlessly spills/fills all the xmm registers to/from the stack - ??? 
+ // the short-circuiting also helps + if(results.bytes64[0] == 0 +#if MAX_BITSLICES > 64 + && results.bytes64[1] == 0 +#endif +#if MAX_BITSLICES > 128 + && results.bytes64[2] == 0 + && results.bytes64[3] == 0 +#endif + ){ + goto stop_tests; + } + // this is about as fast but less portable (requires -std=gnu99) + // asm goto ("ptest %1, %0\n\t" + // "jz %l2" :: "xm" (results.value), "xm" (bs_ones.value) : "cc" : stop_tests); + parity_bit_vector = bs_zeroes.value; + } + // compute next feedback bit vector + fb_bits = (state_p[47- 0].value ^ state_p[47- 5].value ^ state_p[47- 9].value ^ + state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^ + state_p[47-15].value ^ state_p[47-17].value ^ state_p[47-19].value ^ + state_p[47-24].value ^ state_p[47-25].value ^ state_p[47-27].value ^ + state_p[47-29].value ^ state_p[47-35].value ^ state_p[47-39].value ^ + state_p[47-41].value ^ state_p[47-42].value ^ state_p[47-43].value); + } + } + // all nonce tests were successful: we've found the key in this block! 
+ state_t keys[MAX_BITSLICES]; + crypto1_bs_convert_states(&states[KEYSTREAM_SIZE], keys); + for(size_t results_idx = 0; results_idx < MAX_BITSLICES; ++results_idx){ + if(get_vector_bit(results_idx, results)){ + key = keys[results_idx].value; + goto out; + } + } +stop_tests: + // prepare to set new states + crypto1_bs_rewind_a0(); + continue; + } + } + +out: + for(size_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx){ + +#ifdef __WIN32 + #ifdef __MINGW32__ + __mingw_aligned_free(bitsliced_even_states[block_idx]-ROLLBACK_SIZE); + #else + _aligned_free(bitsliced_even_states[block_idx]-ROLLBACK_SIZE); + #endif +#else + memfree(bitsliced_even_states[block_idx]-ROLLBACK_SIZE); +#endif + + } + __sync_fetch_and_add(&total_states_tested, bucket_states_tested); + return key; +} +static void* crack_states_thread(void* x){ + const size_t thread_id = (size_t)x; + size_t current_bucket = thread_id; + while(current_bucket < bucket_count){ + statelist_t * bucket = buckets[current_bucket]; + if(bucket){ + const uint64_t key = crack_states_bitsliced(bucket); + if(key != -1){ + printf("\nFound key: %012"PRIx64"\n", key); + __sync_fetch_and_add(&keys_found, 1); + break; + } else if(keys_found){ + break; + } else { + printf("."); + fflush(stdout); + } + } + current_bucket += thread_count; + } + return NULL; +} +#define _USE_32BIT_TIME_T static void brute_force(void) { if (known_target_key != -1) { PrintAndLog("Looking for known target key in remaining key space..."); TestIfKeyExists(known_target_key); } else { - PrintAndLog("Brute Force phase is not implemented."); + PrintAndLog("Brute force phase starting."); + time_t start, end; + time(&start); + keys_found = 0; + + crypto1_bs_init(); + + PrintAndLog("Using %u-bit bitslices", MAX_BITSLICES); + PrintAndLog("Bitslicing best_first_byte^uid[3] (rollback byte): %02x...", best_first_bytes[0]^(cuid>>24)); + // convert to 32 bit little-endian + crypto1_bs_bitslice_value32(rev32((best_first_bytes[0]^(cuid>>24))), 
bitsliced_rollback_byte, 8); + + PrintAndLog("Bitslicing nonces..."); + for(size_t tests = 0; tests < NONCE_TESTS; tests++){ + uint32_t test_nonce = brute_force_nonces[tests]->nonce_enc; + uint8_t test_parity = brute_force_nonces[tests]->par_enc; + // pre-xor the uid into the decrypted nonces, and also pre-xor the cuid parity into the encrypted parity bits - otherwise an exta xor is required in the decryption routine + crypto1_bs_bitslice_value32(cuid^test_nonce, bitsliced_encrypted_nonces[tests], 32); + // convert to 32 bit little-endian + crypto1_bs_bitslice_value32(rev32( ~(test_parity ^ ~(parity(cuid>>24 & 0xff)<<3 | parity(cuid>>16 & 0xff)<<2 | parity(cuid>>8 & 0xff)<<1 | parity(cuid&0xff)))), bitsliced_encrypted_parity_bits[tests], 4); + } + total_states_tested = 0; + + // count number of states to go + bucket_count = 0; + for (statelist_t *p = candidates; p != NULL; p = p->next) { + buckets[bucket_count] = p; + bucket_count++; + } + +#ifndef __WIN32 + thread_count = sysconf(_SC_NPROCESSORS_CONF); +#endif /* _WIN32 */ + pthread_t threads[thread_count]; + + // enumerate states using all hardware threads, each thread handles one bucket + PrintAndLog("Starting %u cracking threads to search %u buckets containing a total of %"PRIu32" states...", thread_count, bucket_count, maximum_states); + + for(size_t i = 0; i < thread_count; i++){ + pthread_create(&threads[i], NULL, crack_states_thread, (void*) i); + } + for(size_t i = 0; i < thread_count; i++){ + pthread_join(threads[i], 0); + } + + time(&end); + unsigned long elapsed_time = difftime(end, start); + PrintAndLog("Tested %"PRIu32" states, found %u keys after %u seconds", total_states_tested, keys_found, elapsed_time); + if(!keys_found){ + assert(total_states_tested == maximum_states); + } + // reset this counter for the next call + nonces_to_bruteforce = 0; } - } - int mfnestedhard(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_t trgBlockNo, uint8_t trgKeyType, uint8_t *trgkey, bool nonce_file_read, bool 
nonce_file_write, bool slow, int tests) { // initialize Random number generator diff --git a/client/nonce2key/crypto1_bs.c b/client/nonce2key/crypto1_bs.c new file mode 100644 index 00000000..2bb1194d --- /dev/null +++ b/client/nonce2key/crypto1_bs.c @@ -0,0 +1,120 @@ +// Bit-sliced Crypto-1 implementation +// The cipher states are stored with the least significant bit first, hence all bit indexes are reversed here +/* +Copyright (c) 2015-2016 Aram Verstegen + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "crypto1_bs.h" +#include +#define __STDC_FORMAT_MACROS +#define llx PRIx64 +#define lli PRIi64 +#define lu PRIu32 + +// The following functions use this global or thread-local state +// It is sized to fit exactly KEYSTREAM_SIZE more states next to the initial state +__thread bitslice_t states[KEYSTREAM_SIZE+STATE_SIZE]; +__thread bitslice_t * restrict state_p; + +void crypto1_bs_init(){ + // initialize constant one and zero bit vectors + memset(bs_ones.bytes, 0xff, VECTOR_SIZE); + memset(bs_zeroes.bytes, 0x00, VECTOR_SIZE); +} + +// The following functions have side effects on 48 bitslices at the state_p pointer +// use the crypto1_bs_rewind_* macros to (re-)initialize them as needed + +inline const bitslice_value_t crypto1_bs_bit(const bitslice_value_t input, const bool is_encrypted){ + bitslice_value_t feedback = (state_p[47- 0].value ^ state_p[47- 5].value ^ state_p[47- 9].value ^ + state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^ + state_p[47-15].value ^ state_p[47-17].value ^ state_p[47-19].value ^ + state_p[47-24].value ^ state_p[47-25].value ^ state_p[47-27].value ^ + state_p[47-29].value ^ state_p[47-35].value ^ state_p[47-39].value ^ + state_p[47-41].value ^ state_p[47-42].value ^ state_p[47-43].value); + const bitslice_value_t ks_bits = crypto1_bs_f20(state_p); + if(is_encrypted){ + feedback ^= ks_bits; + } + state_p--; + state_p[0].value = feedback ^ input; + return ks_bits; +} + +inline const bitslice_value_t crypto1_bs_lfsr_rollback(const bitslice_value_t input, const bool is_encrypted){ + bitslice_value_t feedout = state_p[0].value; + state_p++; + const bitslice_value_t ks_bits = crypto1_bs_f20(state_p); + if(is_encrypted){ + feedout ^= ks_bits; + } + const bitslice_value_t feedback = (feedout ^ state_p[47- 5].value ^ state_p[47- 9].value ^ + state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^ + state_p[47-15].value ^ state_p[47-17].value ^ state_p[47-19].value ^ + state_p[47-24].value ^ 
state_p[47-25].value ^ state_p[47-27].value ^ + state_p[47-29].value ^ state_p[47-35].value ^ state_p[47-39].value ^ + state_p[47-41].value ^ state_p[47-42].value ^ state_p[47-43].value); + state_p[47].value = feedback ^ input; + return ks_bits; +} + +// side-effect free from here on +// note that bytes are sliced and unsliced with reversed endianness +inline void crypto1_bs_convert_states(bitslice_t bitsliced_states[], state_t regular_states[]){ + size_t bit_idx = 0, slice_idx = 0; + state_t values[MAX_BITSLICES]; + for(slice_idx = 0; slice_idx < MAX_BITSLICES; slice_idx++){ + for(bit_idx = 0; bit_idx < STATE_SIZE; bit_idx++){ + bool bit = get_vector_bit(slice_idx, bitsliced_states[bit_idx]); + values[slice_idx].value <<= 1; + values[slice_idx].value |= bit; + } + // swap endianness + values[slice_idx].value = rev_state_t(values[slice_idx].value); + // roll off unused bits + values[slice_idx].value >>= ((sizeof(state_t)*8)-STATE_SIZE); + } + memcpy(regular_states, values, sizeof(values)); +} + +// bitslice a value +void crypto1_bs_bitslice_value32(uint32_t value, bitslice_t bitsliced_value[], size_t bit_len){ + // load nonce bytes with unswapped endianness + size_t bit_idx; + for(bit_idx = 0; bit_idx < bit_len; bit_idx++){ + bool bit = get_bit(bit_len-1-bit_idx, rev32(value)); + if(bit){ + bitsliced_value[bit_idx].value = bs_ones.value; + } else { + bitsliced_value[bit_idx].value = bs_zeroes.value; + } + } +} + +void crypto1_bs_print_states(bitslice_t bitsliced_states[]){ + size_t slice_idx = 0; + state_t values[MAX_BITSLICES]; + crypto1_bs_convert_states(bitsliced_states, values); + for(slice_idx = 0; slice_idx < MAX_BITSLICES; slice_idx++){ + printf("State %03zu: %012"llx"\n", slice_idx, values[slice_idx].value); + } +} + diff --git a/client/nonce2key/crypto1_bs.h b/client/nonce2key/crypto1_bs.h new file mode 100644 index 00000000..8f332749 --- /dev/null +++ b/client/nonce2key/crypto1_bs.h @@ -0,0 +1,99 @@ +#ifndef _CRYPTO1_BS_H +#define _CRYPTO1_BS_H +#include 
+#include +#include +#include +#include +#include + +// bitslice type +// while AVX supports 256 bit vector floating point operations, we need integer operations for boolean logic +// same for AVX2 and 512 bit vectors +// using larger vectors works but seems to generate more register pressure +#if defined(__AVX2__) +#define MAX_BITSLICES 256 +#elif defined(__AVX__) +#define MAX_BITSLICES 128 +#elif defined(__SSE2__) +#define MAX_BITSLICES 128 +#else +#define MAX_BITSLICES 64 +#endif + +#define VECTOR_SIZE (MAX_BITSLICES/8) +typedef unsigned int __attribute__((aligned(VECTOR_SIZE))) __attribute__((vector_size(VECTOR_SIZE))) bitslice_value_t; +typedef union { + bitslice_value_t value; + uint64_t bytes64[MAX_BITSLICES/64]; + uint8_t bytes[MAX_BITSLICES/8]; +} bitslice_t; + +// filter function (f20) +// sourced from ``Wirelessly Pickpocketing a Mifare Classic Card'' by Flavio Garcia, Peter van Rossum, Roel Verdult and Ronny Wichers Schreur +#define f20a(a,b,c,d) (((a|b)^(a&d))^(c&((a^b)|d))) +#define f20b(a,b,c,d) (((a&b)|c)^((a^b)&(c|d))) +#define f20c(a,b,c,d,e) ((a|((b|e)&(d^e)))^((a^(b&d))&((c^d)|(b&e)))) + +#define crypto1_bs_f20(s) \ +f20c(f20a((s[47- 9].value), (s[47-11].value), (s[47-13].value), (s[47-15].value)), \ + f20b((s[47-17].value), (s[47-19].value), (s[47-21].value), (s[47-23].value)), \ + f20b((s[47-25].value), (s[47-27].value), (s[47-29].value), (s[47-31].value)), \ + f20a((s[47-33].value), (s[47-35].value), (s[47-37].value), (s[47-39].value)), \ + f20b((s[47-41].value), (s[47-43].value), (s[47-45].value), (s[47-47].value))) + +// bit indexing +#define get_bit(n, word) ((word >> (n)) & 1) +#define get_vector_bit(slice, value) get_bit(slice&0x3f, value.bytes64[slice>>6]) + +// constant ones/zeroes +bitslice_t bs_ones; +bitslice_t bs_zeroes; + +// size of crypto-1 state +#define STATE_SIZE 48 +// size of nonce to be decrypted +#define KEYSTREAM_SIZE 32 +// size of first uid^nonce byte to be rolled back to the initial key +#define ROLLBACK_SIZE 8 +// 
number of nonces required to test to cover entire 48-bit state +// I would have said it's 12... but bla goes with 100, so I do too +#define NONCE_TESTS 100 + +// state pointer management +extern __thread bitslice_t states[KEYSTREAM_SIZE+STATE_SIZE]; +extern __thread bitslice_t * restrict state_p; + +// rewind to the point a0, at which KEYSTREAM_SIZE more bits can be generated +#define crypto1_bs_rewind_a0() (state_p = &states[KEYSTREAM_SIZE]) + +// bitsliced bytewise parity +#define bitsliced_byte_parity(n) (n[0].value ^ n[1].value ^ n[2].value ^ n[3].value ^ n[4].value ^ n[5].value ^ n[6].value ^ n[7].value) + +// 48-bit crypto-1 states are normally represented using 64-bit values +typedef union { + uint64_t value; + uint8_t bytes[8]; +} state_t; + +// endianness conversion +#define rev32(word) (((word & 0xff) << 24) | (((word >> 8) & 0xff) << 16) | (((word >> 16) & 0xff) << 8) | (((word >> 24) & 0xff))) +#define rev64(x) (rev32(x)<<32|(rev32((x>>32)))) +#define rev_state_t rev64 + +// crypto-1 functions +const bitslice_value_t crypto1_bs_bit(const bitslice_value_t input, const bool is_encrypted); +const bitslice_value_t crypto1_bs_lfsr_rollback(const bitslice_value_t input, const bool is_encrypted); + +// initialization functions +void crypto1_bs_init(); + +// conversion functions +void crypto1_bs_bitslice_value32(uint32_t value, bitslice_t bitsliced_value[], size_t bit_len); +void crypto1_bs_convert_states(bitslice_t bitsliced_states[], state_t regular_states[]); + +// debug print +void crypto1_bs_print_states(bitslice_t *bitsliced_states); + +#endif // _CRYPTO1_BS_H +