+ // bitslice even half-states
+ const size_t max_slices = (even_end-p_even) < MAX_BITSLICES ? even_end-p_even : MAX_BITSLICES;
+#ifdef EXACT_COUNT
+ bucket_size[bitsliced_blocks] = max_slices;
+#endif
+ for(size_t slice_idx = 0; slice_idx < max_slices; ++slice_idx){
+ uint32_t e = *(p_even+slice_idx);
+ for(size_t bit_idx = 1; bit_idx < STATE_SIZE; bit_idx+=2, e >>= 1){
+ // set even bits
+ if(e&1){
+ lstate_p[bit_idx].bytes64[slice_idx>>6] |= 1ull << (slice_idx&63);
+ }
+ }
+ }
+ // compute the rollback bits
+ for(size_t rollback = 0; rollback < ROLLBACK_SIZE; ++rollback){
+ // inlined crypto1_bs_lfsr_rollback
+ const bitslice_value_t feedout = lstate_p[0].value;
+ ++lstate_p;
+ const bitslice_value_t ks_bits = crypto1_bs_f20(lstate_p);
+ const bitslice_value_t feedback = (feedout ^ ks_bits ^ lstate_p[47- 5].value ^ lstate_p[47- 9].value ^
+ lstate_p[47-10].value ^ lstate_p[47-12].value ^ lstate_p[47-14].value ^
+ lstate_p[47-15].value ^ lstate_p[47-17].value ^ lstate_p[47-19].value ^
+ lstate_p[47-24].value ^ lstate_p[47-25].value ^ lstate_p[47-27].value ^
+ lstate_p[47-29].value ^ lstate_p[47-35].value ^ lstate_p[47-39].value ^
+ lstate_p[47-41].value ^ lstate_p[47-42].value ^ lstate_p[47-43].value);
+ lstate_p[47].value = feedback ^ bitsliced_rollback_byte[rollback].value;
+ }
+ bitsliced_even_states[bitsliced_blocks++] = lstate_p;
+ }
+
+ // bitslice every odd state to every block of even half-states with half-finished rollback
+ for(uint32_t const * restrict p_odd = p->states[ODD_STATE]; p_odd < p->states[ODD_STATE]+p->len[ODD_STATE]; ++p_odd){
+ // early abort
+ if(keys_found){
+ goto out;
+ }
+
+ // set the odd bits and compute rollback
+ uint64_t o = (uint64_t) *p_odd;
+ lfsr_rollback_byte((struct Crypto1State*) &o, 0, 1);
+ // pre-compute part of the odd feedback bits (minus rollback)
+ bool odd_feedback_bit = parity(o&0x9ce5c);
+
+ crypto1_bs_rewind_a0();
+ // set odd bits
+ for(size_t state_idx = 0; state_idx < STATE_SIZE-ROLLBACK_SIZE; o >>= 1, state_idx+=2){
+ if(o & 1){
+ state_p[state_idx] = bs_ones;
+ } else {
+ state_p[state_idx] = bs_zeroes;
+ }
+ }
+ const bitslice_value_t odd_feedback = odd_feedback_bit ? bs_ones.value : bs_zeroes.value;
+
+ for(size_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx){
+ const bitslice_t const * restrict bitsliced_even_state = bitsliced_even_states[block_idx];
+ size_t state_idx;
+ // set even bits
+ for(state_idx = 0; state_idx < STATE_SIZE-ROLLBACK_SIZE; state_idx+=2){
+ state_p[1+state_idx] = bitsliced_even_state[1+state_idx];
+ }
+ // set rollback bits
+ uint64_t lo = o;
+ for(; state_idx < STATE_SIZE; lo >>= 1, state_idx+=2){
+ // set the odd bits and take in the odd rollback bits from the even states
+ if(lo & 1){
+ state_p[state_idx].value = ~bitsliced_even_state[state_idx].value;
+ } else {
+ state_p[state_idx] = bitsliced_even_state[state_idx];
+ }
+
+ // set the even bits and take in the even rollback bits from the odd states
+ if((lo >> 32) & 1){
+ state_p[1+state_idx].value = ~bitsliced_even_state[1+state_idx].value;
+ } else {
+ state_p[1+state_idx] = bitsliced_even_state[1+state_idx];
+ }
+ }
+
+#ifdef EXACT_COUNT
+ bucket_states_tested += bucket_size[block_idx];
+#endif
+ // pre-compute first keystream and feedback bit vectors
+ const bitslice_value_t ksb = crypto1_bs_f20(state_p);
+ const bitslice_value_t fbb = (odd_feedback ^ state_p[47- 0].value ^ state_p[47- 5].value ^ // take in the even and rollback bits
+ state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^
+ state_p[47-24].value ^ state_p[47-42].value);
+
+ // vector to contain test results (1 = passed, 0 = failed)
+ bitslice_t results = bs_ones;
+
+ for(size_t tests = 0; tests < NONCE_TESTS; ++tests){
+ size_t parity_bit_idx = 0;
+ bitslice_value_t fb_bits = fbb;
+ bitslice_value_t ks_bits = ksb;
+ state_p = &states[KEYSTREAM_SIZE-1];
+ bitslice_value_t parity_bit_vector = bs_zeroes.value;
+
+ // highest bit is transmitted/received first
+ for(int32_t ks_idx = KEYSTREAM_SIZE-1; ks_idx >= 0; --ks_idx, --state_p){
+ // decrypt nonce bits
+ const bitslice_value_t encrypted_nonce_bit_vector = bitsliced_encrypted_nonces[tests][ks_idx].value;
+ const bitslice_value_t decrypted_nonce_bit_vector = (encrypted_nonce_bit_vector ^ ks_bits);
+
+ // compute real parity bits on the fly
+ parity_bit_vector ^= decrypted_nonce_bit_vector;
+
+ // update state
+ state_p[0].value = (fb_bits ^ decrypted_nonce_bit_vector);
+
+ // compute next keystream bit
+ ks_bits = crypto1_bs_f20(state_p);
+
+ // for each byte:
+ if((ks_idx&7) == 0){
+ // get encrypted parity bits
+ const bitslice_value_t encrypted_parity_bit_vector = bitsliced_encrypted_parity_bits[tests][parity_bit_idx++].value;
+
+ // decrypt parity bits
+ const bitslice_value_t decrypted_parity_bit_vector = (encrypted_parity_bit_vector ^ ks_bits);
+
+ // compare actual parity bits with decrypted parity bits and take count in results vector
+ results.value &= (parity_bit_vector ^ decrypted_parity_bit_vector);
+
+ // make sure we still have a match in our set
+ // if(memcmp(&results, &bs_zeroes, sizeof(bitslice_t)) == 0){
+
+ // this is much faster on my gcc, because somehow a memcmp needlessly spills/fills all the xmm registers to/from the stack - ???
+ // the short-circuiting also helps
+ if(results.bytes64[0] == 0
+#if MAX_BITSLICES > 64
+ && results.bytes64[1] == 0
+#endif
+#if MAX_BITSLICES > 128
+ && results.bytes64[2] == 0
+ && results.bytes64[3] == 0
+#endif
+ ){
+ goto stop_tests;
+ }
+ // this is about as fast but less portable (requires -std=gnu99)
+ // asm goto ("ptest %1, %0\n\t"
+ // "jz %l2" :: "xm" (results.value), "xm" (bs_ones.value) : "cc" : stop_tests);
+ parity_bit_vector = bs_zeroes.value;
+ }
+ // compute next feedback bit vector
+ fb_bits = (state_p[47- 0].value ^ state_p[47- 5].value ^ state_p[47- 9].value ^
+ state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^
+ state_p[47-15].value ^ state_p[47-17].value ^ state_p[47-19].value ^
+ state_p[47-24].value ^ state_p[47-25].value ^ state_p[47-27].value ^
+ state_p[47-29].value ^ state_p[47-35].value ^ state_p[47-39].value ^
+ state_p[47-41].value ^ state_p[47-42].value ^ state_p[47-43].value);
+ }
+ }
+ // all nonce tests were successful: we've found the key in this block!
+ state_t keys[MAX_BITSLICES];
+ crypto1_bs_convert_states(&states[KEYSTREAM_SIZE], keys);
+ for(size_t results_idx = 0; results_idx < MAX_BITSLICES; ++results_idx){
+ if(get_vector_bit(results_idx, results)){
+ key = keys[results_idx].value;
+ goto out;
+ }
+ }
+stop_tests:
+ // prepare to set new states
+ crypto1_bs_rewind_a0();
+ continue;
+ }
+ }
+
+out:
+ for(size_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx){
+
+#ifdef __WIN32
+ #ifdef __MINGW32__
+ __mingw_aligned_free(bitsliced_even_states[block_idx]-ROLLBACK_SIZE);
+ #else
+ _aligned_free(bitsliced_even_states[block_idx]-ROLLBACK_SIZE);
+ #endif
+#else
+ free(bitsliced_even_states[block_idx]-ROLLBACK_SIZE);
+#endif
+
+ }
+ __sync_fetch_and_add(&total_states_tested, bucket_states_tested);
+ return key;
+}
+
+ // Worker thread entry point for the brute-force phase.
+ //
+ // x carries the thread's index, passed by value through the void* argument.
+ // The thread starts at bucket <index> and advances by thread_count, running
+ // crack_states_bitsliced() on every non-NULL bucket until it finds a key,
+ // sees that keys_found has been set, or runs past bucket_count.
+ //
+ // Always returns NULL; a hit is published through foundkey and keys_found.
+ static void* crack_states_thread(void* x){
+     const size_t thread_id = (size_t)x;
+     size_t current_bucket = thread_id;
+     while(current_bucket < bucket_count){
+         statelist_t * bucket = buckets[current_bucket];
+         if(bucket){
+             const uint64_t key = crack_states_bitsliced(bucket);
+             // an all-ones result is treated here as "no key in this bucket"
+             if(key != (uint64_t)-1){
+                 // Atomically overwrite foundkey with the key instead of adding to it:
+                 // an add yields a bogus sum whenever foundkey is not zero, e.g. a value
+                 // left over from an earlier run or a second thread reporting a key.
+                 // NOTE(review): assumes foundkey is a 64-bit unsigned integer, as its
+                 // PRIx64 formatting suggests - confirm against the declaration.
+                 uint64_t expected = 0;
+                 uint64_t previous;
+                 while((previous = __sync_val_compare_and_swap(&foundkey, expected, key)) != expected){
+                     expected = previous;
+                 }
+                 // raise the flag only after the key is in place
+                 __sync_fetch_and_add(&keys_found, 1);
+                 break;
+             } else if(keys_found){
+                 // keys_found is already set, so there is no point in continuing
+                 break;
+             } else {
+                 // progress indicator: one dot per bucket searched without a hit
+                 printf(".");
+                 fflush(stdout);
+             }
+         }
+         current_bucket += thread_count;
+     }
+     return NULL;
+ }
+
+static void brute_force(void)
+{
+ if (known_target_key != -1) {
+ PrintAndLog("Looking for known target key in remaining key space...");
+ TestIfKeyExists(known_target_key);
+ } else {
+ PrintAndLog("Brute force phase starting.");
+ time_t start, end;
+ time(&start);
+ keys_found = 0;
+
+ crypto1_bs_init();
+
+ PrintAndLog("Using %u-bit bitslices", MAX_BITSLICES);
+ PrintAndLog("Bitslicing best_first_byte^uid[3] (rollback byte): %02x...", best_first_bytes[0]^(cuid>>24));
+ // convert to 32 bit little-endian
+ crypto1_bs_bitslice_value32((best_first_bytes[0]<<24)^cuid, bitsliced_rollback_byte, 8);
+
+ PrintAndLog("Bitslicing nonces...");
+ for(size_t tests = 0; tests < NONCE_TESTS; tests++){
+ uint32_t test_nonce = brute_force_nonces[tests]->nonce_enc;
+ uint8_t test_parity = brute_force_nonces[tests]->par_enc;
+ // pre-xor the uid into the decrypted nonces, and also pre-xor the cuid parity into the encrypted parity bits - otherwise an extra xor is required in the decryption routine
+ crypto1_bs_bitslice_value32(cuid^test_nonce, bitsliced_encrypted_nonces[tests], 32);
+ // convert to 32 bit little-endian
+ crypto1_bs_bitslice_value32(rev32( ~(test_parity ^ ~(parity(cuid>>24 & 0xff)<<3 | parity(cuid>>16 & 0xff)<<2 | parity(cuid>>8 & 0xff)<<1 | parity(cuid&0xff)))), bitsliced_encrypted_parity_bits[tests], 4);
+ }
+ total_states_tested = 0;
+
+ // count number of states to go
+ bucket_count = 0;
+ for (statelist_t *p = candidates; p != NULL; p = p->next) {
+ buckets[bucket_count] = p;
+ bucket_count++;
+ }
+
+#ifndef __WIN32
+ thread_count = sysconf(_SC_NPROCESSORS_CONF);
+ if ( thread_count < 1)
+ thread_count = 1;
+#endif /* _WIN32 */
+
+ pthread_t threads[thread_count];
+
+ // enumerate states using all hardware threads, each thread handles one bucket
+ PrintAndLog("Starting %u cracking threads to search %u buckets containing a total of %"PRIu32" states...", thread_count, bucket_count, maximum_states);
+
+ for(size_t i = 0; i < thread_count; i++){
+ pthread_create(&threads[i], NULL, crack_states_thread, (void*) i);
+ }
+ for(size_t i = 0; i < thread_count; i++){
+ pthread_join(threads[i], 0);
+ }
+
+ time(&end);
+ unsigned long elapsed_time = difftime(end, start);
+ if(keys_found){
+ PrintAndLog("Success! Tested %"PRIu32" states, found %u keys after %u seconds", total_states_tested, keys_found, elapsed_time);
+ PrintAndLog("\nFound key: %012"PRIx64"\n", foundkey);
+ } else {
+ PrintAndLog("Fail! Tested %"PRIu32" states, in %u seconds", total_states_tested, elapsed_time);