//-----------------------------------------------------------------------------
// Copyright (C) 2015 piwi
-//
+// fiddled with 2016 Azcid (hardnested bitsliced Bruteforce imp)
// This code is licensed to you under the terms of the GNU GPL, version 2 or,
// at your option, any later version. See the LICENSE.txt file for the text of
// the license.
// Computer and Communications Security, 2015
//-----------------------------------------------------------------------------
-#include <stdio.h>
#include <stdlib.h>
+#include <stdio.h>
#include <string.h>
#include <pthread.h>
#include <locale.h>
#include "ui.h"
#include "util.h"
#include "nonce2key/crapto1.h"
+#include "nonce2key/crypto1_bs.h"
#include "parity.h"
+#ifdef __WIN32
+ #include <windows.h>
+#endif
+#include <malloc.h>
+#include <assert.h>
// uint32_t test_state_odd = 0;
// uint32_t test_state_even = 0;
#define CONFIDENCE_THRESHOLD 0.95 // Collect nonces until we are certain enough that the following brute force is successful
-#define GOOD_BYTES_REQUIRED 30
-
+#define GOOD_BYTES_REQUIRED 28
static const float p_K[257] = { // the probability that a random nonce has a Sum Property == K
0.0290, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
} noncelist_t;
+// number of entries of brute_force_nonces[] that have been filled so far (never grows past 256)
+static size_t nonces_to_bruteforce = 0;
+// pointers to the first collected nonce entries; brute_force() reads the first NONCE_TESTS of them
+static noncelistentry_t *brute_force_nonces[256];
static uint32_t cuid = 0;
static noncelist_t nonces[256];
static uint8_t best_first_bytes[256];
p2->nonce_enc = nonce_enc;
p2->par_enc = par_enc;
+ if(nonces_to_bruteforce < 256){
+ brute_force_nonces[nonces_to_bruteforce] = p2;
+ nonces_to_bruteforce++;
+ }
+
nonces[first_byte].num++;
nonces[first_byte].Sum += evenparity32((nonce_enc & 0x00ff0000) | (par_enc & 0x04));
nonces[first_byte].updated = true; // indicates that we need to recalculate the Sum(a8) probability for this first byte
return (1); // new nonce added
}
-
static void init_nonce_memory(void)
{
for (uint16_t i = 0; i < 256; i++) {
}
}
-
static void free_nonces_memory(void)
{
for (uint16_t i = 0; i < 256; i++) {
}
}
-
static uint16_t PartialSumProperty(uint32_t state, odd_even_t odd_even)
{
uint16_t sum = 0;
return sum;
}
-
// static uint16_t SumProperty(struct Crypto1State *s)
// {
// uint16_t sum_odd = PartialSumProperty(s->odd, ODD_STATE);
// return (sum_odd*(16-sum_even) + (16-sum_odd)*sum_even);
// }
-
static double p_hypergeometric(uint16_t N, uint16_t K, uint16_t n, uint16_t k)
{
// for efficient computation we are using the recursive definition
}
}
}
-
-
+
static float sum_probability(uint16_t K, uint16_t n, uint16_t k)
{
const uint16_t N = 256;
return(p_T_is_k_when_S_is_K * p_S_is_K / p_T_is_k);
}
-
-
static inline uint_fast8_t common_bits(uint_fast8_t bytes_diff)
{
return common_bits_LUT[bytes_diff];
}
-
static void Tests()
{
// printf("Tests: Partial Statelist sizes\n");
}
-
static void sort_best_first_bytes(void)
{
// sort based on probability for correct guess
}
-
static uint16_t estimate_second_byte_sum(void)
{
return num_good_nonces;
}
-
static int read_nonce_file(void)
{
FILE *fnonces = NULL;
return 0;
}
-
static void Check_for_FilterFlipProperties(void)
{
printf("Checking for Filter Flip Properties...\n");
}
}
-
static void simulate_MFplus_RNG(uint32_t test_cuid, uint64_t test_key, uint32_t *nt_enc, uint8_t *par_enc)
{
struct Crypto1State sim_cs = {0, 0};
-// sim_cs.odd = sim_cs.even = 0;
-
// init cryptostate with key:
for(int8_t i = 47; i > 0; i -= 2) {
sim_cs.odd = sim_cs.odd << 1 | BIT(test_key, (i - 1) ^ 7);
}
-
static void simulate_acquire_nonces()
{
clock_t time1 = clock();
}
-
static int acquire_nonces(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_t trgBlockNo, uint8_t trgKeyType, bool nonce_file_write, bool slow)
{
clock_t time1 = clock();
return 0;
}
-
static int init_partial_statelists(void)
{
const uint32_t sizes_odd[17] = { 126757, 0, 18387, 0, 74241, 0, 181737, 0, 248801, 0, 182033, 0, 73421, 0, 17607, 0, 125601 };
return 0;
}
-
static void init_BitFlip_statelist(void)
{
*p = 0xffffffff;
statelist_bitflip.states[0] = realloc(statelist_bitflip.states[0], sizeof(uint32_t) * (statelist_bitflip.len[0] + 1));
}
-
static inline uint32_t *find_first_state(uint32_t state, uint32_t mask, partial_indexed_statelist_t *sl, odd_even_t odd_even)
{
return NULL; // no match
}
-
static inline bool /*__attribute__((always_inline))*/ invariant_holds(uint_fast8_t byte_diff, uint_fast32_t state1, uint_fast32_t state2, uint_fast8_t bit, uint_fast8_t state_bit)
{
uint_fast8_t j_1_bit_mask = 0x01 << (bit-1);
return !all_diff;
}
-
static inline bool /*__attribute__((always_inline))*/ invalid_state(uint_fast8_t byte_diff, uint_fast32_t state1, uint_fast32_t state2, uint_fast8_t bit, uint_fast8_t state_bit)
{
uint_fast8_t j_bit_mask = 0x01 << bit;
return all_diff;
}
-
static inline bool remaining_bits_match(uint_fast8_t num_common_bits, uint_fast8_t byte_diff, uint_fast32_t state1, uint_fast32_t state2, odd_even_t odd_even)
{
if (odd_even) {
return true; // valid state
}
-
static bool all_other_first_bytes_match(uint32_t state, odd_even_t odd_even)
{
for (uint16_t i = 1; i < num_good_first_bytes; i++) {
return true;
}
-
static bool all_bit_flips_match(uint32_t state, odd_even_t odd_even)
{
for (uint16_t i = 0; i < 256; i++) {
return true;
}
-
static struct sl_cache_entry {
uint32_t *sl;
uint32_t len;
} sl_cache[17][17][2];
-
static void init_statelist_cache(void)
{
for (uint16_t i = 0; i < 17; i+=2) {
}
}
-
static int add_matching_states(statelist_t *candidates, uint16_t part_sum_a0, uint16_t part_sum_a8, odd_even_t odd_even)
{
uint32_t worstcase_size = 1<<20;
return 0;
}
-
static statelist_t *add_more_candidates(statelist_t *current_candidates)
{
statelist_t *new_candidates = NULL;
return new_candidates;
}
-
static void TestIfKeyExists(uint64_t key)
{
struct Crypto1State *pcs;
crypto1_destroy(pcs);
}
-
static void generate_candidates(uint16_t sum_a0, uint16_t sum_a8)
{
printf("Generating crypto1 state candidates... \n");
}
}
-
static void free_candidates_memory(statelist_t *sl)
{
if (sl == NULL) {
}
}
-
static void free_statelist_cache(void)
{
for (uint16_t i = 0; i < 17; i+=2) {
}
}
+// number of keys found so far; incremented atomically by the cracking threads and polled by them for early abort
+size_t keys_found = 0;
+// number of valid entries in buckets[]
+size_t bucket_count = 0;
+// candidate state lists to search; filled by brute_force() from the candidates list
+statelist_t* buckets[128];
+// running total of tested states, updated atomically by crack_states_bitsliced()
+size_t total_states_tested = 0;
+// number of cracking threads (default 4; brute_force() overwrites it with sysconf() on non-Windows builds)
+size_t thread_count = 4;
+
+// these bitsliced states will hold identical states in all slices
+bitslice_t bitsliced_rollback_byte[ROLLBACK_SIZE];
+
+// arrays of bitsliced states with identical values in all slices
+bitslice_t bitsliced_encrypted_nonces[NONCE_TESTS][STATE_SIZE];
+bitslice_t bitsliced_encrypted_parity_bits[NONCE_TESTS][ROLLBACK_SIZE];
+
+// when defined, crack_states_bitsliced() counts tested states block by block instead of assuming len[EVEN]*len[ODD]
+#define EXACT_COUNT
+
+// Brute-force one bucket of candidate states with the bitsliced Crypto-1 implementation.
+//
+// p: bucket holding the even and odd half-state lists (p->states[] / p->len[]) to be combined.
+//
+// Every even half-state is bitsliced in blocks of up to MAX_BITSLICES slices and rolled back by
+// ROLLBACK_SIZE bits once; each odd half-state is then combined with every block and the resulting
+// states are checked against the NONCE_TESTS pre-bitsliced encrypted nonces and parity bits.
+//
+// Returns the key of the first slice that passes all tests, or (uint64_t)-1 when no key was found
+// in this bucket, when another thread already found one (keys_found != 0) or when memory ran out.
+// Side effects: uses the thread-local states[]/state_p and adds the number of tested states to
+// total_states_tested.
+static const uint64_t crack_states_bitsliced(statelist_t *p){
+    // the idea to roll back the half-states before combining them was suggested/explained to me by bla
+    // first we pre-bitslice all the even state bits and roll them back, then bitslice the odd bits and combine the two in the inner loop
+    uint64_t key = -1;
+    uint8_t bSize = sizeof(bitslice_t);
+
+    // one block per MAX_BITSLICES even states, rounded UP: the last block may be only partially filled
+    const size_t even_blocks = ((size_t)p->len[EVEN_STATE] + MAX_BITSLICES - 1) / MAX_BITSLICES;
+    // a variable length array must have at least one element
+    const size_t block_slots = even_blocks ? even_blocks : 1;
+
+#ifdef EXACT_COUNT
+    size_t bucket_states_tested = 0;
+    size_t bucket_size[block_slots];
+#else
+    const size_t bucket_states_tested = (p->len[EVEN_STATE])*(p->len[ODD_STATE]);
+#endif
+
+    bitslice_t *bitsliced_even_states[block_slots];
+    size_t bitsliced_blocks = 0;
+    uint32_t const * restrict even_end = p->states[EVEN_STATE]+p->len[EVEN_STATE];
+
+    // bitslice all the even states
+    for(uint32_t * restrict p_even = p->states[EVEN_STATE]; p_even < even_end; p_even += MAX_BITSLICES){
+
+#ifdef __WIN32
+    #ifdef __MINGW32__
+        bitslice_t * restrict lstate_p = __mingw_aligned_malloc((STATE_SIZE+ROLLBACK_SIZE) * bSize, bSize);
+    #else
+        bitslice_t * restrict lstate_p = _aligned_malloc((STATE_SIZE+ROLLBACK_SIZE) * bSize, bSize);
+    #endif
+#else
+        bitslice_t * restrict lstate_p = memalign(bSize, (STATE_SIZE+ROLLBACK_SIZE) * bSize);
+#endif
+
+        if ( !lstate_p ) {
+            // out of memory: release the blocks allocated so far and report "no key"
+            goto out;
+        }
+
+        // zero the whole initial state; index 0 is read as the first feed-out bit of the rollback below
+        memset(lstate_p, 0x0, STATE_SIZE*sizeof(bitslice_t));
+
+        // bitslice even half-states
+        const size_t max_slices = (even_end-p_even) < MAX_BITSLICES ? even_end-p_even : MAX_BITSLICES;
+#ifdef EXACT_COUNT
+        bucket_size[bitsliced_blocks] = max_slices;
+#endif
+        for(size_t slice_idx = 0; slice_idx < max_slices; ++slice_idx){
+            uint32_t e = *(p_even+slice_idx);
+            for(size_t bit_idx = 1; bit_idx < STATE_SIZE; bit_idx+=2, e >>= 1){
+                // set even bits
+                if(e&1){
+                    lstate_p[bit_idx].bytes64[slice_idx>>6] |= 1ull << (slice_idx&63);
+                }
+            }
+        }
+        // compute the rollback bits
+        for(size_t rollback = 0; rollback < ROLLBACK_SIZE; ++rollback){
+            // inlined crypto1_bs_lfsr_rollback
+            const bitslice_value_t feedout = lstate_p[0].value;
+            ++lstate_p;
+            const bitslice_value_t ks_bits = crypto1_bs_f20(lstate_p);
+            const bitslice_value_t feedback = (feedout ^ ks_bits ^ lstate_p[47- 5].value ^ lstate_p[47- 9].value ^
+                                               lstate_p[47-10].value ^ lstate_p[47-12].value ^ lstate_p[47-14].value ^
+                                               lstate_p[47-15].value ^ lstate_p[47-17].value ^ lstate_p[47-19].value ^
+                                               lstate_p[47-24].value ^ lstate_p[47-25].value ^ lstate_p[47-27].value ^
+                                               lstate_p[47-29].value ^ lstate_p[47-35].value ^ lstate_p[47-39].value ^
+                                               lstate_p[47-41].value ^ lstate_p[47-42].value ^ lstate_p[47-43].value);
+            lstate_p[47].value = feedback ^ bitsliced_rollback_byte[rollback].value;
+        }
+        // note: the stored pointer is ROLLBACK_SIZE elements past the start of the allocation
+        bitsliced_even_states[bitsliced_blocks++] = lstate_p;
+    }
+
+    // bitslice every odd state to every block of even half-states with half-finished rollback
+    for(uint32_t const * restrict p_odd = p->states[ODD_STATE]; p_odd < p->states[ODD_STATE]+p->len[ODD_STATE]; ++p_odd){
+        // early abort
+        if(keys_found){
+            goto out;
+        }
+
+        // set the odd bits and compute rollback
+        uint64_t o = (uint64_t) *p_odd;
+        lfsr_rollback_byte((struct Crypto1State*) &o, 0, 1);
+        // pre-compute part of the odd feedback bits (minus rollback)
+        bool odd_feedback_bit = parity(o&0x9ce5c);
+
+        crypto1_bs_rewind_a0();
+        // set odd bits
+        for(size_t state_idx = 0; state_idx < STATE_SIZE-ROLLBACK_SIZE; o >>= 1, state_idx+=2){
+            if(o & 1){
+                state_p[state_idx] = bs_ones;
+            } else {
+                state_p[state_idx] = bs_zeroes;
+            }
+        }
+        const bitslice_value_t odd_feedback = odd_feedback_bit ? bs_ones.value : bs_zeroes.value;
+
+        for(size_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx){
+            const bitslice_t * restrict bitsliced_even_state = bitsliced_even_states[block_idx];
+            size_t state_idx;
+            // set even bits
+            for(state_idx = 0; state_idx < STATE_SIZE-ROLLBACK_SIZE; state_idx+=2){
+                state_p[1+state_idx] = bitsliced_even_state[1+state_idx];
+            }
+            // set rollback bits
+            uint64_t lo = o;
+            for(; state_idx < STATE_SIZE; lo >>= 1, state_idx+=2){
+                // set the odd bits and take in the odd rollback bits from the even states
+                if(lo & 1){
+                    state_p[state_idx].value = ~bitsliced_even_state[state_idx].value;
+                } else {
+                    state_p[state_idx] = bitsliced_even_state[state_idx];
+                }
+
+                // set the even bits and take in the even rollback bits from the odd states
+                if((lo >> 32) & 1){
+                    state_p[1+state_idx].value = ~bitsliced_even_state[1+state_idx].value;
+                } else {
+                    state_p[1+state_idx] = bitsliced_even_state[1+state_idx];
+                }
+            }
+
+#ifdef EXACT_COUNT
+            bucket_states_tested += bucket_size[block_idx];
+#endif
+            // pre-compute first keystream and feedback bit vectors
+            const bitslice_value_t ksb = crypto1_bs_f20(state_p);
+            const bitslice_value_t fbb = (odd_feedback ^ state_p[47- 0].value ^ state_p[47- 5].value ^ // take in the even and rollback bits
+                                          state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^
+                                          state_p[47-24].value ^ state_p[47-42].value);
+
+            // vector to contain test results (1 = passed, 0 = failed)
+            bitslice_t results = bs_ones;
+
+            for(size_t tests = 0; tests < NONCE_TESTS; ++tests){
+                size_t parity_bit_idx = 0;
+                bitslice_value_t fb_bits = fbb;
+                bitslice_value_t ks_bits = ksb;
+                state_p = &states[KEYSTREAM_SIZE-1];
+                bitslice_value_t parity_bit_vector = bs_zeroes.value;
+
+                // highest bit is transmitted/received first
+                for(int32_t ks_idx = KEYSTREAM_SIZE-1; ks_idx >= 0; --ks_idx, --state_p){
+                    // decrypt nonce bits
+                    const bitslice_value_t encrypted_nonce_bit_vector = bitsliced_encrypted_nonces[tests][ks_idx].value;
+                    const bitslice_value_t decrypted_nonce_bit_vector = (encrypted_nonce_bit_vector ^ ks_bits);
+
+                    // compute real parity bits on the fly
+                    parity_bit_vector ^= decrypted_nonce_bit_vector;
+
+                    // update state
+                    state_p[0].value = (fb_bits ^ decrypted_nonce_bit_vector);
+
+                    // compute next keystream bit
+                    ks_bits = crypto1_bs_f20(state_p);
+
+                    // for each byte:
+                    if((ks_idx&7) == 0){
+                        // get encrypted parity bits
+                        const bitslice_value_t encrypted_parity_bit_vector = bitsliced_encrypted_parity_bits[tests][parity_bit_idx++].value;
+
+                        // decrypt parity bits
+                        const bitslice_value_t decrypted_parity_bit_vector = (encrypted_parity_bit_vector ^ ks_bits);
+
+                        // compare actual parity bits with decrypted parity bits and take count in results vector
+                        results.value &= (parity_bit_vector ^ decrypted_parity_bit_vector);
+
+                        // make sure we still have a match in our set
+                        // if(memcmp(&results, &bs_zeroes, sizeof(bitslice_t)) == 0){
+
+                        // this is much faster on my gcc, because somehow a memcmp needlessly spills/fills all the xmm registers to/from the stack - ???
+                        // the short-circuiting also helps
+                        if(results.bytes64[0] == 0
+#if MAX_BITSLICES > 64
+                           && results.bytes64[1] == 0
+#endif
+#if MAX_BITSLICES > 128
+                           && results.bytes64[2] == 0
+                           && results.bytes64[3] == 0
+#endif
+                          ){
+                            goto stop_tests;
+                        }
+                        // this is about as fast but less portable (requires -std=gnu99)
+                        // asm goto ("ptest %1, %0\n\t"
+                        //           "jz %l2" :: "xm" (results.value), "xm" (bs_ones.value) : "cc" : stop_tests);
+                        parity_bit_vector = bs_zeroes.value;
+                    }
+                    // compute next feedback bit vector
+                    fb_bits = (state_p[47- 0].value ^ state_p[47- 5].value ^ state_p[47- 9].value ^
+                               state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^
+                               state_p[47-15].value ^ state_p[47-17].value ^ state_p[47-19].value ^
+                               state_p[47-24].value ^ state_p[47-25].value ^ state_p[47-27].value ^
+                               state_p[47-29].value ^ state_p[47-35].value ^ state_p[47-39].value ^
+                               state_p[47-41].value ^ state_p[47-42].value ^ state_p[47-43].value);
+                }
+            }
+            // all nonce tests were successful: we've found the key in this block!
+            state_t keys[MAX_BITSLICES];
+            crypto1_bs_convert_states(&states[KEYSTREAM_SIZE], keys);
+            for(size_t results_idx = 0; results_idx < MAX_BITSLICES; ++results_idx){
+                if(get_vector_bit(results_idx, results)){
+                    key = keys[results_idx].value;
+                    goto out;
+                }
+            }
+stop_tests:
+            // prepare to set new states
+            crypto1_bs_rewind_a0();
+            continue;
+        }
+    }
+
+out:
+    // release every block; the stored pointers were advanced by ROLLBACK_SIZE during the rollback
+    for(size_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx){
+
+#ifdef __WIN32
+    #ifdef __MINGW32__
+        __mingw_aligned_free(bitsliced_even_states[block_idx]-ROLLBACK_SIZE);
+    #else
+        _aligned_free(bitsliced_even_states[block_idx]-ROLLBACK_SIZE);
+    #endif
+#else
+        // memory obtained from memalign() is released with the regular free()
+        free(bitsliced_even_states[block_idx]-ROLLBACK_SIZE);
+#endif
+
+    }
+    __sync_fetch_and_add(&total_states_tested, bucket_states_tested);
+    return key;
+}
+// Worker thread entry point: x carries the thread index; the thread searches buckets
+// x, x+thread_count, x+2*thread_count, ... until a key is found by it or by another thread.
+static void* crack_states_thread(void* x){
+    const size_t first_bucket = (size_t)x;
+    for(size_t idx = first_bucket; idx < bucket_count; idx += thread_count){
+        statelist_t *candidate_list = buckets[idx];
+        if(!candidate_list){
+            // empty slot, move on to this thread's next bucket
+            continue;
+        }
+        const uint64_t found = crack_states_bitsliced(candidate_list);
+        if(found != (uint64_t)-1){
+            printf("\nFound key: %012"PRIx64"\n", found);
+            __sync_fetch_and_add(&keys_found, 1);
+            break;
+        }
+        if(keys_found){
+            // another thread was successful in the meantime
+            break;
+        }
+        // progress indicator: one dot per exhausted bucket
+        printf(".");
+        fflush(stdout);
+    }
+    return NULL;
+}
+#define _USE_32BIT_TIME_T
+// Final phase: either look up the known target key in the remaining candidates, or search all
+// candidate buckets with the bitsliced brute force using one thread per configured processor.
+// Reads the collected nonces (brute_force_nonces), cuid, best_first_bytes and candidates;
+// resets keys_found, total_states_tested, bucket_count and nonces_to_bruteforce.
static void brute_force(void)
{
if (known_target_key != -1) {
PrintAndLog("Looking for known target key in remaining key space...");
TestIfKeyExists(known_target_key);
} else {
- PrintAndLog("Brute Force phase is not implemented.");
+        PrintAndLog("Brute force phase starting.");
+
+        // every nonce test below dereferences brute_force_nonces[tests], so all NONCE_TESTS slots must be filled
+        if(nonces_to_bruteforce < NONCE_TESTS){
+            PrintAndLog("Not enough nonces for the brute force phase: %u collected, %u required.", (unsigned)nonces_to_bruteforce, (unsigned)NONCE_TESTS);
+            nonces_to_bruteforce = 0;
+            return;
+        }
+
+        time_t start, end;
+        time(&start);
+        keys_found = 0;
+
+        crypto1_bs_init();
+
+        PrintAndLog("Using %u-bit bitslices", MAX_BITSLICES);
+        PrintAndLog("Bitslicing best_first_byte^uid[3] (rollback byte): %02x...", best_first_bytes[0]^(cuid>>24));
+        // convert to 32 bit little-endian
+        crypto1_bs_bitslice_value32(rev32((best_first_bytes[0]^(cuid>>24))), bitsliced_rollback_byte, 8);
+
+        PrintAndLog("Bitslicing nonces...");
+        for(size_t tests = 0; tests < NONCE_TESTS; tests++){
+            uint32_t test_nonce = brute_force_nonces[tests]->nonce_enc;
+            uint8_t test_parity = brute_force_nonces[tests]->par_enc;
+            // pre-xor the uid into the decrypted nonces, and also pre-xor the cuid parity into the encrypted parity bits - otherwise an extra xor is required in the decryption routine
+            crypto1_bs_bitslice_value32(cuid^test_nonce, bitsliced_encrypted_nonces[tests], 32);
+            // convert to 32 bit little-endian
+            crypto1_bs_bitslice_value32(rev32( ~(test_parity ^ ~(parity(cuid>>24 & 0xff)<<3 | parity(cuid>>16 & 0xff)<<2 | parity(cuid>>8 & 0xff)<<1 | parity(cuid&0xff)))), bitsliced_encrypted_parity_bits[tests], 4);
+        }
+        total_states_tested = 0;
+
+        // count number of states to go; never write past the end of buckets[]
+        const size_t max_buckets = sizeof(buckets)/sizeof(buckets[0]);
+        bool buckets_truncated = false;
+        bucket_count = 0;
+        for (statelist_t *p = candidates; p != NULL; p = p->next) {
+            if(bucket_count >= max_buckets){
+                PrintAndLog("Warning: more than %u candidate buckets, the remaining ones are not searched.", (unsigned)max_buckets);
+                buckets_truncated = true;
+                break;
+            }
+            buckets[bucket_count] = p;
+            bucket_count++;
+        }
+
+#ifndef __WIN32
+        // sysconf() returns -1 on failure; fall back to a single thread in that case
+        const long configured_cpus = sysconf(_SC_NPROCESSORS_CONF);
+        thread_count = configured_cpus > 0 ? (size_t)configured_cpus : 1;
+#endif /* _WIN32 */
+        pthread_t threads[thread_count];
+        bool started[thread_count];
+
+        // enumerate states using all hardware threads, each thread handles one bucket
+        PrintAndLog("Starting %u cracking threads to search %u buckets containing a total of %"PRIu64" states...", (unsigned)thread_count, (unsigned)bucket_count, (uint64_t)maximum_states);
+
+        for(size_t i = 0; i < thread_count; i++){
+            started[i] = (pthread_create(&threads[i], NULL, crack_states_thread, (void*) i) == 0);
+            if(!started[i]){
+                // keep the search complete: do this thread's share of the buckets right here
+                PrintAndLog("Could not start cracking thread %u, searching its buckets in the calling thread.", (unsigned)i);
+                crack_states_thread((void*) i);
+            }
+        }
+        for(size_t i = 0; i < thread_count; i++){
+            // only threads that were actually created may be joined
+            if(started[i]){
+                pthread_join(threads[i], 0);
+            }
+        }
+
+        time(&end);
+        unsigned long elapsed_time = (unsigned long)difftime(end, start);
+        PrintAndLog("Tested %"PRIu64" states, found %u keys after %lu seconds", (uint64_t)total_states_tested, (unsigned)keys_found, elapsed_time);
+        if(!keys_found && !buckets_truncated){
+            assert(total_states_tested == maximum_states);
+        }
+        // reset this counter for the next call
+        nonces_to_bruteforce = 0;
}
-
}
-
int mfnestedhard(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_t trgBlockNo, uint8_t trgKeyType, uint8_t *trgkey, bool nonce_file_read, bool nonce_file_write, bool slow, int tests)
{
// initialize Random number generator
--- /dev/null
+// Bit-sliced Crypto-1 implementation
+// The cipher states are stored with the least significant bit first, hence all bit indexes are reversed here
+/*
+Copyright (c) 2015-2016 Aram Verstegen
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "crypto1_bs.h"
+#include <inttypes.h>
+#define __STDC_FORMAT_MACROS
+#define llx PRIx64
+#define lli PRIi64
+#define lu PRIu32
+
+// The following functions use this global or thread-local state
+// It is sized to fit exactly KEYSTREAM_SIZE more states next to the initial state
+__thread bitslice_t states[KEYSTREAM_SIZE+STATE_SIZE];
+// cursor into states[]: crypto1_bs_bit() moves it one element down, crypto1_bs_lfsr_rollback() one element up
+__thread bitslice_t * restrict state_p;
+
+void crypto1_bs_init(){
+    // fill the two constant vectors: no slice set / every slice set
+    memset(bs_zeroes.bytes, 0x00, sizeof(bs_zeroes.bytes));
+    memset(bs_ones.bytes, 0xff, sizeof(bs_ones.bytes));
+}
+
+// The following functions have side effects on 48 bitslices at the state_p pointer
+// use the crypto1_bs_rewind_* macros to (re-)initialize them as needed
+
+// Clock the bitsliced cipher forward by one bit in all slices at once.
+// input:        bit vector that is xored into the new state bit
+// is_encrypted: when true, the keystream bits are additionally xored into the feedback
+// Returns the keystream bit vector computed from the state before it is advanced.
+// Side effect: state_p is moved one element down and the new bit is stored at state_p[0].
+inline const bitslice_value_t crypto1_bs_bit(const bitslice_value_t input, const bool is_encrypted){
+ // xor of the LFSR feedback taps (indexes are reversed because the state is stored LSB first)
+ bitslice_value_t feedback = (state_p[47- 0].value ^ state_p[47- 5].value ^ state_p[47- 9].value ^
+ state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^
+ state_p[47-15].value ^ state_p[47-17].value ^ state_p[47-19].value ^
+ state_p[47-24].value ^ state_p[47-25].value ^ state_p[47-27].value ^
+ state_p[47-29].value ^ state_p[47-35].value ^ state_p[47-39].value ^
+ state_p[47-41].value ^ state_p[47-42].value ^ state_p[47-43].value);
+ // the filter output must be taken before the state pointer moves
+ const bitslice_value_t ks_bits = crypto1_bs_f20(state_p);
+ if(is_encrypted){
+ feedback ^= ks_bits;
+ }
+ // shift: the element below the current window becomes the new first state bit
+ state_p--;
+ state_p[0].value = feedback ^ input;
+ return ks_bits;
+}
+
+// Undo one clock of the bitsliced cipher in all slices at once.
+// input:        bit vector that is xored into the restored state bit
+// is_encrypted: when true, the keystream bits are additionally xored into the fed-out bit
+// Returns the keystream bit vector computed from the state after state_p has been moved.
+// Side effect: state_p is moved one element up and the restored bit is stored at state_p[47].
+inline const bitslice_value_t crypto1_bs_lfsr_rollback(const bitslice_value_t input, const bool is_encrypted){
+ // remember the bit that leaves the window before moving the pointer
+ bitslice_value_t feedout = state_p[0].value;
+ state_p++;
+ const bitslice_value_t ks_bits = crypto1_bs_f20(state_p);
+ if(is_encrypted){
+ feedout ^= ks_bits;
+ }
+ // same taps as in crypto1_bs_bit() except 47-0, whose place is taken by the fed-out bit
+ const bitslice_value_t feedback = (feedout ^ state_p[47- 5].value ^ state_p[47- 9].value ^
+ state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^
+ state_p[47-15].value ^ state_p[47-17].value ^ state_p[47-19].value ^
+ state_p[47-24].value ^ state_p[47-25].value ^ state_p[47-27].value ^
+ state_p[47-29].value ^ state_p[47-35].value ^ state_p[47-39].value ^
+ state_p[47-41].value ^ state_p[47-42].value ^ state_p[47-43].value);
+ state_p[47].value = feedback ^ input;
+ return ks_bits;
+}
+
+// side-effect free from here on
+// note that bytes are sliced and unsliced with reversed endianness
+// Convert STATE_SIZE bitsliced state bits back into MAX_BITSLICES regular states.
+// bitsliced_states: STATE_SIZE bit vectors, one per state bit
+// regular_states:   output array with room for MAX_BITSLICES entries; entry i receives slice i
+inline void crypto1_bs_convert_states(bitslice_t bitsliced_states[], state_t regular_states[]){
+    for(size_t slice_idx = 0; slice_idx < MAX_BITSLICES; slice_idx++){
+        // start from a defined value instead of shifting uninitialised stack contents
+        uint64_t value = 0;
+        for(size_t bit_idx = 0; bit_idx < STATE_SIZE; bit_idx++){
+            const bool bit = get_vector_bit(slice_idx, bitsliced_states[bit_idx]);
+            value = (value << 1) | bit;
+        }
+        // swap endianness
+        value = rev_state_t(value);
+        // roll off unused bits
+        value >>= ((sizeof(state_t)*8)-STATE_SIZE);
+        regular_states[slice_idx].value = value;
+    }
+}
+
+// bitslice a value
+// Spread the lowest bit_len bits of the byte-swapped value over bit_len constant bit vectors:
+// every slice of bitsliced_value[i] carries the same bit.
+void crypto1_bs_bitslice_value32(uint32_t value, bitslice_t bitsliced_value[], size_t bit_len){
+    // load nonce bytes with unswapped endianness
+    const uint32_t swapped = rev32(value);
+    for(size_t out_idx = 0; out_idx < bit_len; out_idx++){
+        // output element 0 takes the highest of the bit_len bits
+        const size_t src_bit = bit_len-1-out_idx;
+        const bitslice_t *fill = get_bit(src_bit, swapped) ? &bs_ones : &bs_zeroes;
+        bitsliced_value[out_idx].value = fill->value;
+    }
+}
+
+// Debug helper: convert the bitsliced states and print one line per slice.
+void crypto1_bs_print_states(bitslice_t bitsliced_states[]){
+    state_t converted[MAX_BITSLICES];
+    crypto1_bs_convert_states(bitsliced_states, converted);
+    size_t n = 0;
+    while(n < MAX_BITSLICES){
+        printf("State %03zu: %012"llx"\n", n, converted[n].value);
+        n++;
+    }
+}
+
--- /dev/null
+#ifndef _CRYPTO1_BS_H
+#define _CRYPTO1_BS_H
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+// bitslice type
+// while AVX supports 256 bit vector floating point operations, we need integer operations for boolean logic
+// same for AVX2 and 512 bit vectors
+// using larger vectors works but seems to generate more register pressure
+#if defined(__AVX2__)
+#define MAX_BITSLICES 256
+#elif defined(__AVX__)
+#define MAX_BITSLICES 128
+#elif defined(__SSE2__)
+#define MAX_BITSLICES 128
+#else
+#define MAX_BITSLICES 64
+#endif
+
+// size of one bit vector in bytes
+#define VECTOR_SIZE (MAX_BITSLICES/8)
+typedef unsigned int __attribute__((aligned(VECTOR_SIZE))) __attribute__((vector_size(VECTOR_SIZE))) bitslice_value_t;
+// one bit position of MAX_BITSLICES states, accessible as a vector, as 64-bit words or as bytes
+typedef union {
+    bitslice_value_t value;
+    uint64_t bytes64[MAX_BITSLICES/64];
+    uint8_t bytes[MAX_BITSLICES/8];
+} bitslice_t;
+
+// filter function (f20)
+// sourced from ``Wirelessly Pickpocketing a Mifare Classic Card'' by Flavio Garcia, Peter van Rossum, Roel Verdult and Ronny Wichers Schreur
+#define f20a(a,b,c,d) (((a|b)^(a&d))^(c&((a^b)|d)))
+#define f20b(a,b,c,d) (((a&b)|c)^((a^b)&(c|d)))
+#define f20c(a,b,c,d,e) ((a|((b|e)&(d^e)))^((a^(b&d))&((c^d)|(b&e))))
+
+// s points at the first of 48 consecutive state bit vectors
+#define crypto1_bs_f20(s) \
+f20c(f20a((s[47- 9].value), (s[47-11].value), (s[47-13].value), (s[47-15].value)), \
+     f20b((s[47-17].value), (s[47-19].value), (s[47-21].value), (s[47-23].value)), \
+     f20b((s[47-25].value), (s[47-27].value), (s[47-29].value), (s[47-31].value)), \
+     f20a((s[47-33].value), (s[47-35].value), (s[47-37].value), (s[47-39].value)), \
+     f20b((s[47-41].value), (s[47-43].value), (s[47-45].value), (s[47-47].value)))
+
+// bit indexing
+// arguments are parenthesized so that compound expressions such as (a | b) are indexed as a whole
+#define get_bit(n, word) (((word) >> (n)) & 1)
+#define get_vector_bit(slice, value) get_bit((slice) & 0x3f, (value).bytes64[(slice) >> 6])
+
+// constant ones/zeroes (filled in by crypto1_bs_init)
+// NOTE(review): these are definitions, not declarations, so every file including this header
+// gets its own tentative definition; this relies on the linker merging common symbols
+bitslice_t bs_ones;
+bitslice_t bs_zeroes;
+
+// size of crypto-1 state
+#define STATE_SIZE 48
+// size of nonce to be decrypted
+#define KEYSTREAM_SIZE 32
+// size of first uid^nonce byte to be rolled back to the initial key
+#define ROLLBACK_SIZE 8
+// number of nonces required to test to cover entire 48-bit state
+// I would have said it's 12... but bla goes with 100, so I do too
+#define NONCE_TESTS 100
+
+// state pointer management
+extern __thread bitslice_t states[KEYSTREAM_SIZE+STATE_SIZE];
+extern __thread bitslice_t * restrict state_p;
+
+// rewind to the point a0, at which KEYSTREAM_SIZE more bits can be generated
+#define crypto1_bs_rewind_a0() (state_p = &states[KEYSTREAM_SIZE])
+
+// bitsliced bytewise parity
+#define bitsliced_byte_parity(n) (n[0].value ^ n[1].value ^ n[2].value ^ n[3].value ^ n[4].value ^ n[5].value ^ n[6].value ^ n[7].value)
+
+// 48-bit crypto-1 states are normally represented using 64-bit values
+typedef union {
+    uint64_t value;
+    uint8_t bytes[8];
+} state_t;
+
+// endianness conversion
+// the argument is evaluated several times, so it must not have side effects
+// rev64 shifts by 32 and therefore expects a 64-bit argument
+#define rev32(word) ((((word) & 0xff) << 24) | ((((word) >> 8) & 0xff) << 16) | ((((word) >> 16) & 0xff) << 8) | ((((word) >> 24) & 0xff)))
+#define rev64(x) ((rev32(x) << 32) | (rev32((x) >> 32)))
+#define rev_state_t rev64
+
+// crypto-1 functions
+const bitslice_value_t crypto1_bs_bit(const bitslice_value_t input, const bool is_encrypted);
+const bitslice_value_t crypto1_bs_lfsr_rollback(const bitslice_value_t input, const bool is_encrypted);
+
+// initialization functions
+void crypto1_bs_init();
+
+// conversion functions
+void crypto1_bs_bitslice_value32(uint32_t value, bitslice_t bitsliced_value[], size_t bit_len);
+void crypto1_bs_convert_states(bitslice_t bitsliced_states[], state_t regular_states[]);
+
+// debug print
+void crypto1_bs_print_states(bitslice_t *bitsliced_states);
+
+#endif // _CRYPTO1_BS_H
+