X-Git-Url: http://cvs.zerfleddert.de/cgi-bin/gitweb.cgi/proxmark3-svn/blobdiff_plain/7ea23ef5b3278a9a1b54b8901473a93b71a2bc12..01aa068b6ff34d86dd5d9d9e962aba20cd490050:/armsrc/optimized_cipher.c diff --git a/armsrc/optimized_cipher.c b/armsrc/optimized_cipher.c index 005f473b..2ac72ec0 100644 --- a/armsrc/optimized_cipher.c +++ b/armsrc/optimized_cipher.c @@ -1,13 +1,13 @@ /***************************************************************************** * WARNING * - * THIS CODE IS CREATED FOR EXPERIMENTATION AND EDUCATIONAL USE ONLY. - * - * USAGE OF THIS CODE IN OTHER WAYS MAY INFRINGE UPON THE INTELLECTUAL - * PROPERTY OF OTHER PARTIES, SUCH AS INSIDE SECURE AND HID GLOBAL, - * AND MAY EXPOSE YOU TO AN INFRINGEMENT ACTION FROM THOSE PARTIES. - * - * THIS CODE SHOULD NEVER BE USED TO INFRINGE PATENTS OR INTELLECTUAL PROPERTY RIGHTS. + * THIS CODE IS CREATED FOR EXPERIMENTATION AND EDUCATIONAL USE ONLY. + * + * USAGE OF THIS CODE IN OTHER WAYS MAY INFRINGE UPON THE INTELLECTUAL + * PROPERTY OF OTHER PARTIES, SUCH AS INSIDE SECURE AND HID GLOBAL, + * AND MAY EXPOSE YOU TO AN INFRINGEMENT ACTION FROM THOSE PARTIES. + * + * THIS CODE SHOULD NEVER BE USED TO INFRINGE PATENTS OR INTELLECTUAL PROPERTY RIGHTS. * ***************************************************************************** * @@ -22,7 +22,7 @@ * * This is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. + * by the Free Software Foundation, or, at your option, any later version. * * This file is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -31,9 +31,9 @@ * * You should have received a copy of the GNU General Public License * along with loclass. If not, see <http://www.gnu.org/licenses/>.
- * - * - * + * + * + * ****************************************************************************/ /** @@ -60,15 +60,63 @@ -- MHS 2015 **/ +/** + + The runtime of opt_doTagMAC_2() with the MHS optimized version was 403 microseconds on Proxmark3. + This was still too slow for some newer readers which didn't want to wait that long. + + Further optimizations to speed up the MAC calculations: + * Optimized opt_Tt logic + * Look up table for opt_select + * Removing many unnecessary bit maskings (& 0x1) + * updating state in place instead of alternating use of a second state structure + * remove the necessity to reverse bits of input and output bytes + + opt_doTagMAC_2() now completes in 270 microseconds. + + -- piwi 2019 +**/ + #include "optimized_cipher.h" #include #include #include - - -#define opt_T(s) (0x1 & ((s->t >> 15) ^ (s->t >> 14)^ (s->t >> 10)^ (s->t >> 8)^ (s->t >> 5)^ (s->t >> 4)^ (s->t >> 1)^ s->t)) - -#define opt_B(s) (((s->b >> 6) ^ (s->b >> 5) ^ (s->b >> 4) ^ (s->b)) & 0x1) +#include "string.h" + +static const uint8_t opt_select_LUT[256] = { + 00, 03, 02, 01, 02, 03, 00, 01, 04, 07, 07, 04, 06, 07, 05, 04, + 01, 02, 03, 00, 02, 03, 00, 01, 05, 06, 06, 05, 06, 07, 05, 04, + 06, 05, 04, 07, 04, 05, 06, 07, 06, 05, 05, 06, 04, 05, 07, 06, + 07, 04, 05, 06, 04, 05, 06, 07, 07, 04, 04, 07, 04, 05, 07, 06, + 06, 05, 04, 07, 04, 05, 06, 07, 02, 01, 01, 02, 00, 01, 03, 02, + 03, 00, 01, 02, 00, 01, 02, 03, 07, 04, 04, 07, 04, 05, 07, 06, + 00, 03, 02, 01, 02, 03, 00, 01, 00, 03, 03, 00, 02, 03, 01, 00, + 05, 06, 07, 04, 06, 07, 04, 05, 05, 06, 06, 05, 06, 07, 05, 04, + 02, 01, 00, 03, 00, 01, 02, 03, 06, 05, 05, 06, 04, 05, 07, 06, + 03, 00, 01, 02, 00, 01, 02, 03, 07, 04, 04, 07, 04, 05, 07, 06, + 02, 01, 00, 03, 00, 01, 02, 03, 02, 01, 01, 02, 00, 01, 03, 02, + 03, 00, 01, 02, 00, 01, 02, 03, 03, 00, 00, 03, 00, 01, 03, 02, + 04, 07, 06, 05, 06, 07, 04, 05, 00, 03, 03, 00, 02, 03, 01, 00, + 01, 02, 03, 00, 02, 03, 00, 01, 05, 06, 06, 05, 06, 07, 05, 04,
04, 07, 06, 05, 06, 07, 04, 05, 04, 07, 07, 04, 06, 07, 05, 04, + 01, 02, 03, 00, 02, 03, 00, 01, 01, 02, 02, 01, 02, 03, 01, 00 +}; + +/********************** the table above has been generated with this code: ******** +#include "util.h" +static void init_opt_select_LUT(void) { + for (int r = 0; r < 256; r++) { + uint8_t r_ls2 = r << 2; + uint8_t r_and_ls2 = r & r_ls2; + uint8_t r_or_ls2 = r | r_ls2; + uint8_t z0 = (r_and_ls2 >> 5) ^ ((r & ~r_ls2) >> 4) ^ ( r_or_ls2 >> 3); + uint8_t z1 = (r_or_ls2 >> 6) ^ ( r_or_ls2 >> 1) ^ (r >> 5) ^ r; + uint8_t z2 = ((r & ~r_ls2) >> 4) ^ (r_and_ls2 >> 3) ^ r; + opt_select_LUT[r] = (z0 & 4) | (z1 & 2) | (z2 & 1); + } + print_result("", opt_select_LUT, 256); +} +***********************************************************************************/ #define opt__select(x,y,r) (4 & (((r & (r << 2)) >> 5) ^ ((r & ~(r << 2)) >> 4) ^ ( (r | r << 2) >> 3)))\ |(2 & (((r | r << 2) >> 6) ^ ( (r | r << 2) >> 1) ^ (r >> 5) ^ r ^ ((x^y) << 1)))\ @@ -78,169 +126,145 @@ * Some background on the expression above can be found here... 
uint8_t xopt__select(bool x, bool y, uint8_t r) { - uint8_t r_ls2 = r << 2; - uint8_t r_and_ls2 = r & r_ls2; - uint8_t r_or_ls2 = r | r_ls2; //r: r0 r1 r2 r3 r4 r5 r6 r7 //r_ls2: r2 r3 r4 r5 r6 r7 0 0 // z0 // z1 -// uint8_t z0 = (r0 & r2) ^ (r1 & ~r3) ^ (r2 | r4); // <-- original +// uint8_t z0 = (r0 & r2) ^ (r1 & ~r3) ^ (r2 | r4); // <-- original uint8_t z0 = (r_and_ls2 >> 5) ^ ((r & ~r_ls2) >> 4) ^ ( r_or_ls2 >> 3); -// uint8_t z1 = (r0 | r2) ^ ( r5 | r7) ^ r1 ^ r6 ^ x ^ y; // <-- original +// uint8_t z1 = (r0 | r2) ^ ( r5 | r7) ^ r1 ^ r6 ^ x ^ y; // <-- original uint8_t z1 = (r_or_ls2 >> 6) ^ ( r_or_ls2 >> 1) ^ (r >> 5) ^ r ^ ((x^y) << 1); -// uint8_t z2 = (r3 & ~r5) ^ (r4 & r6 ) ^ r7 ^ x; // <-- original +// uint8_t z2 = (r3 & ~r5) ^ (r4 & r6 ) ^ r7 ^ x; // <-- original uint8_t z2 = ((r & ~r_ls2) >> 4) ^ (r_and_ls2 >> 3) ^ r ^ x; return (z0 & 4) | (z1 & 2) | (z2 & 1); } */ -void opt_successor(const uint8_t* k, State *s, bool y, State* successor) -{ - - uint8_t Tt = 1 & opt_T(s); - - successor->t = (s->t >> 1); - successor->t |= (Tt ^ (s->r >> 7 & 0x1) ^ (s->r >> 3 & 0x1)) << 15; - - successor->b = s->b >> 1; - successor->b |= (opt_B(s) ^ (s->r & 0x1)) << 7; - - successor->r = (k[opt__select(Tt,y,s->r)] ^ successor->b) + s->l ; - successor->l = successor->r+s->r; - +static void opt_successor(const uint8_t *k, State *s, uint8_t y) { +// #define opt_T(s) (0x1 & ((s->t >> 15) ^ (s->t >> 14) ^ (s->t >> 10) ^ (s->t >> 8) ^ (s->t >> 5) ^ (s->t >> 4)^ (s->t >> 1) ^ s->t)) + // uint8_t Tt = opt_T(s); + uint16_t Tt = s->t & 0xc533; + Tt = Tt ^ (Tt >> 1); + Tt = Tt ^ (Tt >> 4); + Tt = Tt ^ (Tt >> 10); + Tt = Tt ^ (Tt >> 8); + + s->t = (s->t >> 1); + s->t |= (Tt ^ (s->r >> 7) ^ (s->r >> 3)) << 15; + + uint8_t opt_B = s->b; + opt_B ^= s->b >> 6; + opt_B ^= s->b >> 5; + opt_B ^= s->b >> 4; + + s->b = s->b >> 1; + s->b |= (opt_B ^ s->r) << 7; + + uint8_t opt_select = opt_select_LUT[s->r] & 0x04; + opt_select |= (opt_select_LUT[s->r] ^ ((Tt ^ y) << 1)) & 0x02; + opt_select |= 
(opt_select_LUT[s->r] ^ Tt) & 0x01; + + uint8_t r = s->r; + s->r = (k[opt_select] ^ s->b) + s->l ; + s->l = s->r + r; } -void opt_suc(const uint8_t* k,State* s, uint8_t *in, uint8_t length, bool add32Zeroes) -{ - State x2; - int i; - uint8_t head = 0; - for(i =0 ; i < length ; i++) - { - head = 1 & (in[i] >> 7); - opt_successor(k,s,head,&x2); - - head = 1 & (in[i] >> 6); - opt_successor(k,&x2,head,s); +static void opt_suc(const uint8_t *k, State *s, uint8_t *in, uint8_t length, bool add32Zeroes) { + for (int i = 0; i < length; i++) { + uint8_t head; + head = in[i]; + opt_successor(k, s, head); - head = 1 & (in[i] >> 5); - opt_successor(k,s,head,&x2); + head >>= 1; + opt_successor(k, s, head); - head = 1 & (in[i] >> 4); - opt_successor(k,&x2,head,s); + head >>= 1; + opt_successor(k, s, head); - head = 1 & (in[i] >> 3); - opt_successor(k,s,head,&x2); + head >>= 1; + opt_successor(k, s, head); - head = 1 & (in[i] >> 2); - opt_successor(k,&x2,head,s); + head >>= 1; + opt_successor(k, s, head); - head = 1 & (in[i] >> 1); - opt_successor(k,s,head,&x2); + head >>= 1; + opt_successor(k, s, head); - head = 1 & in[i]; - opt_successor(k,&x2,head,s); + head >>= 1; + opt_successor(k, s, head); + head >>= 1; + opt_successor(k, s, head); } //For tag MAC, an additional 32 zeroes - if(add32Zeroes) - for(i =0 ; i < 16 ; i++) - { - opt_successor(k,s,0,&x2); - opt_successor(k,&x2,0,s); + if (add32Zeroes) { + for(int i = 0; i < 16; i++) { + opt_successor(k, s, 0); + opt_successor(k, s, 0); } + } } -void opt_output(const uint8_t* k,State* s, uint8_t *buffer) -{ - uint8_t times = 0; - uint8_t bout = 0; - State temp = {0,0,0,0}; - for( ; times < 4 ; times++) - { - bout =0; - bout |= (s->r & 0x4) << 5; - opt_successor(k,s,0,&temp); - bout |= (temp.r & 0x4) << 4; - opt_successor(k,&temp,0,s); - bout |= (s->r & 0x4) << 3; - opt_successor(k,s,0,&temp); - bout |= (temp.r & 0x4) << 2; - opt_successor(k,&temp,0,s); - bout |= (s->r & 0x4) << 1; - opt_successor(k,s,0,&temp); - bout |= (temp.r & 
0x4) ; - opt_successor(k,&temp,0,s); +static void opt_output(const uint8_t *k, State *s, uint8_t *buffer) { + for (uint8_t times = 0; times < 4; times++) { + uint8_t bout = 0; + bout |= (s->r & 0x4) >> 2; + opt_successor(k, s, 0); bout |= (s->r & 0x4) >> 1; - opt_successor(k,s,0,&temp); - bout |= (temp.r & 0x4) >> 2; - opt_successor(k,&temp,0,s); + opt_successor(k, s, 0); + bout |= (s->r & 0x4); + opt_successor(k, s, 0); + bout |= (s->r & 0x4) << 1; + opt_successor(k, s, 0); + bout |= (s->r & 0x4) << 2; + opt_successor(k, s, 0); + bout |= (s->r & 0x4) << 3; + opt_successor(k, s, 0); + bout |= (s->r & 0x4) << 4; + opt_successor(k, s, 0); + bout |= (s->r & 0x4) << 5; + opt_successor(k, s, 0); buffer[times] = bout; } - } -void opt_MAC(uint8_t* k, uint8_t* input, uint8_t* out) -{ +static void opt_MAC(uint8_t *k, uint8_t *input, uint8_t *out) { State _init = { - ((k[0] ^ 0x4c) + 0xEC) & 0xFF,// l - ((k[0] ^ 0x4c) + 0x21) & 0xFF,// r - 0x4c, // b - 0xE012 // t - }; + ((k[0] ^ 0x4c) + 0xEC) & 0xFF,// l + ((k[0] ^ 0x4c) + 0x21) & 0xFF,// r + 0x4c, // b + 0xE012 // t + }; - opt_suc(k,&_init,input,12, false); + opt_suc(k, &_init, input, 12, false); //printf("\noutp "); - opt_output(k,&_init, out); -} -uint8_t rev_byte(uint8_t b) { - b = (b & 0xF0) >> 4 | (b & 0x0F) << 4; - b = (b & 0xCC) >> 2 | (b & 0x33) << 2; - b = (b & 0xAA) >> 1 | (b & 0x55) << 1; - return b; -} -void opt_reverse_arraybytecpy(uint8_t* dest, uint8_t *src, size_t len) -{ - uint8_t i; - for( i =0; i< len ; i++) - dest[i] = rev_byte(src[i]); + opt_output(k, &_init, out); } -void opt_doReaderMAC(uint8_t *cc_nr_p, uint8_t *div_key_p, uint8_t mac[4]) -{ - static uint8_t cc_nr[12]; - - opt_reverse_arraybytecpy(cc_nr, cc_nr_p,12); - uint8_t dest []= {0,0,0,0,0,0,0,0}; - opt_MAC(div_key_p,cc_nr, dest); - //The output MAC must also be reversed - opt_reverse_arraybytecpy(mac, dest,4); +void opt_doReaderMAC(uint8_t *cc_nr_p, uint8_t *div_key_p, uint8_t mac[4]) { + uint8_t dest[] = {0, 0, 0, 0, 0, 0, 0, 0}; + 
opt_MAC(div_key_p, cc_nr_p, dest); + memcpy(mac, dest, 4); return; } -void opt_doTagMAC(uint8_t *cc_p, const uint8_t *div_key_p, uint8_t mac[4]) -{ - static uint8_t cc_nr[8+4+4]; - opt_reverse_arraybytecpy(cc_nr, cc_p,12); - State _init = { - ((div_key_p[0] ^ 0x4c) + 0xEC) & 0xFF,// l - ((div_key_p[0] ^ 0x4c) + 0x21) & 0xFF,// r - 0x4c, // b - 0xE012 // t - }; - opt_suc(div_key_p,&_init,cc_nr, 12,true); - uint8_t dest []= {0,0,0,0}; - opt_output(div_key_p,&_init, dest); - //The output MAC must also be reversed - opt_reverse_arraybytecpy(mac, dest,4); - return; +void opt_doTagMAC(uint8_t *cc_p, const uint8_t *div_key_p, uint8_t mac[4]) { + State _init = { + ((div_key_p[0] ^ 0x4c) + 0xEC) & 0xFF,// l + ((div_key_p[0] ^ 0x4c) + 0x21) & 0xFF,// r + 0x4c, // b + 0xE012 // t + }; + opt_suc(div_key_p, &_init, cc_p, 12, true); + opt_output(div_key_p, &_init, mac); + return; } + /** * The tag MAC can be divided (both can, but no point in dividing the reader mac) into * two functions, since the first 8 bytes are known, we can pre-calculate the state @@ -249,19 +273,17 @@ void opt_doTagMAC(uint8_t *cc_p, const uint8_t *div_key_p, uint8_t mac[4]) * @param div_key_p * @return the cipher state */ -State opt_doTagMAC_1(uint8_t *cc_p, const uint8_t *div_key_p) -{ - static uint8_t cc_nr[8]; - opt_reverse_arraybytecpy(cc_nr, cc_p,8); - State _init = { - ((div_key_p[0] ^ 0x4c) + 0xEC) & 0xFF,// l - ((div_key_p[0] ^ 0x4c) + 0x21) & 0xFF,// r - 0x4c, // b - 0xE012 // t - }; - opt_suc(div_key_p,&_init,cc_nr, 8,false); +State opt_doTagMAC_1(uint8_t *cc_p, const uint8_t *div_key_p) { + State _init = { + ((div_key_p[0] ^ 0x4c) + 0xEC) & 0xFF,// l + ((div_key_p[0] ^ 0x4c) + 0x21) & 0xFF,// r + 0x4c, // b + 0xE012 // t + }; + opt_suc(div_key_p, &_init, cc_p, 8, false); return _init; } + /** * The second part of the tag MAC calculation, since the CC is already calculated into the state, * this function is fed only the NR, and internally feeds the remaining 32 0-bits to generate the tag @@ 
-271,15 +293,8 @@ State opt_doTagMAC_1(uint8_t *cc_p, const uint8_t *div_key_p) * @param mac - where to store the MAC * @param div_key_p - the key to use */ -void opt_doTagMAC_2(State _init, uint8_t* nr, uint8_t mac[4], const uint8_t* div_key_p) -{ - static uint8_t _nr [4]; - opt_reverse_arraybytecpy(_nr, nr, 4); - opt_suc(div_key_p,&_init,_nr, 4, true); - //opt_suc(div_key_p,&_init,nr, 4, false); - uint8_t dest []= {0,0,0,0}; - opt_output(div_key_p,&_init, dest); - //The output MAC must also be reversed - opt_reverse_arraybytecpy(mac, dest,4); +void opt_doTagMAC_2(State _init, uint8_t *nr, uint8_t mac[4], const uint8_t *div_key_p) { + opt_suc(div_key_p, &_init, nr, 4, true); + opt_output(div_key_p, &_init, mac); return; }