-AT91PS_TC timer;
-
-static void setup_timer(void)
-{
- /* Set up Timer 1 to use for measuring time between pulses. Since we're bit-banging
- * this it won't be terribly accurate but should be good enough.
- */
- AT91C_BASE_PMC->PMC_PCER = (1 << AT91C_ID_TC1);
- timer = AT91C_BASE_TC1;
- timer->TC_CCR = AT91C_TC_CLKDIS;
- timer->TC_CMR = TC_CMR_TCCLKS_TIMER_CLOCK3;
- timer->TC_CCR = AT91C_TC_CLKEN | AT91C_TC_SWTRG;
-
-/* At TIMER_CLOCK3 (MCK/32) */
-#define RWD_TIME_1 150 /* RWD_TIME_PAUSE off, 80us on = 100us */
-#define RWD_TIME_0 90 /* RWD_TIME_PAUSE off, 40us on = 60us */
-#define RWD_TIME_PAUSE 30 /* 20us */
-#define RWD_TIME_FUZZ 20 /* rather generous 13us, since the peak detector + hysteresis fuzz quite a bit */
-#define TAG_TIME_BIT 150 /* 100us for every bit */
-#define TAG_TIME_WAIT 490 /* time from RWD frame end to tag frame start, experimentally determined */
+//-----------------------------------------------------------------------------
+// Frame timing and pseudorandom number generator
+//
+// The PRNG is forwarded every 100us (TAG_BIT_PERIOD), except when the reader is
+// transmitting. In that case the PRNG has to be forwarded for every bit sent:
+// - 60us for a 0 (RWD_TIME_0)
+// - 100us for a 1 (RWD_TIME_1)
+//
+// The data-dependent timing makes writing comprehensible code significantly
+// harder. The current approach forwards the PRNG data-based while there is data
+// on air, and time-based (using GET_TICKS) during computation and wait periods.
+//
+// To avoid having to calculate/guess execution-time-dependent timeouts,
+// tx_frame and rx_frame use a shared timestamp to coordinate tx and rx timeslots.
+//-----------------------------------------------------------------------------
+
+static uint32_t last_frame_end; /* timestamp of the last bit of the previous rx or tx frame */
+
+#define RWD_TIME_PAUSE 30 /* 20us */
+#define RWD_TIME_1 150 /* RWD_TIME_PAUSE 20us off + 80us on = 100us */
+#define RWD_TIME_0 90 /* RWD_TIME_PAUSE 20us off + 40us on = 60us */
+#define RWD_FRAME_WAIT 330 /* 220us from TAG frame end to READER frame start */
+#define TAG_FRAME_WAIT 495 /* 330us from READER frame end to TAG frame start */
+#define TAG_BIT_PERIOD 150 /* 100us */
+#define TAG_WRITE_TIMEOUT 60 /* 40 * 100us (write should take at most 3.6ms); NOTE(review): value is 60 but this comment says 40 -- confirm */
+
+#define LEGIC_READ 0x01 /* Read Command */
+#define LEGIC_WRITE 0x00 /* Write Command */
+
+#define SESSION_IV 0x55 /* An arbitrarily chosen session IV; all values should work */
+#define OFFSET_LOG 1024 /* The largest Legic Prime card is 1k */
+#define WRITE_LOWERLIMIT 4 /* UID and MCC are not writable */
+
+#define INPUT_THRESHOLD 8 /* heuristically determined; lower values */
+ /* lead to detecting a false ack during write */
+
+//-----------------------------------------------------------------------------
+// I/O interface abstraction (FPGA -> ARM)
+//-----------------------------------------------------------------------------
+
+static inline uint8_t rx_byte_from_fpga() { // blocks until the SSC reports a received byte, then returns it
+ for(;;) { // no timeout: spins until RXRDY is set in the SSC status register
+ WDT_HIT(); // invoked on every poll iteration (presumably services the watchdog -- confirm)
+
+ // wait for a byte to become available in the rx holding register
+ if(AT91C_BASE_SSC->SSC_SR & (AT91C_SSC_RXRDY)) {
+ return AT91C_BASE_SSC->SSC_RHR; // value is returned as uint8_t
+ }
+ }
+}
+
+//-----------------------------------------------------------------------------
+// Demodulation (Reader)
+//-----------------------------------------------------------------------------
+
+// Returns a demodulated bit
+//
+// The FPGA running xcorrelation samples the subcarrier at ~13.56 MHz. The mode
+// was initially designed to receive BPSK/2-PSK. Hence, it reports an I/Q pair
+// every 4.7us (8 bits i and 8 bits q).
+//
+// The subcarrier amplitude can be calculated using Pythagoras sqrt(i^2 + q^2).
+// To reduce CPU time the amplitude is approximated by using linear functions:
+// am = MAX(ABS(i),ABS(q)) + 1/2*MIN(ABS(i),ABS(q))
+//
+// Note: The SSC receiver is never synchronized, so the calculation may be performed
+// on an I/Q pair from two subsequent correlations, but this does not matter.
+//
+// The bit time is 99.1us (21 I/Q pairs). The receiver skips the first 5 samples
+// and averages the next (most stable) 8 samples. The final 8 samples are dropped
+// also.
+//
+// The demodulated bit should be aligned to the bit period by the caller. This is
+// done in rx_bit and rx_ack.
+static inline bool rx_bit() { // returns true when the averaged amplitude estimate exceeds INPUT_THRESHOLD
+ int32_t cq = 0; // running sum of the first byte of each sampled pair
+ int32_t ci = 0; // running sum of the second byte of each sampled pair
+
+ // skip first 5 I/Q pairs (10 bytes are read and discarded)
+ for(size_t i = 0; i<5; ++i) {
+ (int8_t)rx_byte_from_fpga(); // result unused; the call only consumes a byte
+ (int8_t)rx_byte_from_fpga();
+ }
+
+ // sample next 8 I/Q pairs, accumulating each byte as a signed 8-bit value
+ for(size_t i = 0; i<8; ++i) {
+ cq += (int8_t)rx_byte_from_fpga();
+ ci += (int8_t)rx_byte_from_fpga();
+ }
+
+ // approximate the magnitude of the summed samples: larger + half of smaller
+ // (symmetric in ci/cq, so which accumulator holds I or Q does not change the result)
+ int32_t power = (MAX(ABS(ci), ABS(cq)) + (MIN(ABS(ci), ABS(cq)) >> 1));
+
+ // compare average (power / 8, via shift by 3) to threshold
+ return ((power >> 3) > INPUT_THRESHOLD);
+}