#ifndef _CACHEUTILS_H_
#define _CACHEUTILS_H_

#include <assert.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <stdio.h>
#include <stdint.h>
#include <signal.h>
#include <setjmp.h>
#include <sched.h>

// Identifiers for the timer instructions rdtsc() can be built with.
#define TIMER_RDTSC              1 //standard timestamp counter
#define TIMER_RDTSCP             2 //serializing timestamp counter
#define TIMER_RDPRU              3 //high-res timer on recent AMD

/* ============================================================
 *                    User configuration
 * ============================================================ */
// Threshold used by flush_reload(): a measured access time strictly below
// this value is reported as a cache hit. detect_flush_reload_threshold()
// can be used to obtain a rough estimate for the current machine.
static size_t CACHE_MISS = 200;   //TODO: Adjust threshold

// Compile-time timer selection; must be one of the TIMER_* values above.
// It is tested with `#if TIMER == TIMER_*` inside rdtsc().
#define TIMER TIMER_RDTSCP //set to TIMER_RDPRU for high-res timer on AMD

/* ============================================================
 *                  User configuration End
 * ============================================================ */


// ---------------------------------------------------------------------------
// Slot that perf_init() fills with the raw return value of the
// perf_event_open syscall.
// NOTE(review): the type is unsigned, so a failed syscall (-1) ends up stored
// as a very large value instead of a negative one.
static size_t perf_fd;
// Opens a hardware CPU-cycle perf counter (kernel and hypervisor excluded)
// and stores the syscall result in perf_fd.
void perf_init();

// Generic two-argument maximum/minimum. Define HIDEMINMAX before including
// this file to suppress them (e.g. when another header provides MIN/MAX).
// Each argument is evaluated twice, so avoid arguments with side effects.
#ifndef HIDEMINMAX
#define MAX(X,Y) (((X) > (Y)) ? (X) : (Y))
#define MIN(X,Y) (((X) < (Y)) ? (X) : (Y))
#endif

// Raw opcode bytes for the RDPRU instruction, emitted directly so the file
// does not depend on assembler support for the mnemonic.
#define RDPRU ".byte 0x0f, 0x01, 0xfd"
// Values loaded into ecx to choose which register RDPRU reads.
// rdtsc() uses RDPRU_ECX_APERF when TIMER == TIMER_RDPRU.
#define RDPRU_ECX_MPERF	0
#define RDPRU_ECX_APERF	1


// Prototypes for the architecture-specific primitives. The x86_64 and i386
// variants share names; only one set is compiled.
#if defined(__x86_64__)
// ---------------------------------------------------------------------------
// Returns the reading of the timer selected by TIMER as one 64-bit value.
uint64_t rdtsc();

// ---------------------------------------------------------------------------
// Executes clflush on the address p.
void flush(void *p);

// ---------------------------------------------------------------------------
// Performs one 8-byte load from p and discards the loaded value.
void maccess(void *p);

// ---------------------------------------------------------------------------
// Executes an mfence instruction.
void mfence();

// ---------------------------------------------------------------------------
// Executes an lfence instruction.
void nospec();

#include <cpuid.h>
// ---------------------------------------------------------------------------
// Starts a TSX transaction; the result equals ~0u when the transaction is
// running (this is the value maccess_tsx() and try_start() test for).
unsigned int xbegin();

// ---------------------------------------------------------------------------
// Ends the current TSX transaction.
void xend();

// ---------------------------------------------------------------------------
// Returns 1 if CPUID leaf 7 reports EBX bit 11 set, 0 otherwise.
int has_tsx();

// ---------------------------------------------------------------------------
// Loads from ptr inside a TSX transaction; does nothing if the transaction
// does not start.
void maccess_tsx(void* ptr);

#elif defined(__i386__)
// ---------------------------------------------------------------------------
// Returns the low 32 bits of the timer reading.
uint32_t rdtsc();

// ---------------------------------------------------------------------------
// Executes clflush on the address p.
void flush(void *p);

// ---------------------------------------------------------------------------
// Performs one load from p into eax and discards the loaded value.
void maccess(void *p);

// ---------------------------------------------------------------------------
// Executes an mfence instruction.
void mfence();

// ---------------------------------------------------------------------------
// Executes an lfence instruction.
void nospec();

#include <cpuid.h>
// ---------------------------------------------------------------------------
// Returns 1 if CPUID leaf 7 reports EBX bit 11 set, 0 otherwise.
int has_tsx();
#endif

// ---------------------------------------------------------------------------
// Times one access to ptr, then flushes ptr. Returns 1 when the measured time
// is below CACHE_MISS (hit) and 0 otherwise.
int flush_reload(void *ptr);

// ---------------------------------------------------------------------------
// Times one access to ptr, then flushes ptr. Returns the measured time.
int flush_reload_t(void *ptr);

// ---------------------------------------------------------------------------
// Times one access to ptr (no flush). Returns the measured time.
int reload_t(void *ptr);

// ---------------------------------------------------------------------------
// Returns a rough hit/miss threshold derived from averaged reload_t() and
// flush_reload_t() measurements on a local buffer.
size_t detect_flush_reload_threshold();

// ---------------------------------------------------------------------------
// Runs a loop whose branch architecturally only ever loads from a local dummy;
// ptr can only be loaded if that branch is mispredicted.
void maccess_speculative(void* ptr);

// ---------------------------------------------------------------------------
// Jump buffer written by try_start() (setjmp) and consumed by
// trycatch_segfault_handler() (longjmp).
// NOTE(review): this is an object definition in a header, not an extern
// declaration.
jmp_buf trycatch_buf;

// ---------------------------------------------------------------------------
// Removes signum from the calling thread's blocked-signal mask.
void unblock_signal(int signum __attribute__((__unused__)));

// ---------------------------------------------------------------------------
// Signal handler installed by try_start(): unblocks signals 1..31 and
// longjmps to trycatch_buf.
void trycatch_segfault_handler(int signum);

// ---------------------------------------------------------------------------
// Opens a "try" region (TSX transaction if available, otherwise signal
// handlers plus setjmp). Returns non-zero on the initial pass and 0 when
// control comes back after a fault/abort.
int try_start();

// ---------------------------------------------------------------------------
// Restores default signal dispositions when the non-TSX path is in use.
void try_end();

// ---------------------------------------------------------------------------
// Leaves the "try" region: executes xend on the TSX path, otherwise performs
// a load from address 0.
void try_abort();

// ---------------------------------------------------------------------------
// Declared only; no definition appears in the visible part of this file.
void cache_encode(char data);

// ---------------------------------------------------------------------------
// Declared only; no definition appears in the visible part of this file.
void cache_decode_pretty(char *leaked, int index);

// ---------------------------------------------------------------------------
// Declared only; no definition appears in the visible part of this file.
void flush_shared_memory();
// NOTE(review): this closes the _CACHEUTILS_H_ include guard; the function
// definitions that follow are outside the guard, so including this file twice
// in one translation unit would redefine them.
#endif


#if defined(__x86_64__)
// ---------------------------------------------------------------------------
// Reads the timer selected at compile time by TIMER and returns it as a
// single 64-bit value. The read is bracketed by mfence instructions.
uint64_t rdtsc() {
  // a receives eax (low half), d receives edx (high half)
  uint64_t a, d;
  asm volatile("mfence");
#if TIMER == TIMER_RDTSCP
  // rcx is declared clobbered because rdtscp also writes ecx
  asm volatile("rdtscp" : "=a"(a), "=d"(d) :: "rcx");
#elif TIMER == TIMER_RDTSC
  asm volatile("rdtsc" : "=a"(a), "=d"(d));
#elif TIMER == TIMER_RDPRU
  // ecx selects the register to read; APERF is used as the time source
  asm volatile(RDPRU
			     : "=a" (a), "=d" (d)
			     : "c" (RDPRU_ECX_APERF));
#endif
  // combine the two halves into one 64-bit count
  a = (d << 32) | a;
  asm volatile("mfence");
  return a;
}


// ---------------------------------------------------------------------------
// Executes clflush on the address p (passed in rcx).
// NOTE(review): rax is listed as clobbered although the instruction template
// does not reference it.
void flush(void *p) { asm volatile("clflush 0(%0)\n" : : "c"(p) : "rax"); }

// ---------------------------------------------------------------------------
// Performs one 8-byte load from p into rax; the loaded value is discarded.
// Used purely for the memory access itself.
void maccess(void *p) { asm volatile("movq (%0), %%rax\n" : : "c"(p) : "rax"); }

// ---------------------------------------------------------------------------
// Executes a single mfence instruction.
void mfence() { asm volatile("mfence"); }

// ---------------------------------------------------------------------------
// Executes a single lfence instruction.
void nospec() { asm volatile("lfence"); }

// ---------------------------------------------------------------------------
// Emits the xbegin instruction as raw bytes (opcode c7 f8 followed by a zero
// 32-bit offset) and returns the content of eax afterwards.
// eax is preloaded with all ones, so a result of ~0u means eax was left
// untouched; callers in this file treat that as "transaction started".
unsigned int xbegin() {
  unsigned status;
  asm volatile(".byte 0xc7,0xf8,0x00,0x00,0x00,0x00" : "=a"(status) : "a"(-1UL) : "memory");
  return status;
}

// ---------------------------------------------------------------------------
// Emits the xend instruction as raw bytes (0f 01 d5) to close the TSX
// transaction opened by xbegin().
void xend() {
  asm volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");
}

// ---------------------------------------------------------------------------
// Reports whether CPUID leaf 7 (sub-leaf 0) has EBX bit 11 set.
// Returns 1 if the bit is set, 0 if it is clear or leaf 7 is not available.
int has_tsx() {
  unsigned eax = 0, ebx = 0, ecx = 0, edx = 0;

  // Leaf 7 must exist before it can be queried.
  if (__get_cpuid_max(0, NULL) < 7) {
    return 0;
  }

  __cpuid_count(7, 0, eax, ebx, ecx, edx);
  return ((ebx >> 11) & 1u) != 0;
}

// ---------------------------------------------------------------------------
// Loads from ptr inside a TSX transaction. If the transaction does not start
// (xbegin() returns anything other than ~0u) nothing is accessed.
void maccess_tsx(void* ptr) {
    if (xbegin() != (~0u)) {
        return;
    }
    maccess(ptr);
    xend();
}

#elif defined(__i386__)
// ---------------------------------------------------------------------------
// Reads the timer selected by the TIMER configuration and returns the low
// 32 bits of the reading (eax); the high half (edx) is discarded.
// The read is bracketed by mfence instructions.
//
// The instruction is chosen the same way as in the x86_64 variant. A non-zero
// USE_RDTSCP is still honoured for builds that relied on the old switch.
uint32_t rdtsc() {
  uint32_t a, d;
  __asm__ __volatile__("mfence");
#if TIMER == TIMER_RDTSCP || (defined(USE_RDTSCP) && USE_RDTSCP)
  // rdtscp also writes ecx, so it has to be declared as clobbered
  __asm__ __volatile__("rdtscp" : "=a"(a), "=d"(d) :: "ecx");
#elif TIMER == TIMER_RDPRU
  // ecx selects the register to read; APERF is used as the time source
  __asm__ __volatile__(RDPRU : "=a"(a), "=d"(d) : "c"(RDPRU_ECX_APERF));
#else
  __asm__ __volatile__("rdtsc" : "=a"(a), "=d"(d));
#endif
  __asm__ __volatile__("mfence");
  (void)d;
  return a;
}

// ---------------------------------------------------------------------------
// Executes clflush on the address p (passed in ecx).
void flush(void *p) { asm volatile("clflush 0(%0)\n" : : "c"(p)); }

// ---------------------------------------------------------------------------
// Performs one load from p into eax; the loaded value is discarded.
// Used purely for the memory access itself.
void maccess(void *p) { asm volatile("mov (%0), %%eax\n" : : "c"(p) : "eax"); }

// ---------------------------------------------------------------------------
// Executes a single mfence instruction.
void mfence() { asm volatile("mfence"); }

// ---------------------------------------------------------------------------
// Executes a single lfence instruction.
void nospec() { asm volatile("lfence"); }

// ---------------------------------------------------------------------------
// Reports whether CPUID leaf 7 (sub-leaf 0) has EBX bit 11 set.
// Returns 1 if the bit is set, 0 if it is clear or leaf 7 is not available.
int has_tsx() {
  unsigned regs_a = 0, regs_b = 0, regs_c = 0, regs_d = 0;
  int supported = 0;

  // Only query leaf 7 when the CPU reports that it exists.
  if (__get_cpuid_max(0, NULL) >= 7) {
    __cpuid_count(7, 0, regs_a, regs_b, regs_c, regs_d);
    if (regs_b & (1 << 11)) {
      supported = 1;
    }
  }
  return supported;
}
#endif

//perform flush+reload on an address and return hit or miss
// ---------------------------------------------------------------------------
// Flush+Reload probe: times a single access to ptr, then flushes ptr so the
// next probe starts from an uncached line.
// Returns 1 (hit) when the access took less than CACHE_MISS, otherwise 0.
int flush_reload(void *ptr) {
  uint64_t before, after;

  before = rdtsc();
  maccess(ptr);
  after = rdtsc();

  flush(ptr);

  return (after - before < CACHE_MISS) ? 1 : 0;
}

//perform flush+reload on an address and return cycle count
// ---------------------------------------------------------------------------
// Flush+Reload probe returning the raw measurement: times a single access to
// ptr, flushes ptr afterwards, and returns the elapsed timer delta as int.
int flush_reload_t(void *ptr) {
  uint64_t before, after, elapsed;

  before = rdtsc();
  maccess(ptr);
  after = rdtsc();
  elapsed = after - before;

  flush(ptr);

  return (int)elapsed;
}

// measure access time of an address
// ---------------------------------------------------------------------------
// Times a single access to ptr without flushing it afterwards and returns the
// elapsed timer delta as int.
int reload_t(void *ptr) {
  uint64_t before, after;

  before = rdtsc();
  maccess(ptr);
  after = rdtsc();

  return (int)(after - before);
}

//perform a rough estimate of flush+reload threshold
//often good enough for quick experiments, but don't rely on it too much
// ---------------------------------------------------------------------------
// Estimates a hit/miss threshold for flush_reload().
// Averages one million reload_t() measurements (line stays cached) and one
// million flush_reload_t() measurements (line flushed after every access) on
// a local buffer, then returns a weighted mean that sits one third of the way
// from the cached average towards the flushed average.
size_t detect_flush_reload_threshold() {
  const size_t rounds = 1000000;
  size_t cached_sum = 0, flushed_sum = 0;
  size_t cached_avg, flushed_avg;
  size_t round;
  size_t dummy[16];
  size_t *probe = &dummy[8];

  // warm the line before the cached measurements
  maccess(probe);

  round = 0;
  while (round < rounds) {
    cached_sum += reload_t(probe);
    round++;
  }

  round = 0;
  while (round < rounds) {
    flushed_sum += flush_reload_t(probe);
    round++;
  }

  cached_avg = cached_sum / rounds;
  flushed_avg = flushed_sum / rounds;

  return (flushed_avg + cached_avg * 2) / 3;
}


//you probably won't need anything below here
// ---------------------------------------------------------------------------



// ---------------------------------------------------------------------------
// Tries to touch ptr only under misprediction.
// Over 50 iterations the selector c alternates between 1 and 0. When c == 1
// the target address is the local dummy and the branch is taken, so dummy is
// loaded. When c == 0 the target address is ptr and the branch condition is
// false, so architecturally ptr is never loaded by this function.
void maccess_speculative(void* ptr) {
    int i;
    size_t dummy = 0;
    void* addr;

    for(i = 0; i < 50; i++) {
        // low bit of (i * 167 + 13): 1 for even i, 0 for odd i
        size_t c = ((i * 167) + 13) & 1;
        // branch-free select: &dummy when c == 1, ptr when c == 0
        addr = (void*)(((size_t)&dummy) * c + ((size_t)ptr) * (1 - c));
        // evict the selector before it is tested; presumably this delays
        // resolution of the branch below (intent not stated in the source)
        flush(&c);
        mfence();
        // true only for c == 1 (2.0 > 1.1); false for c == 0
        if(c / 0.5 > 1.1) maccess(addr);
    }
}

// ---------------------------------------------------------------------------
// Opens a hardware CPU-cycle counter through perf_event_open for the calling
// thread (pid 0) on any CPU (cpu -1), with kernel, hypervisor and kernel
// call-chain samples excluded. The raw syscall result is stored in perf_fd;
// no error check is performed here.
void perf_init() {
  static struct perf_event_attr attr;
  struct perf_event_attr *cfg = &attr;

  cfg->size = sizeof(*cfg);
  cfg->type = PERF_TYPE_HARDWARE;
  cfg->config = PERF_COUNT_HW_CPU_CYCLES;

  // count user-space cycles only
  cfg->exclude_kernel = 1;
  cfg->exclude_hv = 1;
  cfg->exclude_callchain_kernel = 1;

  perf_fd = syscall(__NR_perf_event_open, cfg, 0, -1, -1, 0);
}


// ---------------------------------------------------------------------------
// Removes signum from the caller's blocked-signal mask.
// Builds a set containing only signum and applies it with SIG_UNBLOCK; return
// values of the signal-set calls are not inspected.
void unblock_signal(int signum __attribute__((__unused__))) {
  sigset_t only_this;

  sigemptyset(&only_this);
  sigaddset(&only_this, signum);

  sigprocmask(SIG_UNBLOCK, &only_this, NULL);
}

// ---------------------------------------------------------------------------
// Signal handler used by the non-TSX try/catch path.
// Unblocks signals 1..31 (the handler leaves via longjmp rather than by
// returning) and then jumps back to the point saved in trycatch_buf with
// value 1. The signal number itself is ignored.
void trycatch_segfault_handler(int signum) {
  int sig = 1;

  (void)signum;

  while (sig < 32) {
    unblock_signal(sig);
    sig++;
  }

  longjmp(trycatch_buf, 1);
}

// ---------------------------------------------------------------------------
// Opens a "try" region in which a faulting access is meant to be survivable.
// Returns non-zero on the initial pass and 0 when control arrives here again
// after an abort/fault.
//
// TSX path (has_tsx() != 0): starts a transaction with a raw xbegin.
// Fallback path: installs trycatch_segfault_handler for signals 1..31 and
// records the resume point with setjmp.
//
// NOTE(review): on the fallback path the handler calls longjmp() after this
// function has already returned to its caller. The C standard leaves longjmp
// undefined once the function that called setjmp has terminated, so this only
// works as long as the saved frame happens to stay intact - confirm intended
// usage with callers.
int try_start() {
#if defined(__i386__) || defined(__x86_64__)
    if(has_tsx()) {
        unsigned status;
        // tsx begin
        // xbegin with a zero fallback offset; eax is preloaded with all ones
        asm volatile(".byte 0xc7,0xf8,0x00,0x00,0x00,0x00"
                 : "=a"(status)
                 : "a"(-1UL)
                 : "memory");
        // eax still all ones => treated as "transaction started"
        return status == (~0u);
    } else
#endif
    {
        int i;
        // return values of signal() are not checked
        for(i = 1; i < 32; i++) {
            signal(i, trycatch_segfault_handler);
        }
        // setjmp yields 0 on the direct call and 1 when the handler longjmps
        return !setjmp(trycatch_buf);
    }
}

// ---------------------------------------------------------------------------
// Closes a "try" region opened by try_start().
// On the TSX path nothing is done here. On the fallback path the default
// disposition is restored for signals 1..31.
void try_end() {
    int sig;

#if defined(__i386__) || defined(__x86_64__)
    if(has_tsx()) {
        return;
    }
#endif

    sig = 1;
    while(sig < 32) {
        signal(sig, SIG_DFL);
        sig++;
    }
}

// ---------------------------------------------------------------------------
// Leaves a "try" region opened by try_start().
// On the TSX path the raw xend opcode (0f 01 d5) is executed. On the fallback
// path a load from address 0 is performed so that the installed signal
// handler takes over.
void try_abort() {
#if defined(__i386__) || defined(__x86_64__)
    if(has_tsx()) {
        asm volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");
        return;
    }
#endif
    maccess(0);
}

// Issues a prefetcht0 hint for the cache line that p points to.
//
// The pointer value is passed in a register and dereferenced by the
// instruction itself. An "m"(p) operand would instead name the local variable
// holding the pointer, so the hint would target the stack slot of p rather
// than the intended address.
// Spelled __asm__ so the function also builds in strict ISO C mode; on
// non-x86 targets the compiler's generic prefetch builtin is used.
void prefetch0(void* p)
{
#if defined(__i386__) || defined(__x86_64__)
  __asm__ __volatile__ ("prefetcht0 (%0)" : : "r" (p));
#else
  __builtin_prefetch(p, 0, 3);
#endif
}
