// copied from xil_cache.c
// -----------------------
#include "defs.h"
#include "xparameters.h"

// Stack boundary symbols. Only their addresses are used: Xil_L1DCacheInvalidate
// flushes the range starting at &__stack_end__ and extending up to
// &__stack_start__ before it invalidates the L1 data cache.
// NOTE(review): presumably provided by the linker script -- confirm.
extern int  __stack_end__;
extern int  __stack_start__; 

// Forward declarations for the cache routines defined later in this file.
void Xil_L1DCacheInvalidate(void);
static inline void Xil_L2CacheSync(void);
void Xil_L1DCacheEnable(void);
void Xil_L2CacheEnable(void);
void Xil_L2CacheInvalidate(void);
void Xil_DCacheFlushRange(unsigned int adr, unsigned len);

// Accessors for the ARM status register and coprocessor registers, written
// as GCC inline assembly.
//   mfcpsr()     - evaluates to the current CPSR value (mrs).
//   mtcpsr(v)    - writes v to the CPSR (msr).
//   mtcp(rn, v)  - writes v to the coprocessor register named by the operand
//                  string rn (mcr); rn is one of the XREG_CP15_* strings.
//   mfcp(rn)     - evaluates to the coprocessor register named by rn (mrc).
// The statement-like macros (mtcpsr, mtcp) deliberately carry no trailing
// semicolon, so an invocation followed by ';' is exactly one statement and is
// safe inside an unbraced if/else.
#define mfcpsr() ({unsigned int rval; __asm__ __volatile__("mrs	%0, cpsr\n": "=r" (rval));rval;})
#define mtcpsr(v) __asm__ __volatile__("msr cpsr,%0\n" : : "r" (v))
#define mtcp(rn, v) __asm__ __volatile__("mcr " rn "\n": : "r" (v))
#define mfcp(rn) ({unsigned int rval; __asm__ __volatile__("mrc " rn "\n" : "=r" (rval)); rval;})

// CP15 operand strings. They are pasted after "mcr " / "mrc " by mtcp()/mfcp()
// and by the inline asm statements in this file; %0 stands for the
// general-purpose register operand.
#define XREG_CP15_SYS_CONTROL			"p15, 0, %0,  c1,  c0, 0"
#define XREG_CP15_CLEAN_INVAL_DC_LINE_MVA_POC	"p15, 0, %0,  c7, c14, 1"
#define XREG_CP15_CACHE_SIZE_SEL		"p15, 2, %0,  c0,  c0, 0"
#define XREG_CP15_CACHE_SIZE_ID			"p15, 1, %0,  c0,  c0, 0"
#define XREG_CP15_INVAL_DC_LINE_SW		"p15, 0, %0,  c7,  c6, 2"
// Bit of the system control register that Xil_L1DCacheEnable tests and sets
// to turn the data cache on.
#define XREG_CP15_CONTROL_C_BIT			0x00000004

// L2 cache controller register offsets; each is added to XPS_L2CC_BASEADDR.
#define XPS_L2CC_CACHE_INVLD_WAY_OFFSET  0x077C  // Cache Invalidate by Way
#define XPS_L2CC_CACHE_INV_CLN_PA_OFFSET 0x07F0  // Cache Invalidate and Clean by PA
#define XPS_L2CC_DUMMY_CACHE_SYNC_OFFSET 0x0740  // Dummy Register for Cache Sync 
// AND-ed into the auxiliary control value before the defaults are OR-ed in;
// it clears bits 19:17 and keeps every other bit.
#define XPS_L2CC_AUX_REG_ZERO_MASK	 0xFFF1FFFF
#define XPS_L2CC_CNTRL_OFFSET		 0x0100
#define XPS_L2CC_AUX_CNTRL_OFFSET	 0x0104
#define XPS_L2CC_TAG_RAM_CNTRL_OFFSET	 0x0108
#define XPS_L2CC_DATA_RAM_CNTRL_OFFSET	 0x010C
#define XPS_L2CC_ISR_OFFSET		 0x021C  // Raw Interrupt Status
#define XPS_L2CC_IAR_OFFSET		 0x0220  // Interrupt Clear
// Values written unmodified to the tag/data RAM control registers.
#define XPS_L2CC_TAG_RAM_DEFAULT_MASK	0x00000111 // latency for TAG RAM 
#define XPS_L2CC_DATA_RAM_DEFAULT_MASK	0x00000121 // latency for DATA RAM

// OR-ed into the auxiliary control register by Xil_L2CacheEnable.
#define XPS_L2CC_AUX_REG_DEFAULT_MASK	 0x72360000 // Enable all prefetching, Cache replacement policy, 
                                                    // Parity enable, Event monitor bus enable and Way Size (64 KB) 
// OR-ed into the CPSR value to disable interrupts around cache maintenance.
#define IRQ_FIQ_MASK 0xC0  // Mask IRQ and FIQ interrupts in cpsr

// Data Synchronization Barrier. The "memory" clobber additionally stops the
// compiler from moving memory accesses across it.
#define dsb() __asm__ __volatile__ ("dsb" : : : "memory") 

// Performs a single volatile 32-bit read from the location whose address is
// Addr and returns the value read.
UINT32 Xil_In32(UINT32 Addr) {
  volatile UINT32 *src = (volatile UINT32 *) Addr;

  return *src;
}
// Performs a single volatile 32-bit write of Value to the location whose
// address is OutAddress.
void Xil_Out32(UINT32 OutAddress, UINT32 Value) {
  volatile UINT32 *dst = (volatile UINT32 *) OutAddress;

  *dst = Value;
}

// Enables the data caches: the L1 data cache first, then the L2 cache.
// Each callee checks its own enable bit and does nothing if that cache is
// already on, so calling this repeatedly is harmless.
void Xil_DCacheEnable(void) {
  Xil_L1DCacheEnable();
  Xil_L2CacheEnable();
} 

// Turns on the L1 data cache unless the C bit of the system control register
// shows that it is already enabled.
void Xil_L1DCacheEnable(void) {
  register unsigned int sctlr = mfcp(XREG_CP15_SYS_CONTROL);

  if ((sctlr & XREG_CP15_CONTROL_C_BIT) == 0) {
    // Discard whatever the cache holds before it goes live; the callee first
    // writes the stack region back so the active call chain survives.
    Xil_L1DCacheInvalidate();

    // Write the control register back with the data-cache enable bit set.
    sctlr |= XREG_CP15_CONTROL_C_BIT;
    mtcp(XREG_CP15_SYS_CONTROL, sctlr);
  }
}

// Configures and switches on the L2 cache controller. Does nothing when bit 0
// of the controller's control register is already set.
void Xil_L2CacheEnable(void) {
  register unsigned int reg;

  // Already enabled: leave the controller untouched.
  reg = Xil_In32(XPS_L2CC_BASEADDR + XPS_L2CC_CNTRL_OFFSET);
  if ((reg & 0x01) != 0) {
    return;
  }

  // Auxiliary control: clear the bits selected by the zero mask, then apply
  // the default way-size / feature bits.
  reg = Xil_In32(XPS_L2CC_BASEADDR + XPS_L2CC_AUX_CNTRL_OFFSET);
  reg = (reg & XPS_L2CC_AUX_REG_ZERO_MASK) | XPS_L2CC_AUX_REG_DEFAULT_MASK;
  Xil_Out32(XPS_L2CC_BASEADDR + XPS_L2CC_AUX_CNTRL_OFFSET, reg);

  // RAM latencies.
  Xil_Out32(XPS_L2CC_BASEADDR + XPS_L2CC_TAG_RAM_CNTRL_OFFSET, XPS_L2CC_TAG_RAM_DEFAULT_MASK);
  Xil_Out32(XPS_L2CC_BASEADDR + XPS_L2CC_DATA_RAM_CNTRL_OFFSET, XPS_L2CC_DATA_RAM_DEFAULT_MASK);

  // Acknowledge every interrupt the raw status register currently reports.
  reg = Xil_In32(XPS_L2CC_BASEADDR + XPS_L2CC_ISR_OFFSET);
  Xil_Out32(XPS_L2CC_BASEADDR + XPS_L2CC_IAR_OFFSET, reg);

  // Start from an empty cache.
  Xil_L2CacheInvalidate();

  // Set the enable bit, preserving the rest of the control register.
  reg = Xil_In32(XPS_L2CC_BASEADDR + XPS_L2CC_CNTRL_OFFSET);
  Xil_Out32(XPS_L2CC_BASEADDR + XPS_L2CC_CNTRL_OFFSET, (reg | (0x01)));

  Xil_L2CacheSync();
  dsb();  // synchronize the processor
}

// Invalidates the whole L1 data cache, one line at a time, by set/way.
//
// Steps, all performed with IRQ and FIQ masked in the CPSR (the previous CPSR
// value is restored before returning):
//   1. Flush the memory between &__stack_end__ and &__stack_start__ with
//      Xil_DCacheFlushRange(), so stack contents are written out before the
//      cache lines holding them are discarded.
//   2. Select the cache to describe, read the cache size ID register and
//      derive the number of ways, the number of sets and the line size.
//   3. Issue one invalidate-by-set/way operation per (way, set) pair.
//   4. Execute a dsb so the maintenance operations have completed.
void Xil_L1DCacheInvalidate(void) {
  register unsigned int CsidReg, C7Reg;
  unsigned int CacheSize, LineSize, NumWays;
  unsigned int Way, WayIndex, Set, SetIndex, NumSet;
  unsigned int currmask;  
  unsigned int stack_start,stack_end,stack_size;

  // Save the CPSR and mask IRQ/FIQ for the duration of the operation
  currmask = mfcpsr();
  mtcpsr(currmask | IRQ_FIQ_MASK);
  
  // The flushed region starts at stack_end and is stack_start - stack_end
  // bytes long, i.e. __stack_end__ is expected to be the lower address.
  stack_end = (unsigned int )&__stack_end__;
  stack_start = (unsigned int )&__stack_start__; 
  stack_size = stack_start-stack_end;

  //Flush stack memory to save return address
  Xil_DCacheFlushRange(stack_end, stack_size);
  
  // Select cache level 0 and D cache in CSSR 
  mtcp(XREG_CP15_CACHE_SIZE_SEL, 0);

  CsidReg = mfcp(XREG_CP15_CACHE_SIZE_ID);

  // Determine Cache Size: (bits [21:13] of the ID value + 1) * 128.
  // NOTE(review): the factor 128 presumably stands for 4 ways x 32-byte
  // lines -- confirm against the target's cache geometry.
  CacheSize = (CsidReg >> 13) & 0x1FF;
  CacheSize +=1;
  CacheSize *=128;    // to get number of bytes 

  // Number of Ways: bits [9:3] of the ID value, plus one
  NumWays = (CsidReg & 0x3ff) >> 3;
  NumWays += 1;

  // Get the cacheline size, way size, index size from csidr.
  // LineSize is used below as a shift count, i.e. log2 of the line size in
  // bytes.
  LineSize = (CsidReg & 0x07) + 4;

  NumSet = CacheSize/NumWays;
  NumSet /= (1 << LineSize);

  Way = 0UL;
  Set = 0UL;

  // Invalidate all the cachelines 
  for (WayIndex =0; WayIndex < NumWays; WayIndex++) {
    for (SetIndex =0; SetIndex < NumSet; SetIndex++) {
      // Operand = way field (top bits) combined with the set field
      C7Reg = Way | Set;
      // Invalidate by Set/Way 
      __asm__ __volatile__("mcr " XREG_CP15_INVAL_DC_LINE_SW :: "r" (C7Reg));
      // Next set: the set field advances in steps of one line size
      Set += (1 << LineSize);
    }
    Set=0UL;
    // Next way. NOTE(review): a step of 0x40000000 allows only four distinct
    // way values before the 32-bit operand wraps, so this presumably assumes
    // a 4-way cache -- confirm.
    Way += 0x40000000;
  }
  // Wait for L1 invalidate to complete
  dsb();
  // Restore the caller's interrupt mask state
  mtcpsr(currmask);
}

// Invalidates the entire L2 cache by way and waits until the controller
// reports that the operation has finished.
void Xil_L2CacheInvalidate(void) {
  // Request invalidation for every way selected by the low 16 mask bits.
  Xil_Out32(XPS_L2CC_BASEADDR + XPS_L2CC_CACHE_INVLD_WAY_OFFSET, 0x0000FFFF);

  // Spin until none of those 16 bits reads back as set.
  for (;;) {
    if (((Xil_In32(XPS_L2CC_BASEADDR + XPS_L2CC_CACHE_INVLD_WAY_OFFSET)) & 0x0000FFFF) == 0) {
      break;
    }
  }

  Xil_L2CacheSync();  // Wait for the invalidate to complete
  dsb();  // synchronize the processor
}
 
// Writes 0 to the L2 controller's dummy cache-sync register
// (XPS_L2CC_BASEADDR + XPS_L2CC_DUMMY_CACHE_SYNC_OFFSET).
// Callers in this file follow it with dsb().
static inline void Xil_L2CacheSync(void) {
  Xil_Out32(XPS_L2CC_BASEADDR + XPS_L2CC_DUMMY_CACHE_SYNC_OFFSET, 0x0);
} 

// Cleans and invalidates every data-cache line that overlaps the byte range
// [adr, adr + len), in both L1 (by address) and L2 (by writing the address to
// the controller's invalidate-and-clean register).
//
//   adr - first byte of the range; need not be cache-line aligned.
//   len - length of the range in bytes; 0 means no line is touched.
//
// IRQ and FIQ are masked for the duration and the previous CPSR value is
// restored before returning.
//
// The loop is driven by the inclusive last address rather than by adr + len,
// so a range that ends exactly at the top of the 32-bit address space (where
// adr + len wraps to 0) is still flushed. A range that would run past the top
// of the address space is clamped to end at 0xFFFFFFFF.
void Xil_DCacheFlushRange(unsigned int adr, unsigned len) {
  const unsigned cacheline = 32;
  unsigned int last, currmask;
  volatile UINT32 *L2CCOffset = (volatile UINT32 *) (XPS_L2CC_BASEADDR + XPS_L2CC_CACHE_INV_CLN_PA_OFFSET);

  currmask = mfcpsr();
  mtcpsr(currmask | IRQ_FIQ_MASK);

  if (len != 0) {
    // Inclusive address of the final byte; clamp if the range overflows.
    last = adr + (len - 1);
    if (last < adr) {
      last = ~0U;
    }

    // Back the starting address up to the start of a cache line.
    adr &= ~(cacheline - 1);

    for (;;) {
      // Flush L1 Data cache line
      __asm__ __volatile__("mcr " XREG_CP15_CLEAN_INVAL_DC_LINE_MVA_POC :: "r" (adr));
      // Flush L2 cache line
      *L2CCOffset = adr;
      dsb();

      // Stop once the line just flushed contains the final byte. Because
      // adr <= last here, the subtraction cannot wrap, and when we continue
      // adr + cacheline <= last, so the increment cannot wrap either.
      if (last - adr < cacheline) {
        break;
      }
      adr += cacheline;
    }
  }
  dsb();
  mtcpsr(currmask);
}
