#include "combw.h"


void combWTest(
	unsigned int InData [16*MAX_SC],
	unsigned int outData [16*MAX_SC]
	){


	complex vector_in1[MAX_SC][4][4];
	complex vector_out[MAX_SC][4][4];
    int status;
    unsigned int tmpVal;
    status = Xil_In32(XPAR_AXI_DMA_0_BASEADDR + 0x04);
    status = Xil_In32(XPAR_AXI_DMA_0_BASEADDR + 0x34);
    tmpVal = Xil_In32 ( XPAR_AXI_DMA_0_BASEADDR + 0x0 );
    tmpVal = tmpVal | 0x0001;
    Xil_Out32  (XPAR_AXI_DMA_0_BASEADDR + 0x00 , tmpVal); // MM2S Control Reset
    tmpVal = Xil_In32 ( XPAR_AXI_DMA_0_BASEADDR + 0x30 );
    tmpVal = tmpVal | 0x0001;
    Xil_Out32  (XPAR_AXI_DMA_0_BASEADDR + 0x30 , tmpVal); // S2MM Control Reset
    status = Xil_In32(XPAR_AXI_DMA_0_BASEADDR + 0x04);
    status = Xil_In32(XPAR_AXI_DMA_0_BASEADDR + 0x34);

    unsigned int inputDataSize, outputDataSize;
    u32 pLSB, pMSB;

    int nmbRB	= 100;
	int nmbSc 	= 15; //1200; //nmbRB/2;
	int X		= 4; //1 to Number of Layer
	int rho		= 4;
	int beta	= 4;
	int alpha	= 4;


	int ii=0, jj=0, kk=0;
	for(ii=0; ii<MAX_SC; ii++)
		for(kk=0;kk<4;kk++)
			for(jj=0;jj<4;jj++)
				{
				vector_in1[ii][jj][kk].re = (InData[(ii*4*4)+(jj*4)+kk]&0xFFFF);
				vector_in1[ii][jj][kk].im = ((InData[(ii*4*4)+(jj*4)+kk]>>16)&0xFFFF);
				}
    XTime tStart1, tEnd1;
    unsigned int tmp1;
    XTime_GetTime(&tStart1);
    comb_w_calc(X, vector_out, vector_in1, nmbSc, rho, beta, alpha);
	XTime_GetTime(&tEnd1);

	Xil_DCacheDisable();
    //Send data to the In-memory via DMA
    Xil_Out32(XPAR_BUFFERS_CONTROL_AXI_BASEADDR + 0x08, 0); //Mem Select
    Xil_Out32(XPAR_BUFFERS_CONTROL_AXI_BASEADDR + 0x0C, 1); //Restart Mem1 address
    inputDataSize = 4*4*nmbSc;
    pLSB = (u32)(uint64_t)InData;
    pMSB = (u32)((uint64_t)InData>>32);
    Xil_Out32  (XPAR_AXI_DMA_0_BASEADDR + 0x18 , pLSB); 	   // MM2S Source LSB Address
    Xil_Out32  (XPAR_AXI_DMA_0_BASEADDR + 0x1c , pMSB); // MM2S Source MSB Address
    status = Xil_In32(XPAR_AXI_DMA_0_BASEADDR + 0x04);
    Xil_Out32  (XPAR_AXI_DMA_0_BASEADDR + 0x28 , inputDataSize*4);  	  // MM2S Transfer Length (Bytes)
    status = Xil_In32(XPAR_AXI_DMA_0_BASEADDR + 0x04);
    while((status & 0x02) == 0)
    	status = Xil_In32(XPAR_AXI_DMA_0_BASEADDR + 0x04);



    XTime tStart, tEnd;
    unsigned int tmp;
    //Start the Kernel
    int times[100];
    int iii=0;
    for(iii=0; iii<100; iii++)
    {
    	int SC = (iii+1)*12;
		Xil_Out32(XPAR_BUFFERS_CONTROL_AXI_BASEADDR + 16, X); //set X
		Xil_Out32(XPAR_BUFFERS_CONTROL_AXI_BASEADDR + 20, rho); //set rho
		Xil_Out32(XPAR_BUFFERS_CONTROL_AXI_BASEADDR + 24, beta); //set beta
		Xil_Out32(XPAR_BUFFERS_CONTROL_AXI_BASEADDR + 28, alpha); //set alpha
		Xil_Out32(XPAR_BUFFERS_CONTROL_AXI_BASEADDR + 32, SC); //set SC

		XTime_GetTime(&tStart);
		Xil_Out32(XPAR_BUFFERS_CONTROL_AXI_BASEADDR + 36, 1); //set Start
		while(!(Xil_In32(XPAR_BUFFERS_CONTROL_AXI_BASEADDR + 60)&0x01));
		XTime_GetTime(&tEnd);
		status = Xil_In32(XPAR_BUFFERS_CONTROL_AXI_BASEADDR + 60);
		times[iii] = tEnd - tStart;
    }
	//Read Data Out Back via DMA
    outputDataSize = 4*4*nmbSc;
    pLSB = (u32)(uint64_t)outData;
    pMSB = (u32)((uint64_t)outData>>32);
    Xil_Out32  (XPAR_AXI_DMA_0_BASEADDR + 0x48 , pLSB);	  // S2MM Source LSB Address
    Xil_Out32  (XPAR_AXI_DMA_0_BASEADDR + 0x4c , pMSB); // S2MM Source MSB Address
    Xil_Out32  (XPAR_AXI_DMA_0_BASEADDR + 0x58 , outputDataSize*4);  // S2MM Transfer Length (Bytes)
    Xil_Out32(XPAR_BUFFERS_CONTROL_AXI_BASEADDR + 0x08, 32); //Mem Select
    Xil_Out32(XPAR_BUFFERS_CONTROL_AXI_BASEADDR + 0x04, outputDataSize); //Mem Out Size
    Xil_Out32(XPAR_BUFFERS_CONTROL_AXI_BASEADDR + 0x0C, 2); //Restart Mem1 address
    status = Xil_In32(XPAR_AXI_DMA_0_BASEADDR + 0x34);
    while((status & 0x02) == 0)
    	status = Xil_In32(XPAR_AXI_DMA_0_BASEADDR + 0x34);

    //float tmp = 1.0 * (tEnd - tStart) / (COUNTS_PER_SECOND/1000000);
	tmp = 10 * (tEnd - tStart);
	tmp1 = 10 * (tEnd1 - tStart1);
    xil_printf("Hardware (%d), Software (%d) ns.\n", tmp, tmp1);
int counter1 = 0;
int counter2 = 0;
	//Test the output
	for(ii=0; ii<MAX_SC; ii++)
		for(jj=0;jj<4;jj++)
			for(kk=0;kk<4;kk++)
			{
				counter1++;
				complex tmp;
				tmp.re = (outData[(ii*4*4)+(jj*4)+kk]&0xFFFF);
				tmp.im = ((outData[(ii*4*4)+(jj*4)+kk]>>16)&0xFFFF);
				if((tmp.re != vector_out[ii][jj][kk].re) || (tmp.im != vector_out[ii][jj][kk].im))
					counter2++; //xil_printf("Error %d, %d(%d), %d(%d)\n", ii, tmp.re, vector_out[kk][jj][ii].re, tmp.im, vector_out[kk][jj][ii].im);
			}

    return;
}

