#include "Df_Equalizer.h"

#ifndef IS_PIPELINE
#ifndef IS_BARE

/*
 * Complex conjugate.
 * Returns a value whose real part equals a.re and whose imaginary part
 * is the negation of a.im.
 */
hwComplex hwConj(hwComplex a)
{
    hwComplex result;
    result.im = -a.im;
    result.re = a.re;
    return result;
}

/*
 * Complex addition.
 * Returns a + b, computed component-wise on the real and imaginary parts.
 */
hwComplex hwAdd(hwComplex a, hwComplex b)
{
    hwComplex sum;
    sum.im = a.im + b.im;
    sum.re = a.re + b.re;
    return sum;
}

/*
 * Complex subtraction.
 * Returns a - b, computed component-wise on the real and imaginary parts.
 */
hwComplex hwSub(hwComplex a, hwComplex b)
{
    hwComplex diff;
    diff.im = a.im - b.im;
    diff.re = a.re - b.re;
    return diff;
}

/*
 * Complex multiplication.
 * Returns a * b:
 *   re = a.re*b.re - a.im*b.im
 *   im = a.re*b.im + a.im*b.re
 */
hwComplex hwMul(hwComplex a, hwComplex b)
{
    hwComplex product;
    product.im = (a.re * b.im) + (a.im * b.re);
    product.re = (a.re * b.re) - (a.im * b.im);
    return product;
}

/*
 * Complex division.
 * Returns a / b, evaluated as a * conj(b) / |b|^2 where
 * |b|^2 = b.re*b.re + b.im*b.im.
 *
 * The divisor is not checked: when both components of b are zero the
 * denominator is zero.
 */
hwComplex hwDiv(hwComplex a, hwComplex b)
{
    hwComplex quotient;
    quotient.im = ((a.im * b.re) - (a.re * b.im))/((b.re * b.re) + (b.im * b.im));
    quotient.re = ((a.re * b.re) + (a.im * b.im))/((b.re * b.re) + (b.im * b.im));
    return quotient;
}

/*
 * Element-wise copy of the leading NumLayer x NumLayer entries of
 * pInvXDX_in into pInvXDX_out. Entries outside that range are not touched.
 *
 * NumLayer    - number of rows and columns to copy.
 * pInvXDX_in  - source matrix.
 * pInvXDX_out - destination matrix.
 */
void hardware_cpy_pInvXDX(
        int NumLayer,
        hwComplex pInvXDX_in[MAX_LAYER][MAX_LAYER],
        hwComplex pInvXDX_out[MAX_LAYER][MAX_LAYER])
{
#ifdef IS_HLS
#pragma HLS INLINE off
#endif
    for (int row = 0; row < NumLayer; ++row)
    {
    #ifdef IS_HLS
    DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_LAYER)
    #endif
        for (int col = 0; col < NumLayer; ++col)
        {
        #ifdef IS_HLS
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_LAYER)
        #endif
            pInvXDX_out[row][col] = pInvXDX_in[row][col];
        }
    }
}

/*
 * Element-wise copy of pYt_in into pYt_out for both values of the first
 * index (0 and 1) and the first NumRxAntenna entries of the second index.
 *
 * NumRxAntenna - number of second-index entries to copy per row.
 * pYt_in       - source array.
 * pYt_out      - destination array.
 */
void hardware_cpy_pYt(
        int NumRxAntenna,
        hwComplex pYt_in [2][MAX_RX_ANT],
        hwComplex pYt_out [2][MAX_RX_ANT])
{
#ifdef IS_HLS
#pragma HLS INLINE off
#endif
    for (int row = 0; row < 2; ++row)
    {
        for (int rx = 0; rx < NumRxAntenna; ++rx)
        {
        #ifdef IS_HLS
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_RX_ANT)
        #endif
            pYt_out[row][rx] = pYt_in[row][rx];
        }
    }
}

/*
 * Element-wise copy of the first NumLayer rows (two columns each) of
 * pXtDagger_in into pXtDagger_out.
 *
 * NumLayer      - number of rows to copy.
 * pXtDagger_in  - source array.
 * pXtDagger_out - destination array.
 */
void hardware_cpy_pXtDagger(
        int NumLayer,
        hwComplex pXtDagger_in[MAX_LAYER][2],
        hwComplex pXtDagger_out[MAX_LAYER][2])
{
#ifdef IS_HLS
#pragma HLS INLINE off
#endif
    for (int lyr = 0; lyr < NumLayer; ++lyr)
    {
    #ifdef IS_HLS
    DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_LAYER)
    #endif
        for (int col = 0; col < 2; ++col)
        {
            pXtDagger_out[lyr][col] = pXtDagger_in[lyr][col];
        }
    }
}

/*
 * Element-wise copy of the leading NumLayer x NumRxAntenna entries of
 * pHDagger_in into pHDagger_out.
 *
 * NumLayer     - number of rows to copy.
 * NumRxAntenna - number of columns to copy.
 * pHDagger_in  - source matrix.
 * pHDagger_out - destination matrix.
 */
void hardware_cpy_pHDagger(
        int NumLayer,
        int NumRxAntenna,
        hwComplex pHDagger_in[MAX_LAYER][MAX_RX_ANT],
        hwComplex pHDagger_out[MAX_LAYER][MAX_RX_ANT])
{
#ifdef IS_HLS
#pragma HLS INLINE off
#endif
    for (int lyr = 0; lyr < NumLayer; ++lyr)
    {
    #ifdef IS_HLS
    DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_LAYER)
    #endif
        for (int rx = 0; rx < NumRxAntenna; ++rx)
        {
        #ifdef IS_HLS
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_RX_ANT)
        #endif
            pHDagger_out[lyr][rx] = pHDagger_in[lyr][rx];
        }
    }
}

/*
 * Element-wise copy of pYData_in into pYData_out for the first
 * (NumULSymbSF_in - 2) rows and the first NumRxAntenna_in columns.
 *
 * NumULSymbSF_in  - symbol count; two fewer rows than this are copied.
 * NumRxAntenna_in - number of columns to copy.
 * pYData_in       - source array.
 * pYData_out      - destination array.
 */
void hardware_cpy_YData(
        int NumULSymbSF_in,
        int NumRxAntenna_in,
        hwComplex pYData_in[MAX_SYM][MAX_RX_ANT],
        hwComplex pYData_out[MAX_SYM][MAX_RX_ANT])
{
#ifdef IS_HLS
#pragma HLS INLINE off
#endif
    for (int sym = 0; sym < NumULSymbSF_in-2; ++sym)
    {
    #ifdef IS_HLS
    DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_SYM-2)
    #endif
        for (int rx = 0; rx < NumRxAntenna_in; ++rx)
        {
        #ifdef IS_HLS
        #pragma HLS PIPELINE enable_flush
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_RX_ANT)
        #endif
            pYData_out[sym][rx] = pYData_in[sym][rx];
        }
    }
}




/*
 * Inverts the leading sz x sz sub-matrix of pM and stores the result in the
 * leading sz x sz entries of pInvM.
 *
 * The inverse is obtained by Gauss-Jordan elimination on an augmented
 * working matrix [M | I]: forward elimination normalizes each pivot row and
 * clears the entries below the pivot, back substitution clears the entries
 * above it, and the right half of the working matrix is then copied out.
 *
 * sz    - number of rows/columns of the matrix to invert.
 * pM    - input matrix; only read.
 * pInvM - output matrix; only entries [0,sz) x [0,sz) are written.
 *
 * There is no row pivoting: each diagonal element pX[r][r] is used as a
 * divisor without a zero check, so a zero pivot leads to a division by zero
 * inside hwDiv.
 *
 * NOTE(review): the working storage is dimensioned with MAX_SIZE while the
 * arguments are dimensioned with MAX_LAYER; this assumes sz <= MAX_SIZE and
 * sz <= MAX_LAYER - confirm against the header.
 */
void hwMatrixInv(int sz,hwComplex pM[MAX_LAYER][MAX_LAYER],hwComplex pInvM[MAX_LAYER][MAX_LAYER])
{
    // Augmented matrix: columns [0,sz) receive M, columns [sz,2*sz) the identity.
    hwComplex pX [MAX_SIZE][2*MAX_SIZE];
    for(int r=0;r<sz;r++)
    {
    #ifdef IS_HLS
    #pragma HLS PIPELINE enable_flush
    DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_SIZE)
    #endif
        for(int c=0;c<sz;c++)
        {
        #ifdef IS_HLS
        #pragma HLS PIPELINE enable_flush
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_SIZE)
        #endif
            pX[r][c]=pM[r][c];
        }
        // Runs from sz to 2*sz-1, i.e. sz iterations, hence max=MAX_SIZE.
        for(int c=sz;c<2*sz;c++)
        {
        #ifdef IS_HLS
        #pragma HLS PIPELINE enable_flush
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_SIZE)
        #endif
            if(c==r+sz)
            {
                pX[r][c].re=1.0;
                pX[r][c].im=0.0;
            }
            else
            {
                pX[r][c].re=0.0;
                pX[r][c].im=0.0;
            }
        }
    }

    // Forward elimination.
    hwComplex pCurRow[2*MAX_SIZE];
    for(int r=0;r<sz;r++)
    {
    #ifdef IS_HLS
    #pragma HLS PIPELINE enable_flush
    DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_SIZE)
    #endif
        // Normalize row r by its pivot. The result goes to pCurRow first so
        // that the pivot pX[r][r] stays unchanged while the row is divided.
        for(int c=0;c<2*sz;c++)
        {
        #ifdef IS_HLS
        #pragma HLS PIPELINE enable_flush
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=2*MAX_SIZE)
        #endif
            pCurRow[c]=hwDiv(pX[r][c],pX[r][r]);
        }
        // Write the normalized row back (2*sz iterations, hence 2*MAX_SIZE).
        for(int c=0;c<2*sz;c++)
        {
        #ifdef IS_HLS
        #pragma HLS PIPELINE enable_flush
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=2*MAX_SIZE)
        #endif
            pX[r][c]=pCurRow[c];
        }
        // Clear column r in every row below the pivot row.
        for(int er=r+1;er<sz;er++)
        {
        #ifdef IS_HLS
        #pragma HLS PIPELINE enable_flush
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_SIZE)
        #endif
            // Captured before the inner loop because pX[er][r] is overwritten in it.
            hwComplex curC=pX[er][r];
            for(int c=0;c<2*sz;c++)
            {
            #ifdef IS_HLS
            #pragma HLS PIPELINE enable_flush
            DO_PRAGMA(HLS LOOP_TRIPCOUNT max=2*MAX_SIZE)
            #endif
                pX[er][c]=hwSub(pX[er][c],hwMul(curC,pCurRow[c]));
            }
        }
    }


    // Back substitution: clear column r in every row above the pivot row.
    for(int r=sz-1;r>=0;r--)
    {
    #ifdef IS_HLS
    #pragma HLS PIPELINE enable_flush
    DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_SIZE)
    #endif
        for(int c=0;c<2*sz;c++)
        {
        #ifdef IS_HLS
        #pragma HLS PIPELINE enable_flush
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=2*MAX_SIZE)
        #endif
            pCurRow[c]=pX[r][c];
        }
        for(int er=r-1;er>=0;er--)
        {
        #ifdef IS_HLS
        #pragma HLS PIPELINE enable_flush
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_SIZE)
        #endif
            // Captured before the inner loop because pX[er][r] is overwritten in it.
            hwComplex curC = pX[er][r];
            // 2*sz iterations, hence 2*MAX_SIZE.
            for(int c=0;c<2*sz;c++)
            {
            #ifdef IS_HLS
            #pragma HLS PIPELINE enable_flush
            DO_PRAGMA(HLS LOOP_TRIPCOUNT max=2*MAX_SIZE)
            #endif
                pX[er][c]=hwSub(pX[er][c],hwMul(curC,pCurRow[c]));
            }
        }
    }

    // The right half of the augmented matrix now holds the inverse.
    for(int r=0;r<sz;r++)
    {
    #ifdef IS_HLS
    #pragma HLS PIPELINE enable_flush
    DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_SIZE)
    #endif
        for(int c=0;c<sz;c++)
        {
        #ifdef IS_HLS
        #pragma HLS PIPELINE enable_flush
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_SIZE)
        #endif
            int col=c+sz;
            pInvM[r][c]=pX[r][col];
        }
    }
}


/*
 * Equalization stage 1.
 *
 * From hardware_pHTranspose (indexed [layer][rx antenna]) this stage
 *   - builds a local matrix pH indexed [rx antenna][layer],
 *   - writes pHDagger[layer][rx] as the conjugate of each input element,
 *   - writes pHDH = pHDagger * pH (NumLayer_in x NumLayer_in),
 *   - copies the first (NumULSymbSF_in - 2) rows of pYData_in to pYData_out,
 *   - stores pH into hardware_pHdm[MDFT_in].
 * The four scalar inputs are forwarded unchanged through the *_out pointers.
 */
void hardware_FDLSEqualization_Phase1(
        hwComplex hardware_pHTranspose[MAX_LAYER][MAX_RX_ANT],
        hwComplex pHDH[MAX_LAYER][MAX_LAYER],
        hwComplex pYData_in[MAX_SYM][MAX_RX_ANT],
        hwComplex pYData_out[MAX_SYM][MAX_RX_ANT],
        hwComplex hardware_pHdm[MAX_MDFT][MAX_RX_ANT][MAX_LAYER],
        hwComplex pHDagger[MAX_LAYER][MAX_RX_ANT],
        int NumLayer_in,
        int* NumLayer_out,
        int NumRxAntenna_in,
        int* NumRxAntenna_out,
        int MDFT_in,
        int* MDFT_out,
        int NumULSymbSF_in,
        int* NumULSymbSF_out)
{
    // Input matrix re-indexed as [rx antenna][layer].
    hwComplex pH[MAX_RX_ANT][MAX_LAYER];

    // Transpose into pH and conjugate into pHDagger.
    for (int rx = 0; rx < NumRxAntenna_in; ++rx)
    {
    #ifdef IS_HLS
    #pragma HLS PIPELINE enable_flush
    DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_RX_ANT)
    #endif
        for (int lyr = 0; lyr < NumLayer_in; ++lyr)
        {
        #ifdef IS_HLS
        #pragma HLS PIPELINE enable_flush
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_LAYER)
        #endif
            pH[rx][lyr] = hardware_pHTranspose[lyr][rx];
            pHDagger[lyr][rx] = hwConj(hardware_pHTranspose[lyr][rx]);
        }
    }

    // pHDH = pHDagger * pH.
    for (int row = 0; row < NumLayer_in; ++row)
    {
    #ifdef IS_HLS
    #pragma HLS PIPELINE enable_flush
    DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_LAYER)
    #endif
        for (int col = 0; col < NumLayer_in; ++col)
        {
        #ifdef IS_HLS
        #pragma HLS PIPELINE enable_flush
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_LAYER)
        #endif
            hwComplex acc;
            acc.re = 0;
            acc.im = 0;
            for (int rx = 0; rx < NumRxAntenna_in; ++rx)
            {
            #ifdef IS_HLS
            #pragma HLS PIPELINE enable_flush
            DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_RX_ANT)
            #endif
                acc = hwAdd(acc, hwMul(pHDagger[row][rx], pH[rx][col]));
            }
            pHDH[row][col] = acc;
        }
    }

    // Forward the received data to the next stage.
    for (int sym = 0; sym < NumULSymbSF_in-2; ++sym)
    {
    #ifdef IS_HLS
    #pragma HLS PIPELINE enable_flush
    DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_SYM-2)
    #endif
        for (int rx = 0; rx < NumRxAntenna_in; ++rx)
        {
        #ifdef IS_HLS
        #pragma HLS PIPELINE enable_flush
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_RX_ANT)
        #endif
            pYData_out[sym][rx] = pYData_in[sym][rx];
        }
    }

    // Store pH in the output slice selected by MDFT_in.
    for (int rx = 0; rx < NumRxAntenna_in; ++rx)
    {
    #ifdef IS_HLS
    #pragma HLS PIPELINE enable_flush
    DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_RX_ANT)
    #endif
        for (int lyr = 0; lyr < NumLayer_in; ++lyr)
        {
        #ifdef IS_HLS
        #pragma HLS PIPELINE enable_flush
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_LAYER)
        #endif
            hardware_pHdm[MDFT_in][rx][lyr] = pH[rx][lyr];
        }
    }

    // Pass the scalar parameters through to the next stage.
    *NumRxAntenna_out = NumRxAntenna_in;
    *MDFT_out = MDFT_in;
    *NumLayer_out = NumLayer_in;
    *NumULSymbSF_out = NumULSymbSF_in;
}




/*
 * Computes the matrix product pInvHDH * pHDagger_in and stores it in
 * hardware_pEqW[MDFT_in].
 *
 * NumLayer_in     - rows of the result and length of the summed dimension.
 * NumRxAntenna_in - columns of the result.
 * MDFT_in         - first index of hardware_pEqW that receives the result.
 * pHDagger_in     - right-hand factor, indexed [layer][rx antenna].
 * pInvHDH         - left-hand factor, indexed [layer][layer].
 * hardware_pEqW   - output; only slice [MDFT_in] is written.
 */
void hardware_Get_EqW (
        int NumLayer_in,
        int NumRxAntenna_in,
        int MDFT_in,
        hwComplex pHDagger_in[MAX_LAYER][MAX_RX_ANT],
        hwComplex pInvHDH[MAX_LAYER][MAX_LAYER],
        hwComplex hardware_pEqW[MAX_MDFT][MAX_LAYER][MAX_RX_ANT]
        )
{
    for (int lyr = 0; lyr < NumLayer_in; ++lyr)
    {
    #ifdef IS_HLS
    #pragma HLS PIPELINE enable_flush
    DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_LAYER)
    #endif
        for (int rx = 0; rx < NumRxAntenna_in; ++rx)
        {
        #ifdef IS_HLS
        #pragma HLS PIPELINE enable_flush
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_RX_ANT)
        #endif
            hwComplex acc;
            acc.re = 0;
            acc.im = 0;
            for (int inner = 0; inner < NumLayer_in; ++inner)
            {
            #ifdef IS_HLS
            #pragma HLS PIPELINE enable_flush
            DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_LAYER)
            #endif
                acc = hwAdd(acc, hwMul(pInvHDH[lyr][inner], pHDagger_in[inner][rx]));
            }
            hardware_pEqW[MDFT_in][lyr][rx] = acc;
        }
    }
}

/*
 * Equalization stage 2.
 *
 * Inverts pHDH into pInvHDH, computes pInvHDH * pHDagger_in into
 * hardware_pEqW[MDFT_in], and forwards pYData_in and pHDagger_in to the
 * next stage through pYData_out and pHDagger_out. The four scalar inputs
 * are forwarded unchanged through the *_out pointers.
 */
void hardware_FDLSEqualization_Phase2(
        hwComplex pYData_in[MAX_SYM][MAX_RX_ANT],
        hwComplex pYData_out[MAX_SYM][MAX_RX_ANT],
        hwComplex hardware_pEqW[MAX_MDFT][MAX_LAYER][MAX_RX_ANT],
        hwComplex pHDagger_in[MAX_LAYER][MAX_RX_ANT],
        hwComplex pHDagger_out[MAX_LAYER][MAX_RX_ANT],
        hwComplex pHDH[MAX_LAYER][MAX_LAYER],
        hwComplex pInvHDH[MAX_LAYER][MAX_LAYER],
        int NumLayer_in,
        int* NumLayer_out,
        int NumRxAntenna_in,
        int* NumRxAntenna_out,
        int MDFT_in,
        int* MDFT_out,
        int NumULSymbSF_in,
        int* NumULSymbSF_out)
{
       // pInvHDH must be produced before hardware_Get_EqW consumes it.
       hwMatrixInv(NumLayer_in,pHDH,pInvHDH);
       hardware_Get_EqW (NumLayer_in,NumRxAntenna_in,MDFT_in,pHDagger_in,pInvHDH,hardware_pEqW);
       // Plain copies that hand the data on to the next stage.
       hardware_cpy_YData(NumULSymbSF_in, NumRxAntenna_in, pYData_in, pYData_out);
       hardware_cpy_pHDagger(NumLayer_in,NumRxAntenna_in,pHDagger_in, pHDagger_out);

       // Pass the scalar parameters through to the next stage.
       *NumRxAntenna_out = NumRxAntenna_in;
       *MDFT_out = MDFT_in;
       *NumLayer_out = NumLayer_in;
       *NumULSymbSF_out = NumULSymbSF_in;

    return;
}


/*
 * Equalization stage 3, step 1 (one symbol).
 *
 * Computes pHDY = pHDagger * pYData[nSymb_in] (a vector of NumLayer_in
 * entries, summed over NumRxAntenna columns) and copies the leading
 * NumLayer_in x NumLayer_in entries of pInvHDH_in to pInvHDH_out.
 * NumLayer_in, MDFT_in, nSymb_in and NumULSymbSF_in are forwarded
 * unchanged through the *_out pointers.
 */
void hardware_FDLSEqualization_Phase3_Part1(
        hwComplex pYData[MAX_SYM][MAX_RX_ANT],
        hwComplex pHDagger[MAX_LAYER][MAX_RX_ANT],
        hwComplex pHDY[MAX_LAYER],
        hwComplex pInvHDH_in[MAX_LAYER][MAX_LAYER],
        hwComplex pInvHDH_out[MAX_LAYER][MAX_LAYER],
        int NumLayer_in,
        int* NumLayer_out,
        int MDFT_in,
        int* MDFT_out,
        int NumRxAntenna,
        int nSymb_in,
        int* nSymb_out,
        int NumULSymbSF_in,
        int* NumULSymbSF_out)
{
    // Matrix-vector product: pHDY[lyr] = sum over rx of pHDagger[lyr][rx] * pYData[nSymb_in][rx].
    for (int lyr = 0; lyr < NumLayer_in; ++lyr)
    {
    #ifdef IS_HLS
    #pragma HLS PIPELINE enable_flush
    DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_LAYER)
    #endif
        hwComplex acc;
        acc.re = 0;
        acc.im = 0;
        for (int rx = 0; rx < NumRxAntenna; ++rx)
        {
        #ifdef IS_HLS
        #pragma HLS PIPELINE enable_flush
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_RX_ANT)
        #endif
            acc = hwAdd(acc, hwMul(pHDagger[lyr][rx], (pYData[nSymb_in][rx])));
        }
        pHDY[lyr] = acc;
    }

    // Forward the inverse matrix to the next step.
    for (int row = 0; row < NumLayer_in; ++row)
    {
    #ifdef IS_HLS
    #pragma HLS PIPELINE enable_flush
    DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_LAYER)
    #endif
        for (int col = 0; col < NumLayer_in; ++col)
        {
        #ifdef IS_HLS
        #pragma HLS PIPELINE enable_flush
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_LAYER)
        #endif
            pInvHDH_out[row][col] = pInvHDH_in[row][col];
        }
    }

    // Pass the scalar parameters through to the next step.
    *NumLayer_out = NumLayer_in;
    *MDFT_out = MDFT_in;
    *nSymb_out = nSymb_in;
    *NumULSymbSF_out = NumULSymbSF_in;
}

/*
 * Equalization stage 3, step 2 (one symbol).
 *
 * Computes pXData = pInvHDH * pHDY, a vector of NumLayer_in entries.
 * NumLayer_in, MDFT_in, nSymb_in and NumULSymbSF_in are forwarded
 * unchanged through the *_out pointers.
 */
void hardware_FDLSEqualization_Phase3_Part2(
        hwComplex pInvHDH[MAX_LAYER][MAX_LAYER],
        hwComplex pXData[MAX_LAYER],
        hwComplex pHDY[MAX_LAYER],
        int NumLayer_in,
        int* NumLayer_out,
        int MDFT_in,
        int* MDFT_out,
        int nSymb_in,
        int* nSymb_out,
        int NumULSymbSF_in,
        int* NumULSymbSF_out
        )
{
    for (int lyr = 0; lyr < NumLayer_in; ++lyr)
    {
    #ifdef IS_HLS
    #pragma HLS PIPELINE enable_flush
    DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_LAYER)
    #endif
        hwComplex acc;
        acc.re = 0;
        acc.im = 0;
        for (int inner = 0; inner < NumLayer_in; ++inner)
        {
        #ifdef IS_HLS
        #pragma HLS PIPELINE enable_flush
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_LAYER)
        #endif
            acc = hwAdd(acc, hwMul(pInvHDH[lyr][inner], (pHDY[inner])));
        }
        pXData[lyr] = acc;
    }

    // Pass the scalar parameters through to the next step.
    *NumLayer_out = NumLayer_in;
    *MDFT_out = MDFT_in;
    *nSymb_out = nSymb_in;
    *NumULSymbSF_out = NumULSymbSF_in;
}

/*
 * Equalization stage 3, step 3 (one symbol).
 *
 * Writes the NumLayer entries of pXData into column m of
 * hardware_pOutData. The entry for a given layer goes to row
 * (NumULSymbSF - 2) * layer + nSymb.
 */
void hardware_FDLSEqualization_Phase3_Part3(
        hwComplex pXData[MAX_LAYER],
        hwComplex hardware_pOutData[MAX_SYM*MAX_LAYER][MAX_MDFT],
        int m,
        int NumLayer,
        int nSymb,
        int NumULSymbSF)
{
    for (int lyr = 0; lyr < NumLayer; ++lyr)
    {
    #ifdef IS_HLS
    #pragma HLS PIPELINE enable_flush
    DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_LAYER)
    #endif
        // Output rows are grouped per layer, (NumULSymbSF - 2) rows each.
        int outRow = (NumULSymbSF-2)*lyr+nSymb;
        hardware_pOutData[outRow][m] = pXData[lyr];
    }
}

/*
 * Equalization stage 3.
 *
 * For each of the first (NumULSymbSF - 2) symbols, runs the three steps
 * Part1 -> Part2 -> Part3:
 *   Part1: pHDY   = pHDagger * pYData[nSymb], and copies pInvHDH to pInvHDH_1
 *   Part2: pXData = pInvHDH_1 * pHDY
 *   Part3: stores pXData into column m of hardware_pOutData
 * Under IS_HLS the loop body carries a DATAFLOW pragma. Each step receives
 * its scalar parameters from the *_out values of the previous step (the
 * _1 and _2 locals) rather than from this function's arguments.
 */
void hardware_FDLSEqualization_Phase3(
        hwComplex pYData[MAX_SYM][MAX_RX_ANT],
        hwComplex pHDagger[MAX_LAYER][MAX_RX_ANT],
        hwComplex pInvHDH[MAX_LAYER][MAX_LAYER],
        hwComplex hardware_pOutData[MAX_SYM*MAX_LAYER][MAX_MDFT],
        int m,
        int NumLayer,
        int NumRxAntenna,
        int NumULSymbSF)
{

    ////////////////// Equalizing Data /////////////////
     for(int nSymb=0;nSymb<NumULSymbSF-2;nSymb++)
     {
    #ifdef IS_HLS
    #pragma HLS DATAFLOW
    DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_SYM-2)
    #endif
         // Per-symbol intermediates handed from one step to the next.
         hwComplex pHDY[MAX_LAYER];
         hwComplex pXData[MAX_LAYER];
         hwComplex pInvHDH_1[MAX_LAYER][MAX_LAYER];
         // Scalars forwarded by Part1 (_1) and Part2 (_2).
         int NumLayer_1, NumLayer_2;
         int MDFT_1, MDFT_2;
         int nSymb_1, nSymb_2;
         int NumULSymbSF_1, NumULSymbSF_2;

         hardware_FDLSEqualization_Phase3_Part1(
                 pYData,
                 pHDagger,
                 pHDY,
                 pInvHDH,
                 pInvHDH_1,
                 NumLayer,
                 &NumLayer_1,
                 m,
                 &MDFT_1,
                 NumRxAntenna,
                 nSymb,
                 &nSymb_1,
                 NumULSymbSF,
                 &NumULSymbSF_1);


         hardware_FDLSEqualization_Phase3_Part2(
                 pInvHDH_1,
                 pXData,
                 pHDY,
                 NumLayer_1,
                 &NumLayer_2,
                 MDFT_1,
                 &MDFT_2,
                 nSymb_1,
                 &nSymb_2,
                 NumULSymbSF_1,
                 &NumULSymbSF_2
                 );

         hardware_FDLSEqualization_Phase3_Part3(
                 pXData,
                 hardware_pOutData,
                 MDFT_2,
                 NumLayer_2,
                 nSymb_2,
                 NumULSymbSF_2);
     }
}


/*
 * Computes pXDX = pXtDagger_in * pXt, a NumLayer_in x NumLayer_in matrix
 * whose entries are sums over the two-element inner dimension.
 *
 * NumLayer_in  - rows and columns of the result.
 * pXtDagger_in - left-hand factor, indexed [layer][0..1].
 * pXt          - right-hand factor, indexed [0..1][layer].
 * pXDX         - output matrix.
 */
void hardware_FDLSEstimation_Part1_f1(
        int NumLayer_in,
        hwComplex pXtDagger_in[MAX_LAYER][2],
        hwComplex pXt[2][MAX_LAYER],
        hwComplex pXDX[MAX_LAYER][MAX_LAYER]
        )
{
#ifdef IS_HLS
#pragma HLS INLINE off
#endif
    for (int row = 0; row < NumLayer_in; ++row)
    {
    #ifdef IS_HLS
    #pragma HLS PIPELINE enable_flush
    DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_LAYER)
    #endif
        for (int col = 0; col < NumLayer_in; ++col)
        {
        #ifdef IS_HLS
        #pragma HLS PIPELINE enable_flush
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_LAYER)
        #endif
            hwComplex acc;
            acc.re = 0;
            acc.im = 0;
            for (int inner = 0; inner < 2; ++inner)
            {
                acc = hwAdd(acc, hwMul(pXtDagger_in[row][inner], pXt[inner][col]));
            }
            pXDX[row][col] = acc;
        }
    }
}

/*
 * Estimation stage 1.
 *
 * Computes pXDX = pXtDagger_in * pXt and forwards pYData_in, pYt_in and
 * pXtDagger_in to the next stage through the matching *_out arrays. The
 * four scalar inputs are forwarded unchanged through the *_out pointers.
 */
void hardware_FDLSEstimation_Part1(
        hwComplex pXt[2][MAX_LAYER],
        hwComplex pXtDagger_in[MAX_LAYER][2],
        hwComplex pXtDagger_out[MAX_LAYER][2],
        hwComplex pXDX[MAX_LAYER][MAX_LAYER],
        hwComplex pYt_in [2][MAX_RX_ANT],
        hwComplex pYt_out [2][MAX_RX_ANT],
        hwComplex pYData_in[MAX_SYM-2][MAX_RX_ANT],
        hwComplex pYData_out[MAX_SYM-2][MAX_RX_ANT],
        int NumLayer_in,
        int* NumLayer_out,
        int NumRxAntenna_in,
        int* NumRxAntenna_out,
        int MDFT_in,
        int* MDFT_out,
        int NumULSymbSF_in,
        int* NumULSymbSF_out
        )
{

	hardware_FDLSEstimation_Part1_f1(NumLayer_in, pXtDagger_in, pXt, pXDX);
    // Plain copies that hand the data on to the next stage.
    hardware_cpy_YData(NumULSymbSF_in, NumRxAntenna_in, pYData_in, pYData_out);
    hardware_cpy_pYt(NumRxAntenna_in, pYt_in, pYt_out);
    hardware_cpy_pXtDagger(NumLayer_in, pXtDagger_in, pXtDagger_out);
    // Pass the scalar parameters through to the next stage.
    *NumRxAntenna_out = NumRxAntenna_in;
    *MDFT_out = MDFT_in;
    *NumLayer_out = NumLayer_in;
    *NumULSymbSF_out = NumULSymbSF_in;
}

/*
 * Estimation stage 2.
 *
 * Inverts the leading NumLayer_in x NumLayer_in part of pXDX into pInvXDX
 * and forwards pXtDagger_in, pYt_in and pYData_in to the next stage through
 * the matching *_out arrays. The four scalar inputs are forwarded unchanged
 * through the *_out pointers.
 */
void hardware_FDLSEstimation_Part2(
        hwComplex pXDX[MAX_LAYER][MAX_LAYER],
        hwComplex pInvXDX[MAX_LAYER][MAX_LAYER],
        hwComplex pXtDagger_in[MAX_LAYER][2],
        hwComplex pXtDagger_out[MAX_LAYER][2],
        hwComplex pYt_in[2][MAX_RX_ANT],
        hwComplex pYt_out[2][MAX_RX_ANT],
        hwComplex pYData_in[MAX_SYM-2][MAX_RX_ANT],
        hwComplex pYData_out[MAX_SYM-2][MAX_RX_ANT],
        int NumLayer_in,
        int* NumLayer_out,
        int NumRxAntenna_in,
        int* NumRxAntenna_out,
        int MDFT_in,
        int* MDFT_out,
        int NumULSymbSF_in,
        int* NumULSymbSF_out
        )
{
    hwMatrixInv(NumLayer_in,pXDX,pInvXDX);
    // Plain copies that hand the data on to the next stage.
    hardware_cpy_pXtDagger(NumLayer_in, pXtDagger_in, pXtDagger_out);
    hardware_cpy_pYt(NumRxAntenna_in, pYt_in, pYt_out);
    hardware_cpy_YData(NumULSymbSF_in, NumRxAntenna_in, pYData_in, pYData_out);
    // Pass the scalar parameters through to the next stage.
    *NumRxAntenna_out = NumRxAntenna_in;
    *MDFT_out = MDFT_in;
    *NumLayer_out = NumLayer_in;
    *NumULSymbSF_out = NumULSymbSF_in;
}


/*
 * Computes pXDY = pXtDagger * pYt, a NumLayer_in x NumRxAntenna_in matrix
 * whose entries are sums over the two-element inner dimension.
 *
 * NumLayer_in     - rows of the result.
 * NumRxAntenna_in - columns of the result.
 * pXtDagger       - left-hand factor, indexed [layer][0..1].
 * pYt             - right-hand factor, indexed [0..1][rx antenna].
 * pXDY            - output matrix.
 */
void hardware_FDLSEstimation_Part3_f1(
        int NumLayer_in,
        int NumRxAntenna_in,
        hwComplex pXtDagger[MAX_LAYER][2],
        hwComplex pYt[2][MAX_RX_ANT],
        hwComplex pXDY[MAX_LAYER][MAX_RX_ANT]
        )
{
#ifdef IS_HLS
#pragma HLS INLINE off
#endif
    for (int lyr = 0; lyr < NumLayer_in; ++lyr)
    {
    #ifdef IS_HLS
    #pragma HLS PIPELINE enable_flush
    DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_LAYER)
    #endif
        for (int rx = 0; rx < NumRxAntenna_in; ++rx)
        {
        #ifdef IS_HLS
        #pragma HLS PIPELINE enable_flush
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_RX_ANT)
        #endif
            hwComplex acc;
            acc.re = 0;
            acc.im = 0;
            for (int inner = 0; inner < 2; ++inner)
            {
            #ifdef IS_HLS
            #pragma HLS PIPELINE enable_flush
            #endif
                acc = hwAdd(acc, hwMul(pXtDagger[lyr][inner], pYt[inner][rx]));
            }
            pXDY[lyr][rx] = acc;
        }
    }
}

/*
 * Estimation stage 3.
 *
 * Computes pXDY = pXtDagger * pYt and forwards pInvXDX_in and pYData_in to
 * the next stage through pInvXDX_out and pYData_out. The four scalar inputs
 * are forwarded unchanged through the *_out pointers.
 */
void hardware_FDLSEstimation_Part3(
        hwComplex pXtDagger[MAX_LAYER][2],
        hwComplex pYt[2][MAX_RX_ANT],
        hwComplex pXDY[MAX_LAYER][MAX_RX_ANT],
        hwComplex pYData_in[MAX_SYM-2][MAX_RX_ANT],
        hwComplex pYData_out[MAX_SYM-2][MAX_RX_ANT],
        hwComplex pInvXDX_in[MAX_LAYER][MAX_LAYER],
        hwComplex pInvXDX_out[MAX_LAYER][MAX_LAYER],
        int NumLayer_in,
        int* NumLayer_out,
        int NumRxAntenna_in,
        int* NumRxAntenna_out,
        int MDFT_in,
        int* MDFT_out,
        int NumULSymbSF_in,
        int* NumULSymbSF_out
        )
{

	hardware_FDLSEstimation_Part3_f1(NumLayer_in, NumRxAntenna_in, pXtDagger, pYt, pXDY);
    // Plain copies that hand the data on to the next stage.
    hardware_cpy_pInvXDX(NumLayer_in,pInvXDX_in, pInvXDX_out);
    hardware_cpy_YData(NumULSymbSF_in, NumRxAntenna_in, pYData_in, pYData_out);
    // Pass the scalar parameters through to the next stage.
    *NumRxAntenna_out = NumRxAntenna_in;
    *MDFT_out = MDFT_in;
    *NumLayer_out = NumLayer_in;
    *NumULSymbSF_out = NumULSymbSF_in;
}

/*
 * Computes pHTranspose = pInvXDX * pXDY, a NumLayer_in x NumRxAntenna_in
 * matrix whose entries are sums over NumLayer_in terms.
 *
 * NumLayer_in     - rows of the result and length of the summed dimension.
 * NumRxAntenna_in - columns of the result.
 * pInvXDX         - left-hand factor, indexed [layer][layer].
 * pXDY            - right-hand factor, indexed [layer][rx antenna].
 * pHTranspose     - output matrix.
 */
void hardware_FDLSEstimation_Part4_f1(
        int NumLayer_in,
        int NumRxAntenna_in,
        hwComplex pInvXDX[MAX_LAYER][MAX_LAYER],
        hwComplex pXDY[MAX_LAYER][MAX_RX_ANT],
        hwComplex pHTranspose[MAX_LAYER][MAX_RX_ANT]
        )
{
    for (int lyr = 0; lyr < NumLayer_in; ++lyr)
    {
    #ifdef IS_HLS
    #pragma HLS PIPELINE enable_flush
    DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_LAYER)
    #endif
        for (int rx = 0; rx < NumRxAntenna_in; ++rx)
        {
        #ifdef IS_HLS
        #pragma HLS PIPELINE enable_flush
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_RX_ANT)
        #endif
            hwComplex acc;
            acc.re = 0;
            acc.im = 0;
            for (int inner = 0; inner < NumLayer_in; ++inner)
            {
            #ifdef IS_HLS
            #pragma HLS PIPELINE enable_flush
            DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_LAYER)
            #endif
                acc = hwAdd(acc, hwMul(pInvXDX[lyr][inner], pXDY[inner][rx]));
            }
            pHTranspose[lyr][rx] = acc;
        }
    }
}


/*
 * Estimation stage 4.
 *
 * Computes pHTranspose = pInvXDX * pXDY and forwards pYData_in to the next
 * stage through pYData_out. The four scalar inputs are forwarded unchanged
 * through the *_out pointers.
 */
void hardware_FDLSEstimation_Part4(
        hwComplex pInvXDX[MAX_LAYER][MAX_LAYER],
        hwComplex pXDY[MAX_LAYER][MAX_RX_ANT],
        hwComplex pHTranspose[MAX_LAYER][MAX_RX_ANT],
        hwComplex pYData_in[MAX_SYM-2][MAX_RX_ANT],
        hwComplex pYData_out[MAX_SYM-2][MAX_RX_ANT],
        int NumLayer_in,
        int* NumLayer_out,
        int NumRxAntenna_in,
        int* NumRxAntenna_out,
        int MDFT_in,
        int* MDFT_out,
        int NumULSymbSF_in,
        int* NumULSymbSF_out
        )
{
	hardware_FDLSEstimation_Part4_f1(NumLayer_in, NumRxAntenna_in, pInvXDX, pXDY, pHTranspose);
    // Plain copy that hands the data on to the next stage.
    hardware_cpy_YData(NumULSymbSF_in, NumRxAntenna_in, pYData_in, pYData_out);
    // Pass the scalar parameters through to the next stage.
    *NumRxAntenna_out = NumRxAntenna_in;
    *MDFT_out = MDFT_in;
    *NumLayer_out = NumLayer_in;
    *NumULSymbSF_out = NumULSymbSF_in;
}


/*
 * Input stage: gathers the data for column MDFT_in of the input arrays.
 *
 *   pXt[slot][layer]       = hardware_pDMRS[slot][layer][MDFT_in], slot = 0,1
 *   pXtDagger[layer][slot] = conjugate of the same element
 *   pYt[slot][rx]          = hardware_pInpData[rx*2 + slot][MDFT_in]
 *   pYData[sym][rx]        = hardware_pInpData[(NumULSymbSF_in-2)*rx + sym
 *                                              + 2*NumRxAntenna_in][MDFT_in]
 *                            for sym in [0, NumULSymbSF_in - 2)
 *
 * The four scalar inputs are forwarded unchanged through the *_out pointers.
 *
 * NOTE(review): the first dimension of hardware_pInpData is declared as
 * MAX_SYM*MAX_LAYER, but its rows are addressed per rx antenna here -
 * confirm the declared bound covers NumULSymbSF_in * NumRxAntenna_in rows.
 */
void hardware_readData(
        hwComplex hardware_pInpData[MAX_SYM*MAX_LAYER][MAX_MDFT],
        hwComplex hardware_pDMRS[MAX_SLOT][MAX_LAYER][MAX_MDFT],
        hwComplex pXt[2][MAX_LAYER],
        hwComplex pXtDagger[MAX_LAYER][2],
        hwComplex pYt [2][MAX_RX_ANT],
        hwComplex pYData[MAX_SYM-2][MAX_RX_ANT],
        int NumLayer_in,
        int* NumLayer_out,
        int NumRxAntenna_in,
        int* NumRxAntenna_out,
        int MDFT_in,
        int* MDFT_out,
        int NumULSymbSF_in,
        int* NumULSymbSF_out
        )
{
    // Reference entries and their conjugates for the two slots.
    for (int s = 0; s < 2; ++s)
    {
        for (int lyr = 0; lyr < NumLayer_in; ++lyr)
        {
        #ifdef IS_HLS
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_LAYER)
        #endif
            pXt[s][lyr] = hardware_pDMRS[s][lyr][MDFT_in];
            pXtDagger[lyr][s] = hwConj(hardware_pDMRS[s][lyr][MDFT_in]);
        }
    }

    // The first 2*NumRxAntenna_in input rows, two consecutive rows per antenna.
    for (int s = 0; s < 2; ++s)
    {
        for (int rx = 0; rx < NumRxAntenna_in; ++rx)
        {
        #ifdef IS_HLS
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_RX_ANT)
        #endif
            pYt[s][rx] = hardware_pInpData[rx*2+s][MDFT_in];
        }
    }

    // Remaining rows: (NumULSymbSF_in - 2) consecutive rows per antenna,
    // starting after the first 2*NumRxAntenna_in rows.
    for (int sym = 0; sym < NumULSymbSF_in-2; ++sym)
    {
    #ifdef IS_HLS
    #pragma HLS PIPELINE enable_flush
    DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_SYM-2)
    #endif
        for (int rx = 0; rx < NumRxAntenna_in; ++rx)
        {
        #ifdef IS_HLS
        #pragma HLS PIPELINE enable_flush
        DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_RX_ANT)
        #endif
            int srcRow = (NumULSymbSF_in-2)*rx+sym+2*NumRxAntenna_in;
            pYData[sym][rx] = hardware_pInpData[srcRow][MDFT_in];
        }
    }

    // Pass the scalar parameters through to the next stage.
    *NumULSymbSF_out = NumULSymbSF_in;
    *NumRxAntenna_out = NumRxAntenna_in;
    *MDFT_out = MDFT_in;
    *NumLayer_out = NumLayer_in;
}



/*
 * Top-level entry point.
 *
 * For each m in [0, MDFT) the following chain of stages is executed:
 *   hardware_readData                 - gather pXt, pXtDagger, pYt, pYData
 *   hardware_FDLSEstimation_Part1     - pXDX    = pXtDagger * pXt
 *   hardware_FDLSEstimation_Part2     - pInvXDX = inverse of pXDX
 *   hardware_FDLSEstimation_Part3     - pXDY    = pXtDagger * pYt
 *   hardware_FDLSEstimation_Part4     - pHTranspose = pInvXDX * pXDY
 *   hardware_FDLSEqualization_Phase1  - pHDagger, pHDH; writes hardware_pHdm[m]
 *   hardware_FDLSEqualization_Phase2  - pInvHDH; writes hardware_pEqW[m]
 *   hardware_FDLSEqualization_Phase3  - writes column m of hardware_pOutData
 *
 * hardware_pDMRS and hardware_pInpData are inputs; hardware_pEqW,
 * hardware_pHdm and hardware_pOutData are outputs.
 *
 * Every stage takes its scalar parameters from the *_out values of the
 * stage before it (locals suffixed _1 .. _7) and its arrays from copies
 * made by the stage before it (pYData_1 .. pYData_6 and similar).
 * NOTE(review): this forwarding is presumably there so that each
 * intermediate has one producer and one consumer inside the DATAFLOW
 * region - confirm against the HLS tool requirements.
 */
void hardware_Equalizer(
        hwComplex hardware_pDMRS[MAX_SLOT][MAX_LAYER][MAX_MDFT],
        hwComplex hardware_pInpData[MAX_SYM*MAX_LAYER][MAX_MDFT],
        hwComplex hardware_pEqW[MAX_MDFT][MAX_LAYER][MAX_RX_ANT],
        hwComplex hardware_pHdm[MAX_MDFT][MAX_RX_ANT][MAX_LAYER],
        hwComplex hardware_pOutData[MAX_SYM*MAX_LAYER][MAX_MDFT],
        int MDFT,
        int NumLayer,
        int NumRxAntenna,
        int NumULSymbSF)
{


#ifdef IS_HLS
#pragma HLS INTERFACE s_axilite port=return
#pragma HLS INTERFACE s_axilite port=MDFT
#pragma HLS INTERFACE s_axilite port=NumLayer
#pragma HLS INTERFACE s_axilite port=NumRxAntenna
#pragma HLS INTERFACE s_axilite port=NumULSymbSF
#pragma HLS RESOURCE variable=hardware_pDMRS core=RAM_1P_BRAM
#pragma HLS RESOURCE variable=hardware_pInpData core=RAM_1P_BRAM
#pragma HLS RESOURCE variable=hardware_pEqW core=RAM_1P_BRAM
#pragma HLS RESOURCE variable=hardware_pHdm core=RAM_1P_BRAM
#pragma HLS RESOURCE variable=hardware_pOutData core=RAM_1P_BRAM
#endif


    for(int m=0;m<MDFT;m++)
    {
    #ifdef IS_HLS
    #pragma HLS DATAFLOW
    DO_PRAGMA(HLS LOOP_TRIPCOUNT max=MAX_MDFT)
    #endif
        // Intermediate arrays; a numeric suffix marks a copy forwarded by a later stage.
        hwComplex pXt[2][MAX_LAYER];
        hwComplex pXtDagger[MAX_LAYER][2];
        hwComplex pXtDagger_1[MAX_LAYER][2];
        hwComplex pXtDagger_2[MAX_LAYER][2];
        hwComplex pYt [2][MAX_RX_ANT];
        hwComplex pYt_1 [2][MAX_RX_ANT];
        hwComplex pYt_2 [2][MAX_RX_ANT];
        hwComplex pHTranspose[MAX_LAYER][MAX_RX_ANT];
        hwComplex pYData[MAX_SYM-2][MAX_RX_ANT];
        hwComplex pYData_1[MAX_SYM-2][MAX_RX_ANT];
        hwComplex pYData_2[MAX_SYM-2][MAX_RX_ANT];
        hwComplex pYData_3[MAX_SYM-2][MAX_RX_ANT];
        hwComplex pYData_4[MAX_SYM-2][MAX_RX_ANT];
        hwComplex pYData_5[MAX_SYM-2][MAX_RX_ANT];
        hwComplex pYData_6[MAX_SYM-2][MAX_RX_ANT];
        hwComplex pHDH[MAX_LAYER][MAX_LAYER];
        hwComplex pHDagger_1[MAX_LAYER][MAX_RX_ANT];
        hwComplex pHDagger_2[MAX_LAYER][MAX_RX_ANT];
        hwComplex pInvHDH[MAX_LAYER][MAX_LAYER];
        hwComplex pXDX[MAX_LAYER][MAX_LAYER];
        hwComplex pInvXDX[MAX_LAYER][MAX_LAYER];
        hwComplex pInvXDX_1[MAX_LAYER][MAX_LAYER];
        hwComplex pXDY[MAX_LAYER][MAX_RX_ANT];

        // Scalar parameters as forwarded by stage 1 .. stage 7.
        int NumLayer_1, NumLayer_2, NumLayer_3, NumLayer_4;
        int NumLayer_5, NumLayer_6, NumLayer_7;
        int NumRxAntenna_1, NumRxAntenna_2, NumRxAntenna_3, NumRxAntenna_4;
        int NumRxAntenna_5, NumRxAntenna_6, NumRxAntenna_7;
        int NumULSymbSF_1, NumULSymbSF_2, NumULSymbSF_3, NumULSymbSF_4;
        int NumULSymbSF_5, NumULSymbSF_6, NumULSymbSF_7;
        int MDFT_1, MDFT_2, MDFT_3, MDFT_4;
        int MDFT_5, MDFT_6, MDFT_7;
        // Stage 1: gather the inputs for column m.
        hardware_readData(
            hardware_pInpData,
            hardware_pDMRS,
            pXt,
            pXtDagger,
            pYt,
            pYData,
            NumLayer,
            &NumLayer_1,
            NumRxAntenna,
            &NumRxAntenna_1,
            m,
            &MDFT_1,
            NumULSymbSF,
            &NumULSymbSF_1);

        // Stage 2: pXDX = pXtDagger * pXt.
        hardware_FDLSEstimation_Part1(
            pXt,
            pXtDagger,
            pXtDagger_1,
            pXDX,
            pYt,
            pYt_1,
            pYData,
            pYData_1,
            NumLayer_1,
            &NumLayer_2,
            NumRxAntenna_1,
            &NumRxAntenna_2,
            MDFT_1,
            &MDFT_2,
            NumULSymbSF_1,
            &NumULSymbSF_2);

        // Stage 3: pInvXDX = inverse of pXDX.
        hardware_FDLSEstimation_Part2(
            pXDX,
            pInvXDX,
            pXtDagger_1,
            pXtDagger_2,
            pYt_1,
            pYt_2,
            pYData_1,
            pYData_2,
            NumLayer_2,
            &NumLayer_3,
            NumRxAntenna_2,
            &NumRxAntenna_3,
            MDFT_2,
            &MDFT_3,
            NumULSymbSF_2,
            &NumULSymbSF_3);

        // Stage 4: pXDY = pXtDagger * pYt.
        hardware_FDLSEstimation_Part3(
            pXtDagger_2,
            pYt_2,
            pXDY,
            pYData_2,
            pYData_3,
            pInvXDX,
            pInvXDX_1,
            NumLayer_3,
            &NumLayer_4,
            NumRxAntenna_3,
            &NumRxAntenna_4,
            MDFT_3,
            &MDFT_4,
            NumULSymbSF_3,
            &NumULSymbSF_4);

        // Stage 5: pHTranspose = pInvXDX * pXDY.
        hardware_FDLSEstimation_Part4(
            pInvXDX_1,
            pXDY,
            pHTranspose,
            pYData_3,
            pYData_4,
            NumLayer_4,
            &NumLayer_5,
            NumRxAntenna_4,
            &NumRxAntenna_5,
            MDFT_4,
            &MDFT_5,
            NumULSymbSF_4,
            &NumULSymbSF_5);

        // Stage 6: pHDagger and pHDH; also writes hardware_pHdm[m].
        hardware_FDLSEqualization_Phase1(
            pHTranspose,
            pHDH,
            pYData_4,
            pYData_5,
            hardware_pHdm,
            pHDagger_1,
            NumLayer_5,
            &NumLayer_6,
            NumRxAntenna_5,
            &NumRxAntenna_6,
            MDFT_5,
            &MDFT_6,
            NumULSymbSF_5,
            &NumULSymbSF_6);


         // Stage 7: pInvHDH; also writes hardware_pEqW[m].
         hardware_FDLSEqualization_Phase2(
            pYData_5,
            pYData_6,
            hardware_pEqW,
            pHDagger_1,
            pHDagger_2,
            pHDH,
            pInvHDH,
            NumLayer_6,
            &NumLayer_7,
            NumRxAntenna_6,
            &NumRxAntenna_7,
            MDFT_6,
            &MDFT_7,
            NumULSymbSF_6,
            &NumULSymbSF_7);


         // Stage 8: per-symbol equalization into column m of hardware_pOutData.
         hardware_FDLSEqualization_Phase3(
             pYData_6,
             pHDagger_2,
             pInvHDH,
             hardware_pOutData,
             MDFT_7,
             NumLayer_7,
             NumRxAntenna_7,
             NumULSymbSF_7);
    }
}
#endif
#endif
