/*
 * The Clear BSD License
 * Copyright (c) 2016, Freescale Semiconductor, Inc.
 * Copyright 2016-2017 NXP
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted (subject to the limitations in the disclaimer below) provided
 * that the following conditions are met:
 *
 * o Redistributions of source code must retain the above copyright notice, this list
 *   of conditions and the following disclaimer.
 *
 * o Redistributions in binary form must reproduce the above copyright notice, this
 *   list of conditions and the following disclaimer in the documentation and/or
 *   other materials provided with the distribution.
 *
 * o Neither the name of the copyright holder nor the names of its
 *   contributors may be used to endorse or promote products derived from this
 *   software without specific prior written permission.
 *
 * NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY THIS LICENSE.
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "board.h"
#include "fsl_debug_console.h"
#include "fsl_dma.h"
#include "fsl_rit.h"

#include "pin_mux.h"
#include <stdbool.h>
#include <stdlib.h>
/*******************************************************************************
 * Definitions
 ******************************************************************************/

/* Width of one DMA transfer element, in bytes. */
#define BYTE_WIDTH              4U
/* Bytes moved per descriptor: maximum transfer count (1024) x byte width (4) = 4096 bytes. */
#define TRANSFER_BLOCK_SZ       4096U
/* Size of each test buffer: 0x200 transfer blocks (2 MiB). Parenthesized so the
 * macro stays one operand when used inside a larger expression (x / BUFF_SZ,
 * x % BUFF_SZ, ...). */
#define BUFF_SZ                 (TRANSFER_BLOCK_SZ * 0x200)
/* Start of the source buffer; the destination buffer follows it directly. */
#define SRC_ADDR                0xA02EE000
#define DST_ADDR                (SRC_ADDR + BUFF_SZ)
/* Number of entries in each linked-descriptor table. */
#define MAX_NUM_DESC            512

/*******************************************************************************
 * Prototypes
 ******************************************************************************/

/*******************************************************************************
 * Variables
 ******************************************************************************/
/* Driver handle for the DMA channel used by this example. */
dma_handle_t g_DMA_Handle;
/* Scratch transfer configuration, rebuilt for every descriptor by APP_DMALinkedDescriptors(). */
dma_transfer_config_t transferConfig;
/* Set to true by DMA_Callback() when the driver reports a completed transfer. */
volatile bool g_Transfer_Done = false;

/*! @brief Static table of descriptors used by APP_DMALinkedDescriptors() */
#if defined(__ICCARM__)
#pragma data_alignment = 512
dma_descriptor_t g_linked_desc[MAX_NUM_DESC] = {0};
#elif defined(__CC_ARM)
__attribute__((aligned(512))) dma_descriptor_t g_linked_desc[MAX_NUM_DESC] = {0};
#elif defined(__GNUC__)
__attribute__((aligned(512))) dma_descriptor_t g_linked_desc[MAX_NUM_DESC] = {0};
#endif

/* Per-channel head descriptor table; declared here, defined outside this file. */
extern dma_descriptor_t s_dma_descriptor_table[FSL_FEATURE_DMA_NUMBER_OF_CHANNELS];

/*! @brief Static table of descriptors used by optimized_DMALinkedDescriptors()
 *
 * Linked (reload) descriptors are referenced through the link field of the
 * previous descriptor and must sit on a 16-byte boundary, so the table is
 * aligned explicitly instead of relying on the natural alignment of the type.
 */
#if defined(__ICCARM__)
#pragma data_alignment = 16
dma_descriptor_t dma_descriptors[MAX_NUM_DESC];
#elif defined(__CC_ARM) || defined(__GNUC__)
__attribute__((aligned(16))) dma_descriptor_t dma_descriptors[MAX_NUM_DESC];
#else
dma_descriptor_t dma_descriptors[MAX_NUM_DESC];
#endif

/*******************************************************************************
 * Code
 ******************************************************************************/

/*!
 * @brief User callback for the DMA transfer (installed on g_DMA_Handle in main()).
 *
 * Latches g_Transfer_Done when the driver signals completion. The flag is
 * never cleared here, and the remaining arguments are not used.
 *
 * @param handle       DMA handle the callback was invoked for (unused).
 * @param param        User parameter given at registration (unused).
 * @param transferDone true when the transfer has completed.
 * @param tcds         Passed through by the driver (unused).
 */
void DMA_Callback(dma_handle_t *handle, void *param, bool transferDone, uint32_t tcds)
{
    if (!transferDone)
    {
        return;
    }

    g_Transfer_Done = true;
}

void APP_DMALinkedDescriptors(void *src, void *dest, unsigned long size)
{
    int i,num_desc = 0;
    int len;
    void *srcaddr = src;
    void *destaddr = dest;
    
    /* Calculate number of linked descriptors required */
    num_desc = size / TRANSFER_BLOCK_SZ;
    if(size % TRANSFER_BLOCK_SZ != 0)
    {
            num_desc += 1;
    }
    
    /* Setup the linked descriptors */
    for(i = 0; i < num_desc; i++)
    {
            len = (size < TRANSFER_BLOCK_SZ) ? size : TRANSFER_BLOCK_SZ;
            
            if(i == 0)
            {
                    /* If this is only descriptor in the chain */
                    if(i == num_desc - 1)
                    {
                            DMA_PrepareTransfer(&transferConfig, srcaddr, destaddr, BYTE_WIDTH, len, kDMA_MemoryToMemory,
                    NULL);
                    }
                    else /* Prepare and submit first descriptor */
                    {
                                    DMA_PrepareTransfer(&transferConfig, srcaddr, destaddr, BYTE_WIDTH, len, kDMA_MemoryToMemory,
                    &g_linked_desc[1]);
                    }

                    DMA_SubmitTransfer(&g_DMA_Handle, &transferConfig);
            }
            else if(i == num_desc -1)
            {
                      /* Create last descriptor in chain */
                            DMA_PrepareTransfer(&transferConfig, srcaddr, destaddr, BYTE_WIDTH, len, kDMA_MemoryToMemory,
                    NULL);
                                            DMA_CreateDescriptor(&g_linked_desc[i], &transferConfig.xfercfg, srcaddr, destaddr,
                    NULL);
            }
            else
            {
                    /* Create the descriptors in chain */
                    DMA_PrepareTransfer(&transferConfig, srcaddr, destaddr, BYTE_WIDTH, len, kDMA_MemoryToMemory,
                    &g_linked_desc[i+1]);
                    DMA_CreateDescriptor(&g_linked_desc[i], &transferConfig.xfercfg, srcaddr, destaddr,
                    &g_linked_desc[i+1]);
            }
            
            /* Update source address, destination address and size */
            srcaddr = (void*)((uint32_t)srcaddr + len);
            destaddr = (void*)((uint32_t)destaddr + len);
            transferConfig.srcAddr = (uint8_t*)(srcaddr);
            transferConfig.dstAddr = (uint8_t*)(destaddr);
            size = size - len;
            
    }
	
}

void optimized_DMALinkedDescriptors (void *dst, void *src, unsigned long size)
{
    DMA_Type *base;
    dma_descriptor_t *pdma_desc = &dma_descriptors[0];
    int num_desc, i, xfer_sz, len, tmp, channel;
    uint32_t xfer_reg_tmp = 0, srcaddr, dstaddr;

    base = g_DMA_Handle.base;
    channel = g_DMA_Handle.channel;

    num_desc = size / TRANSFER_BLOCK_SZ;
    if ((size % TRANSFER_BLOCK_SZ) != 0)
        num_desc++;

#define SRC_DST_INC     1   // 3 - 4 x width, 2 - 2 x width, 1 - 1 x width
#define BYTEWIDTH       4
    xfer_reg_tmp = DMA_CHANNEL_XFERCFG_CFGVALID(true);
    xfer_reg_tmp |= DMA_CHANNEL_XFERCFG_SRCINC(SRC_DST_INC);
    xfer_reg_tmp |= DMA_CHANNEL_XFERCFG_DSTINC(SRC_DST_INC);
    tmp = BYTEWIDTH ? 2 : BYTEWIDTH - 1;
    xfer_reg_tmp |= DMA_CHANNEL_XFERCFG_WIDTH(tmp);   // 2 - 32bit, 1 - 16-bit, 0 - 8bit

    g_Transfer_Done = 0;

    for (i = 0; i < num_desc; i++)
    {
        xfer_reg_tmp &= ~DMA_CHANNEL_XFERCFG_XFERCOUNT_MASK;
        len = (size < TRANSFER_BLOCK_SZ ? size : TRANSFER_BLOCK_SZ);

        xfer_sz = len / BYTEWIDTH;
        xfer_reg_tmp |= DMA_CHANNEL_XFERCFG_XFERCOUNT(xfer_sz - 1);

        tmp = (SRC_DST_INC * BYTEWIDTH * xfer_sz) - 1;
        srcaddr = ((uint32_t)src + tmp);
        dstaddr = ((uint32_t)dst + tmp);

        if (i == 0)
        {
            // desc table
            s_dma_descriptor_table[channel].srcEndAddr = (void*)((uint32_t)srcaddr);
            s_dma_descriptor_table[channel].dstEndAddr = (void*)((uint32_t)dstaddr);
            s_dma_descriptor_table[channel].xfercfg = 0;
            s_dma_descriptor_table[channel].linkToNextDesc = 0;

            base->CHANNEL[channel].XFERCFG = xfer_reg_tmp | DMA_CHANNEL_XFERCFG_CFGVALID_MASK;

            if (i < (num_desc - 1)) {
                s_dma_descriptor_table[channel].linkToNextDesc = (pdma_desc + 1);
                base->CHANNEL[channel].XFERCFG |= DMA_CHANNEL_XFERCFG_RELOAD_MASK;

                pdma_desc++;
                size -= len;

                src = (void*)((uint32_t)src + len);
                dst = (void*)((uint32_t)dst + len);
            }
            
            if (i == (num_desc - 1))
                base->CHANNEL[channel].XFERCFG |= DMA_CHANNEL_XFERCFG_SETINTA(true);
        }
        else
        {
            // descriptors
            pdma_desc->srcEndAddr = (void*)((uint32_t)srcaddr);
            pdma_desc->dstEndAddr = (void*)((uint32_t)dstaddr);
            pdma_desc->xfercfg = xfer_reg_tmp | DMA_CHANNEL_XFERCFG_CFGVALID_MASK;

            if (i < (num_desc - 1))
            {
                pdma_desc->linkToNextDesc = (pdma_desc + 1);
                pdma_desc->xfercfg |= DMA_CHANNEL_XFERCFG_RELOAD_MASK;
                pdma_desc++;
                size -= len;

                src = (void*)((uint32_t)src + len);
                dst = (void*)((uint32_t)dst + len);
            }
            else
            {
                pdma_desc->xfercfg |= DMA_CHANNEL_XFERCFG_SETINTA(true);
                pdma_desc->linkToNextDesc = 0;
            }
        }
    }

    DMA_DisableChannelPeriphRq(base, channel);
    base->COMMON[DMA_CHANNEL_GROUP(channel)].INTENSET |= 1U << DMA_CHANNEL_INDEX(channel);
}

/*!
 * @brief Fills a buffer with pseudo-random 32-bit words from rand().
 *
 * @param psrc First word to write.
 * @param sz   Buffer size in bytes; sz >> 2 words are written, so any
 *             remainder below four bytes is left untouched. Nothing is
 *             written when that word count is not positive.
 */
void prepare_src_data(uint32_t *psrc, int sz)
{
    int words_left;

    for (words_left = sz >> 2; words_left > 0; words_left--)
    {
        /* Fill it with some pattern */
        *psrc++ = rand();
    }
}

/*!
 * @brief Main function
 *
 * Brings up the board, the RIT timer and DMA0 channel 1, fills the source
 * buffer with a pattern and then times how long each of the two
 * descriptor-building routines takes for ten buffer sizes
 * (BUFF_SZ, BUFF_SZ / 2, ... BUFF_SZ / 512). Only descriptor construction is
 * measured: no transfer is started and g_Transfer_Done is never waited on.
 */
int main(void)
{
    uint32_t *const srcAddr = (uint32_t *) SRC_ADDR;
    uint32_t *const destAddr = (uint32_t *) DST_ADDR;
    const int dma_channel = 1;

    rit_config_t ritConfig;
    uint32_t t_begin;
    uint32_t t_end;
    int pass;

    /* attach 12 MHz clock to FLEXCOMM0 (debug console) */
    CLOCK_AttachClk(kFRO12M_to_FLEXCOMM0);

    BOARD_InitPins();
    BOARD_BootClockPLL180M();
    BOARD_InitDebugConsole();
    BOARD_InitSDRAM();

    /* Start the RIT counter that serves as the time base for the measurements */
    RIT_GetDefaultConfig(&ritConfig);
    RIT_Init(RIT, &ritConfig);
    RIT->CTRL |= RIT_CTRL_RITEN_MASK;

    /* Clear the destination buffer and put a pattern into the source buffer */
    memset(destAddr, 0, BUFF_SZ);
    prepare_src_data(srcAddr, BUFF_SZ);

    PRINTF("Creating non-optimizied linked descriptors.\r\n\r\n");

    /* Set up the DMA channel, its handle and the completion callback */
    DMA_Init(DMA0);
    DMA_EnableChannel(DMA0, dma_channel);
    DMA_CreateHandle(&g_DMA_Handle, DMA0, dma_channel);
    DMA_SetCallback(&g_DMA_Handle, DMA_Callback, NULL);

    /* Time the SDK-based builder; the size halves on every pass */
    for (pass = 0; pass < 10; pass++)
    {
        t_begin = RIT_GetCounterTimerCount(RIT);
        APP_DMALinkedDescriptors(srcAddr, destAddr, BUFF_SZ >> pass);
        t_end = RIT_GetCounterTimerCount(RIT);

        PRINTF("i: %d size:  %d  Time start: 0x%x end: 0x%x - %d\r\n",
               pass, BUFF_SZ >> pass, t_begin, t_end, (t_end - t_begin));
    }
    /* The chain is not run here: DMA_StartTransfer(&g_DMA_Handle) followed by a
     * wait on g_Transfer_Done would be needed to perform the copy. */

    PRINTF("\r\nCreating optimizied linked descriptors.\r\n\r\n");

    /* Time the direct-register builder with the same sizes */
    for (pass = 0; pass < 10; pass++)
    {
        t_begin = RIT_GetCounterTimerCount(RIT);
        optimized_DMALinkedDescriptors(destAddr, srcAddr, BUFF_SZ >> pass);
        t_end = RIT_GetCounterTimerCount(RIT);

        PRINTF("i: %d size:  %d  Time start: 0x%x end: 0x%x - %d\r\n",
               pass, BUFF_SZ >> pass, t_begin, t_end, (t_end - t_begin));
    }
    /* The chain is not run here either: setting DMA_CHANNEL_XFERCFG_SWTRIG_MASK in
     * the channel's XFERCFG register and waiting on g_Transfer_Done would be
     * needed to perform the copy. */

    for (;;)
    {
    }
}
